main
 1from __future__ import annotations
 2
 3import sys
 4from typing import TYPE_CHECKING
 5from argparse import ArgumentParser
 6
 7from .._models import BaseModel
 8from ...lib._validators import (
 9    get_validators,
10    write_out_file,
11    read_any_format,
12    apply_validators,
13    apply_necessary_remediation,
14)
15
16if TYPE_CHECKING:
17    from argparse import _SubParsersAction
18
19
def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
    """Add the ``fine_tunes.prepare_data`` sub-command to *subparser*.

    The sub-command accepts a required ``-f/--file`` option and an optional
    ``-q/--quiet`` flag.  ``prepare_data`` and ``PrepareDataArgs`` are stored
    as the parser defaults ``func`` and ``args_model`` respectively.
    """
    sub = subparser.add_parser("fine_tunes.prepare_data")
    sub.add_argument(
        "-f",
        "--file",
        required=True,
        # Adjacent string literals are joined verbatim, so the space between
        # the two sentences has to be written explicitly.
        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed. "
        "This should be the local file path.",
    )
    sub.add_argument(
        "-q",
        "--quiet",
        required=False,
        action="store_true",
        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
    )
    sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
37
38
class PrepareDataArgs(BaseModel):
    """Arguments consumed by ``prepare_data``."""

    # Path of the data file; handed to ``read_any_format`` and ``apply_validators``.
    file: str

    # Passed to ``apply_validators`` as the auto-accept flag (set by ``-q/--quiet``).
    quiet: bool
43
44
def prepare_data(args: PrepareDataArgs) -> None:
    """Load ``args.file`` and run the validators over its contents.

    The file is read with ``read_any_format``; the remediation it returns is
    first passed to ``apply_necessary_remediation`` and then, together with
    the loaded data, to ``apply_validators``.  ``args.quiet`` is forwarded as
    the auto-accept flag and ``write_out_file`` as the output writer.
    """
    sys.stdout.write("Analyzing...\n")

    frame, load_remediation = read_any_format(args.file)
    apply_necessary_remediation(None, load_remediation)

    checks = get_validators()

    # The data must have been loaded before the validators can run.
    assert frame is not None

    apply_validators(
        frame,
        args.file,
        load_remediation,
        checks,
        args.quiet,
        write_out_file_func=write_out_file,
    )