main
1from __future__ import annotations
2
3import sys
4from typing import TYPE_CHECKING
5from argparse import ArgumentParser
6
7from .._models import BaseModel
8from ...lib._validators import (
9 get_validators,
10 write_out_file,
11 read_any_format,
12 apply_validators,
13 apply_necessary_remediation,
14)
15
16if TYPE_CHECKING:
17 from argparse import _SubParsersAction
18
19
def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
    """Register the ``fine_tunes.prepare_data`` sub-command on *subparser*.

    Adds a required ``-f/--file`` option and an optional ``-q/--quiet`` flag,
    then stores ``prepare_data`` and ``PrepareDataArgs`` as the parser
    defaults ``func`` and ``args_model``.
    """
    sub = subparser.add_parser("fine_tunes.prepare_data")
    sub.add_argument(
        "-f",
        "--file",
        required=True,
        # Adjacent string literals are joined verbatim, so the space between
        # the two sentences has to be written explicitly.
        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed. "
        "This should be the local file path.",
    )
    sub.add_argument(
        "-q",
        "--quiet",
        required=False,
        action="store_true",
        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
    )
    sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
37
38
class PrepareDataArgs(BaseModel):
    """Arguments consumed by ``prepare_data``.

    Registered as the ``args_model`` default of the
    ``fine_tunes.prepare_data`` sub-command.
    """

    # Path handed to ``read_any_format`` by ``prepare_data``.
    # NOTE(review): presumably filled from the ``-f/--file`` option - confirm
    # against the CLI dispatcher.
    file: str

    # Forwarded to ``apply_validators`` as the auto-accept flag.
    # NOTE(review): presumably filled from the ``-q/--quiet`` flag - confirm
    # against the CLI dispatcher.
    quiet: bool
43
44
def prepare_data(args: PrepareDataArgs) -> None:
    """Analyze the file named by ``args.file`` with the registered validators.

    Writes a progress line to stdout, loads the file with
    ``read_any_format``, hands the returned remediation to
    ``apply_necessary_remediation``, and finally calls ``apply_validators``
    with ``args.quiet`` as the auto-accept flag and ``write_out_file`` as the
    output writer.
    """
    sys.stdout.write("Analyzing...\n")

    path, accept_all = args.file, args.quiet

    frame, read_remediation = read_any_format(path)
    apply_necessary_remediation(None, read_remediation)

    checks = get_validators()

    # NOTE(review): presumably the reader may return no frame and the step
    # above is expected to stop the run in that case - confirm. The assert
    # keeps that invariant explicit before validation starts.
    assert frame is not None

    apply_validators(
        frame,
        path,
        read_remediation,
        checks,
        accept_all,
        write_out_file_func=write_out_file,
    )