yakine committed on
Commit
6bc7f9d
·
verified ·
1 Parent(s): 0dc54fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -32,29 +32,27 @@ model_gpt2 = GPT2LMHeadModel.from_pretrained('gpt2')
32
  # Create a pipeline for text generation using GPT-2
33
  text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokenizer_gpt2)
34
 
 
35
  # Define prompt template for generating the dataset
36
  prompt_template = """\
37
- You are an AI specialized in generating synthetic tabular data specifically for machine learning purposes.
38
-
39
  Task: Generate a synthetic dataset based on the provided description and column names.
40
-
41
  Description: {description}
42
-
43
  Columns: {columns}
44
-
45
  Instructions:
46
-
47
- Output only the tabular data in valid CSV format.
48
- Include the header row followed by the data rows.
49
- Do not generate any additional text, explanations, comments, or code.
50
- Ensure that the values for each column are contextually appropriate.
51
-
52
- Format Example (do not include this line or the following example in your output):
53
  Column1,Column2,Column3
54
  Value1,Value2,Value3
55
  Value4,Value5,Value6
56
  """
57
 
 
58
  # Define generation parameters
59
  generation_params = {
60
  "top_p": 0.90,
 
32
  # Create a pipeline for text generation using GPT-2
33
  text_generator = pipeline("text-generation", model=model_gpt2, tokenizer=tokenizer_gpt2)
34
 
35
+
36
  # Define prompt template for generating the dataset
37
  prompt_template = """\
38
+ You are an AI designed exclusively for generating synthetic tabular datasets.
 
39
  Task: Generate a synthetic dataset based on the provided description and column names.
 
40
  Description: {description}
 
41
  Columns: {columns}
 
42
  Instructions:
43
+ - Output only the tabular data in valid CSV format.
44
+ - Include the header row followed strictly by the data rows.
45
+ - Do not include any additional text, explanations, comments, or code outside of the CSV data.
46
+ - Ensure that the values for each column are contextually appropriate based on the description.
47
+ - Do not alter the column names or add any new columns.
48
+ - Each row must contain data for all columns without any empty values unless specified.
49
+ - Format Example (do not include this line or the following example in your output):
50
  Column1,Column2,Column3
51
  Value1,Value2,Value3
52
  Value4,Value5,Value6
53
  """
54
 
55
+
56
  # Define generation parameters
57
  generation_params = {
58
  "top_p": 0.90,