File size: 1,340 Bytes
6a1e686
 
2f3df87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a1e686
 
b0cd906
 
6a1e686
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
from sklearn.metrics import classification_report
import zipfile
import json
import pandas as pd
from .config import TAG_NAMES

def load_data(test_data_path):
    """Load problem statements and their tags from a zip archive of JSON files.

    Every selected archive member is parsed as one JSON document and becomes
    one row of the result.

    Args:
        test_data_path: Path to the zip archive. When falsy (``None`` or
            ``""``), the previously hard-coded
            ``'code_classification_dataset.zip'`` is used instead.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON member and the columns
        ``prob_desc_description``, ``prob_desc_input_spec``,
        ``prob_desc_output_spec`` and ``tags``.

    Raises:
        KeyError: If a JSON document lacks one of the expected keys.
    """
    archive_path = test_data_path or 'code_classification_dataset.zip'
    features = ["prob_desc_description", "prob_desc_input_spec", "prob_desc_output_spec"]
    cols = features + ["tags"]
    data = []

    # Context managers make sure the archive and each member get closed.
    with zipfile.ZipFile(archive_path) as zip_file:
        # The first entry is skipped as before -- presumably it is the
        # archive's top-level directory entry; TODO confirm.
        for name in zip_file.namelist()[1:]:
            # Directory entries carry no JSON payload.
            if name.endswith('/'):
                continue
            with zip_file.open(name) as f:
                d = json.loads(f.read())
            # One row per file, built inside the loop so no file is dropped.
            data.append([d[c] for c in cols])

    return pd.DataFrame(data, columns=cols)

def preprocessing(df):
    """Reshape the raw frame into a two-column ``text`` / ``labels`` frame.

    Args:
        df: DataFrame with a ``prob_desc_description`` column and the columns
            selected by ``TAG_NAMES``.

    Returns:
        A new ``pandas.DataFrame`` with a ``text`` column (the problem
        descriptions) and a ``labels`` column (for each row, the list of
        values taken from the ``TAG_NAMES`` columns).
    """
    # texts = ["text1", "text2", ...]
    texts = df["prob_desc_description"].values.tolist()
    # labels = [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0], ...]
    # NOTE(review): assumes TAG_NAMES is a list of column names present in
    # ``df``; ``load_data`` in this file produces a single ``tags`` column
    # instead -- confirm where the per-tag columns are meant to be created.
    labels = df[TAG_NAMES].values.tolist()

    # The frame was previously built but never returned, so callers got None.
    return pd.DataFrame({'text': texts, 'labels': labels})


def evaluate_model(test_data_path):
    """Load the data for ``test_data_path``, preprocess it and return ``metrics``.

    Args:
        test_data_path: Forwarded unchanged to ``load_data``.

    NOTE(review): no model is loaded and no prediction or scoring step is
    performed in the visible code, so this function looks unfinished.
    """
    df = load_data(test_data_path)
    df = preprocessing(df)
    # NOTE(review): ``metrics`` is never assigned in this function and no
    # module-level definition is visible, so this line raises NameError as
    # written -- the metric computation still has to be added.
    return metrics