Spaces:
Sleeping
Sleeping
File size: 5,026 Bytes
90c062f b485e94 90c062f b485e94 90c062f b485e94 90c062f b485e94 90c062f b485e94 90c062f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import json
import gradio as gr
import os
import requests
from huggingface_hub import AsyncInferenceClient
# Endpoint configuration is read from the environment (e.g. Space secrets).
HF_TOKEN = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')

# Kept for any code that issues raw HTTP requests against the endpoint.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# Hand the token to the client so calls to the endpoint are authenticated;
# previously HF_TOKEN was read but never reached the client. When HF_TOKEN is
# unset this passes token=None, which is the client's default.
client = AsyncInferenceClient(api_url, token=HF_TOKEN)
# Instruction header intended to precede the user's code in the model input.
# NOTE(review): this wording is presumably the prompt template the model
# expects -- it is kept byte-for-byte; confirm before editing.
system_message = """
### Instruction:
Refactor the provided Python code to improve its maintainability and efficiency and reduce complexity. Include the refactored code along with the comments on the changes made for improving the metrics.
### Input:
"""

# Heading and blurb shown in the chat UI.
title = "Python Refactoring"
description = """
Please give it 3 to 4 minutes for the model to load and run, consider using Python code with less than 120 lines of code due to GPU constraints
"""

# Stylesheet rule that hides elements with the `toast-wrap` class.
css = """.toast-wrap { display: none !important } """
# Sample inputs offered in the chat UI: one single-element row per example.
# The snippets are raw strings so that the backslash sequences inside the
# sample code ('\n', \s, \' ...) reach the UI exactly as written instead of
# being interpreted by this module, and they are indented so that each
# example is itself valid Python.
examples = [
    [r"""
import pandas as pd
import re
import ast
from code_bert_score import score
import numpy as np
def preprocess_code(source_text):
    def remove_comments_and_docstrings(source_code):
        source_code = re.sub(r'#.*', '', source_code)
        source_code = re.sub(r'(\'\'\'(.*?)\'\'\'|\"\"\"(.*?)\"\"\")', '', source_code, flags=re.DOTALL)
        return source_code
    pattern = r"```python\s+(.+?)\s+```"
    matches = re.findall(pattern, source_text, re.DOTALL)
    code_to_process = '\n'.join(matches) if matches else source_text
    cleaned_code = remove_comments_and_docstrings(code_to_process)
    return cleaned_code
def evaluate_dataframe(df):
    results = {'P': [], 'R': [], 'F1': [], 'F3': []}
    for index, row in df.iterrows():
        try:
            cands = [preprocess_code(row['generated_text'])]
            refs = [preprocess_code(row['output'])]
            P, R, F1, F3 = score(cands, refs, lang='python')
            results['P'].append(P[0])
            results['R'].append(R[0])
            results['F1'].append(F1[0])
            results['F3'].append(F3[0])
        except Exception as e:
            print(f"Error processing row {index}: {e}")
            for key in results.keys():
                results[key].append(None)
    df_metrics = pd.DataFrame(results)
    return df_metrics
def evaluate_dataframe_multiple_runs(df, runs=3):
    all_results = []
    for run in range(runs):
        df_metrics = evaluate_dataframe(df)
        all_results.append(df_metrics)
    # Calculate mean and std deviation of metrics across runs
    df_metrics_mean = pd.concat(all_results).groupby(level=0).mean()
    df_metrics_std = pd.concat(all_results).groupby(level=0).std()
    return df_metrics_mean, df_metrics_std
"""],
    [r"""
def analyze_sales_data(sales_records):
    active_sales = filter(lambda record: record['status'] == 'active', sales_records)
    sales_by_category = {}
    for record in active_sales:
        category = record['category']
        total_sales = record['units_sold'] * record['price_per_unit']
        if category not in sales_by_category:
            sales_by_category[category] = {'total_sales': 0, 'total_units': 0}
        sales_by_category[category]['total_sales'] += total_sales
        sales_by_category[category]['total_units'] += record['units_sold']
    average_sales_data = []
    for category, data in sales_by_category.items():
        average_sales = data['total_sales'] / data['total_units']
        sales_by_category[category]['average_sales'] = average_sales
        average_sales_data.append((category, average_sales))
    average_sales_data.sort(key=lambda x: x[1], reverse=True)
    for rank, (category, _) in enumerate(average_sales_data, start=1):
        sales_by_category[category]['rank'] = rank
    return sales_by_category
"""],
]
# Stream text - stream tokens with InferenceClient from TGI
async def predict(message, chatbot, temperature=0.9, max_new_tokens=4096, top_p=0.6, repetition_penalty=1.0,):
    """Stream the model's reply to ``message`` as a growing string.

    Args:
        message: The user's input; converted with ``str()`` and appended to
            the module-level ``system_message`` header.
        chatbot: Chat history supplied by the UI. It is not used when
            building the prompt.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Forwarded to ``client.text_generation``.
        top_p: Nucleus-sampling value, converted to ``float`` and forwarded.
        repetition_penalty: Forwarded to ``client.text_generation``.

    Yields:
        The text generated so far, extended by one streamed token per
        iteration.
    """
    # The instruction header lives in the module constant `system_message`.
    # Starting from it unconditionally also keeps `input_prompt` defined when
    # the header is an empty string.
    # NOTE(review): the prompt is closed with " [/INST] " although no opening
    # "[INST]" tag is emitted; kept as-is -- confirm against the template the
    # served model expects.
    input_prompt = f"{system_message}{message} [/INST] "

    # Clamp the temperature to a small positive floor.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    partial_message = ""
    token_stream = await client.text_generation(
        prompt=input_prompt,
        max_new_tokens=max_new_tokens,
        stream=True,
        best_of=1,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=repetition_penalty,
    )
    async for token in token_stream:
        partial_message += token
        # Yield the cumulative text so the caller can re-render the whole reply.
        yield partial_message
# Build the chat UI around the streaming `predict` handler defined in this
# module, enable queuing and start serving.
# NOTE(review): retry_btn / undo_btn / clear_btn may not be accepted by every
# Gradio release -- confirm against the version this app is pinned to.
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Chat with me!", container=False, scale=7),
    title=title,
    description=description,
    theme="abidlabs/Lime",
    examples=examples,
    cache_examples=True,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch()
|