Spaces:
Running
Running
Delete utils
Browse files- utils/data_loader.py +0 -29
- utils/query_parser.py +0 -76
utils/data_loader.py
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
# src/utils/data_loader.py
|
2 |
-
|
3 |
-
import pandas as pd
|
4 |
-
import json
|
5 |
-
|
6 |
-
def load_restaurant_data(file_path: str) -> pd.DataFrame:
    """
    Load restaurant data from a JSON file into a DataFrame.

    In addition to the columns found in the JSON records, a ``text`` column
    is added that joins each restaurant's name, cuisine, dishes, price
    range, distance, rating and description into one string (the field used
    for embedding and BM25).

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        pd.DataFrame: DataFrame containing restaurant data plus the ``text``
        column. An empty JSON list yields an empty frame with an empty
        ``text`` column.

    Raises:
        KeyError: If the loaded frame has rows but lacks one of the columns
            used to build ``text``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame(data)
    # Create a text field for embedding and BM25.
    # A list comprehension is used instead of ``df.apply(axis=1)`` because
    # ``apply`` on an empty frame does not produce a Series, which makes the
    # single-column assignment fail for an empty dataset.
    df['text'] = [
        f"{row['name']} ({row['cuisine']}): {', '.join(row['dishes'])}. "
        f"Price: {row['price_range']}, Distance: {row['distance']} km, "
        f"Rating: {row['rating']}. Description: {row['description']}"
        for row in df.to_dict('records')
    ]
    return df
|
28 |
-
# Manual smoke run: load the bundled dataset and dump the resulting frame.
if __name__ == "__main__":
    restaurants = load_restaurant_data("./data/restaurants.json")
    print(restaurants)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/query_parser.py
DELETED
@@ -1,76 +0,0 @@
|
|
1 |
-
# src/utils/query_parser.py
|
2 |
-
import pandas as pd
|
3 |
-
import json
|
4 |
-
from typing import Dict, Any
|
5 |
-
from src.generation.llm import LLM
|
6 |
-
|
7 |
-
class QueryParser:
    """Extract structured features from a free-text query using an LLM.

    The lists of valid cuisines, price ranges and dishes are derived from the
    restaurant DataFrame and handed to the LLM's query prompt.
    """

    def __init__(self, df: pd.DataFrame):
        """
        Initialize the query parser with restaurant data.

        Args:
            df (pd.DataFrame): DataFrame containing restaurant data. The
                ``cuisine``, ``price_range`` and ``dishes`` columns are
                read; each non-missing ``dishes`` cell is iterated as a
                collection of dish names.
        """
        self.llm = LLM()
        self.df = df
        self.valid_cuisines = self._sorted_unique(self.df['cuisine'])
        self.valid_price_ranges = self._sorted_unique(self.df['price_range'])
        # Rows without a dishes value are skipped; a missing (NaN/None) cell
        # is not iterable and would otherwise abort construction.
        self.valid_dishes = sorted({
            dish
            for dishes in self.df['dishes'].dropna()
            for dish in dishes
        })

    @staticmethod
    def _sorted_unique(column: pd.Series) -> list:
        """Return the sorted distinct non-missing values of ``column``.

        Missing entries are dropped first because ``sorted`` cannot compare
        ``None``/NaN with strings.
        """
        return sorted(column.dropna().unique().tolist())

    @staticmethod
    def _fallback_features(query: str) -> Dict[str, Any]:
        """Return the default feature dict used when no JSON can be parsed."""
        return {
            "cuisine": None,
            "menu": [],
            "price_range": None,
            "distance": None,
            "rating": None,
            "description": query
        }

    @staticmethod
    def _parse_response(response: str, query: str) -> Dict[str, Any]:
        """Decode the JSON object embedded in an LLM response.

        The candidate span runs from the first ``{`` to the last ``}`` of
        ``response``. If there is no such span or it is not valid JSON, the
        fallback features (with ``description`` set to ``query``) are
        returned instead.
        """
        json_start = response.find("{")
        json_end = response.rfind("}") + 1
        if json_start == -1 or json_end <= json_start:
            # No brace-delimited span at all: nothing to decode.
            return QueryParser._fallback_features(query)
        try:
            return json.loads(response[json_start:json_end])
        except json.JSONDecodeError:
            return QueryParser._fallback_features(query)

    def parse_query(self, query: str) -> Dict[str, Any]:
        """
        Parse the query to extract features.

        Args:
            query (str): User query.

        Returns:
            Dict[str, Any]: Parsed features as decoded from the LLM's JSON
            answer, or a fallback dict with keys ``cuisine``, ``menu``,
            ``price_range``, ``distance``, ``rating`` and ``description``
            (the raw query) when the answer contains no valid JSON object.
        """
        # Format prompt using LLM's prompt template
        prompt = self.llm.format_query_prompt(
            query=query,
            cuisines=self.valid_cuisines,
            dishes=self.valid_dishes,
            price_ranges=self.valid_price_ranges
        )

        # Generate response
        response = self.llm.generate(prompt)

        # Parse JSON response
        return self._parse_response(response, query)
|
57 |
-
|
58 |
-
|
59 |
-
# Quick test block for QueryParser
|
60 |
-
if __name__ == "__main__":
    import pandas as pd

    # Toy two-restaurant frame holding only the columns QueryParser reads.
    df = pd.DataFrame(
        {
            "cuisine": ["Italian", "Japanese"],
            "price_range": ["$", "$$"],
            "dishes": [["pizza", "pasta"], ["sushi", "ramen"]],
        }
    )
    parser = QueryParser(df)

    # Run one sample query end to end and show what comes back.
    result = parser.parse_query("I want cheap sushi")
    print("Parsed Query Result:")
    print(result)
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|