Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
def google_search_company_info(company_name: str) -> str:
|
2 |
"""
|
3 |
Search for basic company information to help with NAICS classification
|
@@ -26,7 +60,6 @@ def google_search_company_info(company_name: str) -> str:
|
|
26 |
response = requests.get(result_url, timeout=5)
|
27 |
if response.status_code == 200:
|
28 |
# Extract text from paragraphs
|
29 |
-
from bs4 import BeautifulSoup
|
30 |
soup = BeautifulSoup(response.text, 'html.parser')
|
31 |
paragraphs = soup.find_all('p')
|
32 |
|
@@ -52,38 +85,7 @@ def google_search_company_info(company_name: str) -> str:
|
|
52 |
return company_info.strip()
|
53 |
except Exception as e:
|
54 |
print(f"❌ Error searching for company info: {str(e)}")
|
55 |
-
return ""
|
56 |
-
import re
|
57 |
-
import json
|
58 |
-
import requests
|
59 |
-
from typing import List, Dict, Optional, Tuple
|
60 |
-
import gradio as gr
|
61 |
-
from googlesearch import search
|
62 |
-
import google.generativeai as genai
|
63 |
-
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
64 |
-
|
65 |
-
def initialize_gemini(api_key: str):
    """Configure the Gemini client and build the model object used by this app.

    Args:
        api_key: Key handed to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` created for ``gemini-1.5-flash`` with the
        generation and safety settings defined below.
    """
    genai.configure(api_key=api_key)

    # Sampling parameters passed to the model as its generation config.
    sampling_options = dict(
        temperature=0.2,
        top_p=0.8,
        top_k=40,
        max_output_tokens=1024,
    )

    # Each listed harm category is mapped to BLOCK_NONE.
    unblocked_categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    safety_overrides = {
        category: HarmBlockThreshold.BLOCK_NONE
        for category in unblocked_categories
    }

    return genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=sampling_options,
        safety_settings=safety_overrides,
    )
|
87 |
|
88 |
def google_search_naics(company_name: str) -> List[str]:
|
89 |
"""
|
@@ -271,30 +273,6 @@ def find_naics_code(company_name: str, context: str = "", api_key: Optional[str]
|
|
271 |
|
272 |
return result
|
273 |
|
274 |
-
# Gradio interface function
|
275 |
-
def classify_company(company_name: str, company_description: str, api_key: Optional[str] = None) -> Tuple[str, str, str]:
    """Run the NAICS lookup for the Gradio form and format the result as Markdown.

    Args:
        company_name: Name of the company to classify. Required; an empty or
            whitespace-only value is rejected.
        company_description: Free-text context forwarded to ``find_naics_code``.
        api_key: Gemini API key. When empty or omitted, the value of the
            ``GEMINI_API_KEY`` environment variable is used instead.

    Returns:
        A ``(naics_code, research, reasoning)`` tuple of Markdown strings. If the
        company name is missing, the first element is an error message and the
        other two are empty strings.
    """
    # Function-scope import so the environment fallback below cannot fail with
    # a NameError when the module's import list does not include ``os``.
    import os

    # Validate the required input first so nothing else runs for a request
    # that is going to be rejected anyway.
    if not company_name or not company_name.strip():
        return "Error: Company name is required", "", ""

    if not api_key:
        api_key = os.environ.get('GEMINI_API_KEY')

    result = find_naics_code(company_name, company_description, api_key)

    # Format the NAICS code output
    naics_code = f"**NAICS Code: {result['naics_code']}**"

    # Format the research output; the section is omitted when there is none
    research = ""
    if result.get("research"):
        research = f"## Research on NAICS Codes\n\n{result['research']}"

    # Format the reasoning output
    reasoning = f"## Analysis\n\n{result['reasoning']}"

    return naics_code, research, reasoning
|
297 |
-
|
298 |
# Create the Gradio interface
|
299 |
def create_gradio_interface():
|
300 |
# Check if API key is set in environment
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
from typing import List, Dict, Optional, Tuple
|
6 |
+
import gradio as gr
|
7 |
+
from googlesearch import search
|
8 |
+
import google.generativeai as genai
|
9 |
+
from google.generativeai.types import HarmCategory, HarmBlockThreshold
|
10 |
+
from bs4 import BeautifulSoup
|
11 |
+
|
12 |
+
def initialize_gemini(api_key: str):
    """Configure the Gemini client and build the model object used by this app.

    Args:
        api_key: Key handed to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` created for ``gemini-1.5-flash`` with the
        generation and safety settings defined below.
    """
    genai.configure(api_key=api_key)

    # Sampling parameters passed to the model as its generation config.
    sampling_options = dict(
        temperature=0.2,
        top_p=0.8,
        top_k=40,
        max_output_tokens=1024,
    )

    # Each listed harm category is mapped to BLOCK_NONE.
    unblocked_categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    safety_overrides = {
        category: HarmBlockThreshold.BLOCK_NONE
        for category in unblocked_categories
    }

    return genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=sampling_options,
        safety_settings=safety_overrides,
    )
|
34 |
+
|
35 |
def google_search_company_info(company_name: str) -> str:
|
36 |
"""
|
37 |
Search for basic company information to help with NAICS classification
|
|
|
60 |
response = requests.get(result_url, timeout=5)
|
61 |
if response.status_code == 200:
|
62 |
# Extract text from paragraphs
|
|
|
63 |
soup = BeautifulSoup(response.text, 'html.parser')
|
64 |
paragraphs = soup.find_all('p')
|
65 |
|
|
|
85 |
return company_info.strip()
|
86 |
except Exception as e:
|
87 |
print(f"❌ Error searching for company info: {str(e)}")
|
88 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
def google_search_naics(company_name: str) -> List[str]:
|
91 |
"""
|
|
|
273 |
|
274 |
return result
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
# Create the Gradio interface
|
277 |
def create_gradio_interface():
|
278 |
# Check if API key is set in environment
|