Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,58 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import re
|
3 |
import json
|
4 |
import requests
|
@@ -40,11 +94,11 @@ def google_search_naics(company_name: str) -> List[str]:
|
|
40 |
|
41 |
# Create multiple search queries for better results
|
42 |
queries = [
|
43 |
-
f"NAICS code for {company_name}",
|
44 |
-
f"
|
45 |
-
f"{company_name} business
|
46 |
-
f"{company_name} industry
|
47 |
-
f"{company_name} company
|
48 |
]
|
49 |
|
50 |
try:
|
@@ -87,6 +141,16 @@ def get_naics_classification(model, company_name: str, context: str, candidates:
|
|
87 |
try:
|
88 |
print("π€ AI is analyzing NAICS classification...")
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
# If we have candidate codes from Google search
|
91 |
if candidates:
|
92 |
# Create a prompt that asks for research on the candidates
|
@@ -233,38 +297,106 @@ def classify_company(company_name: str, company_description: str, api_key: str =
|
|
233 |
|
234 |
# Create the Gradio interface
|
235 |
def create_gradio_interface():
|
|
|
|
|
|
|
236 |
with gr.Blocks(title="NAICS Code Finder") as demo:
|
237 |
gr.Markdown("# NAICS Code Finder")
|
238 |
-
gr.Markdown("Enter a company name
|
239 |
|
240 |
with gr.Row():
|
241 |
with gr.Column():
|
242 |
company_name = gr.Textbox(label="Company Name", placeholder="Enter company name")
|
243 |
-
company_description = gr.Textbox(label="
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
with gr.Column():
|
|
|
252 |
naics_output = gr.Markdown(label="NAICS Code")
|
253 |
-
|
254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
submit_btn.click(
|
257 |
-
|
258 |
inputs=[company_name, company_description, api_key],
|
259 |
-
outputs=[naics_output, research_output, reasoning_output]
|
260 |
)
|
261 |
|
262 |
gr.Examples(
|
263 |
[
|
264 |
-
["Apple Inc", "Tech company
|
265 |
-
["
|
266 |
-
["
|
267 |
-
["
|
268 |
],
|
269 |
inputs=[company_name, company_description]
|
270 |
)
|
|
|
1 |
+
def _relevant_paragraphs(result_url: str, needle: str, min_chars: int = 100):
    """Yield the paragraphs of one page that look like company descriptions.

    Downloads ``result_url`` and yields the stripped text of every ``<p>``
    element that is longer than ``min_chars`` characters and contains
    ``needle`` (expected to be already lower-cased) case-insensitively.
    Yields nothing when the response status is not 200. Network and parsing
    errors propagate to the caller.
    """
    response = requests.get(result_url, timeout=5)
    if response.status_code != 200:
        return

    # Imported lazily so the module still loads when bs4 is not installed;
    # a missing package surfaces as a per-URL error in the caller.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(response.text, 'html.parser')
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text().strip()
        if len(text) > min_chars and needle in text.lower():
            yield text


def google_search_company_info(company_name: str) -> str:
    """
    Search for basic company information to help with NAICS classification.

    Runs the first two of a fixed list of search queries, fetches each result
    page and collects paragraphs that mention the company name, stopping as
    soon as more than 500 characters have been gathered.

    Args:
        company_name: Name of the company to look up.

    Returns:
        The collected paragraphs separated by blank lines, or an empty string
        when the name is blank, nothing relevant was found, or the search
        failed. Errors are printed and never raised (best-effort lookup).
    """
    # A blank name would match every paragraph ("" is a substring of any
    # text), so irrelevant content would be returned as "company info".
    if not company_name or not company_name.strip():
        return ""

    max_chars = 500
    needle = company_name.lower()

    # Create search queries focused on company information
    queries = [
        f"what is {company_name} company",
        f"{company_name} company about us",
        f"{company_name} business description",
        f"{company_name} company profile",
        f"what does {company_name} company do",
    ]

    collected = []
    # Mirrors the length of "text + '\n\n'" concatenation used for the limit.
    collected_len = 0

    try:
        print(f"🔍 Searching for information about '{company_name}'...")

        for query in queries[:2]:  # Limit to first 2 queries to save time
            try:
                for result_url in search(query, stop=2, pause=2):
                    try:
                        for text in _relevant_paragraphs(result_url, needle):
                            collected.append(text)
                            collected_len += len(text) + 2
                            if collected_len > max_chars:
                                # Enough material: stop all remaining work.
                                return "\n\n".join(collected)
                    except Exception as e:
                        # One bad page must not abort the other results.
                        print(f" ⚠️ Error fetching {result_url}: {e}")
            except Exception as e:
                print(f" ⚠️ Error with query '{query}': {e}")
                continue

        return "\n\n".join(collected)
    except Exception as e:
        print(f"❌ Error searching for company info: {str(e)}")
        return ""


import os
|
56 |
import re
|
57 |
import json
|
58 |
import requests
|
|
|
94 |
|
95 |
# Create multiple search queries for better results
|
96 |
queries = [
|
97 |
+
f"2022 NAICS code for {company_name}",
|
98 |
+
f"NAICS 2022 classification for {company_name}",
|
99 |
+
f"{company_name} business NAICS 2022 code",
|
100 |
+
f"{company_name} industry NAICS code 2022",
|
101 |
+
f"what is {company_name} company NAICS code"
|
102 |
]
|
103 |
|
104 |
try:
|
|
|
141 |
try:
|
142 |
print("π€ AI is analyzing NAICS classification...")
|
143 |
|
144 |
+
# Get additional company information from Google
|
145 |
+
company_info = google_search_company_info(company_name)
|
146 |
+
if company_info:
|
147 |
+
print(f"π Found additional company information:\n{company_info[:200]}...")
|
148 |
+
# Add the found information to the context
|
149 |
+
if context:
|
150 |
+
context = f"{context}\n\nAdditional information found online:\n{company_info}"
|
151 |
+
else:
|
152 |
+
context = f"Information found online:\n{company_info}"
|
153 |
+
|
154 |
# If we have candidate codes from Google search
|
155 |
if candidates:
|
156 |
# Create a prompt that asks for research on the candidates
|
|
|
297 |
|
298 |
# Create the Gradio interface
|
299 |
def create_gradio_interface():
|
300 |
+
# Check if API key is set in environment
|
301 |
+
has_api_key = bool(os.environ.get('GEMINI_API_KEY'))
|
302 |
+
|
303 |
with gr.Blocks(title="NAICS Code Finder") as demo:
|
304 |
gr.Markdown("# NAICS Code Finder")
|
305 |
+
gr.Markdown("Enter a company name to find its appropriate NAICS code. The tool will search for information about the company and relevant NAICS codes online.")
|
306 |
|
307 |
with gr.Row():
|
308 |
with gr.Column():
|
309 |
company_name = gr.Textbox(label="Company Name", placeholder="Enter company name")
|
310 |
+
company_description = gr.Textbox(label="Additional Context (optional)", placeholder="Any additional information about the company")
|
311 |
+
|
312 |
+
# Only show API key input if not set in environment
|
313 |
+
if not has_api_key:
|
314 |
+
api_key = gr.Textbox(
|
315 |
+
label="Gemini API Key (required)",
|
316 |
+
placeholder="Enter your Google Gemini API key",
|
317 |
+
type="password"
|
318 |
+
)
|
319 |
+
else:
|
320 |
+
api_key = gr.Textbox(visible=False, value="")
|
321 |
+
|
322 |
+
submit_btn = gr.Button("Find NAICS Code", variant="primary")
|
323 |
|
324 |
with gr.Column():
|
325 |
+
status_output = gr.Markdown(label="Status")
|
326 |
naics_output = gr.Markdown(label="NAICS Code")
|
327 |
+
with gr.Accordion("Company Information", open=False):
|
328 |
+
company_info_output = gr.Markdown()
|
329 |
+
with gr.Accordion("NAICS Codes Research", open=False):
|
330 |
+
research_output = gr.Markdown()
|
331 |
+
with gr.Accordion("Classification Reasoning", open=True):
|
332 |
+
reasoning_output = gr.Markdown()
|
333 |
+
|
334 |
+
# Functions for the interface
|
335 |
+
def process_company(company_name, company_description, api_key):
    """Stream classification progress and results to the Gradio outputs.

    This is a generator: every update, including validation errors and the
    final result, is delivered with ``yield``. A value passed to ``return``
    inside a generator is not seen by code that iterates it, so it would
    never reach the UI.

    Args:
        company_name: Company name typed by the user.
        company_description: Optional free-text context about the company.
        api_key: Gemini API key from the UI; when empty, the
            ``GEMINI_API_KEY`` environment variable is used instead.

    Yields:
        5-tuples ``(status_md, naics_code_md, company_info_md, research_md,
        reasoning_md)`` matching the outputs wired to the submit button.
    """
    if not company_name or not company_name.strip():
        yield "Please enter a company name", "", "", "", ""
        return

    # Use API key from input or environment
    key_to_use = api_key if api_key else os.environ.get('GEMINI_API_KEY')
    if not key_to_use:
        yield "No API key provided. Please enter your Gemini API key.", "", "", "", ""
        return

    status_md = "🔍 Searching for company information...\n\n"
    yield status_md, "", "", "", ""

    # Get company info first
    company_info = google_search_company_info(company_name)
    if company_info:
        company_info_md = f"## Information found about {company_name}\n\n{company_info}"
        status_md += "✅ Found company information\n\n"
    else:
        company_info_md = f"No detailed information found for {company_name}"
        status_md += "⚠️ No company information found\n\n"

    yield status_md, "", company_info_md, "", ""

    # Get NAICS candidates
    status_md += "🔍 Searching for NAICS codes...\n\n"
    yield status_md, "", company_info_md, "", ""

    # Run the core functionality.
    # NOTE(review): find_naics_code is defined elsewhere; it is used here as
    # a dict with 'naics_code' and 'reasoning' keys and optional 'candidates'
    # and 'research' keys -- confirm against its definition.
    result = find_naics_code(company_name, company_description, key_to_use)

    if "candidates" in result and result["candidates"]:
        status_md += f"✅ Found {len(result['candidates'])} potential NAICS codes\n\n"
    else:
        status_md += "⚠️ No specific NAICS codes found in search results\n\n"

    status_md += "🤖 Analyzing classification...\n\n"
    yield status_md, "", company_info_md, "", ""

    # Format the NAICS code output
    naics_code_md = f"## NAICS Code: {result['naics_code']}"

    # Format the research output
    research_md = ""
    if "research" in result and result["research"]:
        research_md = f"## Research on NAICS Codes\n\n{result['research']}"

    # Format the reasoning output
    reasoning_md = f"## Analysis\n\n{result['reasoning']}"

    status_md += "✅ Classification complete!"

    # Final update must be yielded so the UI receives it.
    yield status_md, naics_code_md, company_info_md, research_md, reasoning_md
|
387 |
|
388 |
submit_btn.click(
|
389 |
+
process_company,
|
390 |
inputs=[company_name, company_description, api_key],
|
391 |
+
outputs=[status_output, naics_output, company_info_output, research_output, reasoning_output]
|
392 |
)
|
393 |
|
394 |
gr.Examples(
|
395 |
[
|
396 |
+
["Apple Inc", "Tech company"],
|
397 |
+
["Walmart", "Retail store"],
|
398 |
+
["Goldman Sachs", "Investment bank"],
|
399 |
+
["Ford Motor Company", "Automobile manufacturer"]
|
400 |
],
|
401 |
inputs=[company_name, company_description]
|
402 |
)
|