jzou1995 committed on
Commit
fd8b571
·
verified ·
1 Parent(s): 68b7136

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -22
app.py CHANGED
@@ -1,4 +1,58 @@
1
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import re
3
  import json
4
  import requests
@@ -40,11 +94,11 @@ def google_search_naics(company_name: str) -> List[str]:
40
 
41
  # Create multiple search queries for better results
42
  queries = [
43
- f"NAICS code for {company_name}",
44
- f"what is {company_name} company NAICS code",
45
- f"{company_name} business entity NAICS classification",
46
- f"{company_name} industry classification NAICS",
47
- f"{company_name} company information NAICS"
48
  ]
49
 
50
  try:
@@ -87,6 +141,16 @@ def get_naics_classification(model, company_name: str, context: str, candidates:
87
  try:
88
  print("πŸ€– AI is analyzing NAICS classification...")
89
 
 
 
 
 
 
 
 
 
 
 
90
  # If we have candidate codes from Google search
91
  if candidates:
92
  # Create a prompt that asks for research on the candidates
@@ -233,38 +297,106 @@ def classify_company(company_name: str, company_description: str, api_key: str =
233
 
234
  # Create the Gradio interface
235
  def create_gradio_interface():
 
 
 
236
  with gr.Blocks(title="NAICS Code Finder") as demo:
237
  gr.Markdown("# NAICS Code Finder")
238
- gr.Markdown("Enter a company name and optional description to find the most appropriate NAICS code.")
239
 
240
  with gr.Row():
241
  with gr.Column():
242
  company_name = gr.Textbox(label="Company Name", placeholder="Enter company name")
243
- company_description = gr.Textbox(label="Company Description (optional)", placeholder="Brief description of the company")
244
- api_key = gr.Textbox(
245
- label="Gemini API Key (optional)",
246
- placeholder="Enter your API key or set GEMINI_API_KEY env variable",
247
- visible=not bool(os.environ.get('GEMINI_API_KEY'))
248
- )
249
- submit_btn = gr.Button("Find NAICS Code")
 
 
 
 
 
 
250
 
251
  with gr.Column():
 
252
  naics_output = gr.Markdown(label="NAICS Code")
253
- research_output = gr.Markdown(label="Research")
254
- reasoning_output = gr.Markdown(label="Reasoning")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  submit_btn.click(
257
- classify_company,
258
  inputs=[company_name, company_description, api_key],
259
- outputs=[naics_output, research_output, reasoning_output]
260
  )
261
 
262
  gr.Examples(
263
  [
264
- ["Apple Inc", "Tech company that makes iPhones and computers"],
265
- ["Starbucks", "Coffee shop chain"],
266
- ["Bank of America", "Banking and financial services"],
267
- ["Tesla", "Electric vehicle manufacturer"]
268
  ],
269
  inputs=[company_name, company_description]
270
  )
 
1
def _matching_paragraphs(html: str, needle: str):
    """Yield stripped ``<p>`` texts longer than 100 chars that contain *needle*.

    *needle* must already be lower-cased; the comparison is case-insensitive.
    """
    # Local import, as in the original code, so a missing bs4 only breaks this
    # best-effort lookup instead of the whole module import.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')
    for p in soup.find_all('p'):
        text = p.get_text().strip()
        if len(text) > 100 and needle in text.lower():
            yield text


def google_search_company_info(company_name: str, max_chars: int = 500, max_queries: int = 2) -> str:
    """
    Search for basic company information to help with NAICS classification.

    Runs up to ``max_queries`` web searches, downloads each result page and
    collects paragraphs that are longer than 100 characters and mention the
    company name. Collection stops as soon as more than ``max_chars``
    characters have been gathered, so the result may slightly exceed that cap.

    Args:
        company_name: Name of the company to look up.
        max_chars: Soft cap on the amount of text collected (default 500).
        max_queries: How many of the prepared queries to run (default 2, to
            keep the lookup fast).

    Returns:
        The collected paragraphs separated by blank lines, or ``""`` when the
        name is empty, nothing was found, or the search failed. This function
        is best-effort and never raises.
    """
    # An empty needle would match every paragraph on every page.
    if not company_name or not company_name.strip():
        return ""

    needle = company_name.lower()

    # Create search queries focused on company information
    queries = [
        f"what is {company_name} company",
        f"{company_name} company about us",
        f"{company_name} business description",
        f"{company_name} company profile",
        f"what does {company_name} company do"
    ]

    parts = []
    collected = 0  # length of the text so far, counting the "\n\n" separators

    try:
        print(f"πŸ” Searching for information about '{company_name}'...")

        for query in queries[:max_queries]:
            try:
                for result_url in search(query, stop=2, pause=2):
                    try:
                        response = requests.get(result_url, timeout=5)
                        if response.status_code != 200:
                            continue

                        for text in _matching_paragraphs(response.text, needle):
                            parts.append(text)
                            collected += len(text) + 2
                            if collected > max_chars:
                                # Enough context gathered: stop all searching.
                                return "\n\n".join(parts)
                    except Exception as e:
                        # One bad page must not abort the remaining results.
                        print(f" ⚠️ Error fetching {result_url}: {e}")
            except Exception as e:
                print(f" ⚠️ Error with query '{query}': {e}")
                continue

        return "\n\n".join(parts)
    except Exception as e:
        print(f"❌ Error searching for company info: {str(e)}")
        return ""


# NOTE: this import was fused onto the function's last line (`return ""import os`),
# which is a syntax error; it must be a statement of its own.
import os
56
  import re
57
  import json
58
  import requests
 
94
 
95
  # Create multiple search queries for better results
96
  queries = [
97
+ f"2022 NAICS code for {company_name}",
98
+ f"NAICS 2022 classification for {company_name}",
99
+ f"{company_name} business NAICS 2022 code",
100
+ f"{company_name} industry NAICS code 2022",
101
+ f"what is {company_name} company NAICS code"
102
  ]
103
 
104
  try:
 
141
  try:
142
  print("πŸ€– AI is analyzing NAICS classification...")
143
 
144
+ # Get additional company information from Google
145
+ company_info = google_search_company_info(company_name)
146
+ if company_info:
147
+ print(f"πŸ“ Found additional company information:\n{company_info[:200]}...")
148
+ # Add the found information to the context
149
+ if context:
150
+ context = f"{context}\n\nAdditional information found online:\n{company_info}"
151
+ else:
152
+ context = f"Information found online:\n{company_info}"
153
+
154
  # If we have candidate codes from Google search
155
  if candidates:
156
  # Create a prompt that asks for research on the candidates
 
297
 
298
  # Create the Gradio interface
299
  def create_gradio_interface():
300
+ # Check if API key is set in environment
301
+ has_api_key = bool(os.environ.get('GEMINI_API_KEY'))
302
+
303
  with gr.Blocks(title="NAICS Code Finder") as demo:
304
  gr.Markdown("# NAICS Code Finder")
305
+ gr.Markdown("Enter a company name to find its appropriate NAICS code. The tool will search for information about the company and relevant NAICS codes online.")
306
 
307
  with gr.Row():
308
  with gr.Column():
309
  company_name = gr.Textbox(label="Company Name", placeholder="Enter company name")
310
+ company_description = gr.Textbox(label="Additional Context (optional)", placeholder="Any additional information about the company")
311
+
312
+ # Only show API key input if not set in environment
313
+ if not has_api_key:
314
+ api_key = gr.Textbox(
315
+ label="Gemini API Key (required)",
316
+ placeholder="Enter your Google Gemini API key",
317
+ type="password"
318
+ )
319
+ else:
320
+ api_key = gr.Textbox(visible=False, value="")
321
+
322
+ submit_btn = gr.Button("Find NAICS Code", variant="primary")
323
 
324
  with gr.Column():
325
+ status_output = gr.Markdown(label="Status")
326
  naics_output = gr.Markdown(label="NAICS Code")
327
+ with gr.Accordion("Company Information", open=False):
328
+ company_info_output = gr.Markdown()
329
+ with gr.Accordion("NAICS Codes Research", open=False):
330
+ research_output = gr.Markdown()
331
+ with gr.Accordion("Classification Reasoning", open=True):
332
+ reasoning_output = gr.Markdown()
333
+
334
+ # Functions for the interface
335
def process_company(company_name, company_description, api_key):
    """
    Gradio callback: look up a company and stream progress to the UI.

    This is a generator. Every update, including validation errors and the
    final result, must be *yielded*: a value passed to ``return`` inside a
    generator is carried on ``StopIteration`` and never produced as an update.

    Args:
        company_name: Company to classify; required.
        company_description: Optional extra context passed to the classifier.
        api_key: Gemini API key from the form; falls back to the
            ``GEMINI_API_KEY`` environment variable when empty.

    Yields:
        5-tuples ``(status, naics_code, company_info, research, reasoning)``
        of Markdown strings, one per progress step.
    """
    if not company_name:
        yield "Please enter a company name", "", "", "", ""
        return

    # Use API key from input or environment
    key_to_use = api_key if api_key else os.environ.get('GEMINI_API_KEY')
    if not key_to_use:
        yield "No API key provided. Please enter your Gemini API key.", "", "", "", ""
        return

    status_md = "πŸ” Searching for company information...\n\n"
    yield status_md, "", "", "", ""

    # Get company info first so it can be shown while classification runs.
    # NOTE(review): get_naics_classification also calls
    # google_search_company_info; if find_naics_code goes through it, the
    # lookup happens twice - confirm and consider passing the text along.
    company_info = google_search_company_info(company_name)
    if company_info:
        company_info_md = f"## Information found about {company_name}\n\n{company_info}"
        status_md += "βœ… Found company information\n\n"
    else:
        company_info_md = f"No detailed information found for {company_name}"
        status_md += "⚠️ No company information found\n\n"

    yield status_md, "", company_info_md, "", ""

    # Get NAICS candidates
    status_md += "πŸ” Searching for NAICS codes...\n\n"
    yield status_md, "", company_info_md, "", ""

    # Run the core functionality
    result = find_naics_code(company_name, company_description, key_to_use)

    candidates = result.get("candidates")
    if candidates:
        status_md += f"βœ… Found {len(candidates)} potential NAICS codes\n\n"
    else:
        status_md += "⚠️ No specific NAICS codes found in search results\n\n"

    status_md += "πŸ€– Analyzing classification...\n\n"
    yield status_md, "", company_info_md, "", ""

    # Format the NAICS code output
    naics_code_md = f"## NAICS Code: {result['naics_code']}"

    # Format the research output (section is omitted when there is none)
    research = result.get("research")
    research_md = f"## Research on NAICS Codes\n\n{research}" if research else ""

    # Format the reasoning output
    reasoning_md = f"## Analysis\n\n{result['reasoning']}"

    status_md += "βœ… Classification complete!"

    # Final update: yielded, not returned, so the UI actually receives it.
    yield status_md, naics_code_md, company_info_md, research_md, reasoning_md
387
 
388
  submit_btn.click(
389
+ process_company,
390
  inputs=[company_name, company_description, api_key],
391
+ outputs=[status_output, naics_output, company_info_output, research_output, reasoning_output]
392
  )
393
 
394
  gr.Examples(
395
  [
396
+ ["Apple Inc", "Tech company"],
397
+ ["Walmart", "Retail store"],
398
+ ["Goldman Sachs", "Investment bank"],
399
+ ["Ford Motor Company", "Automobile manufacturer"]
400
  ],
401
  inputs=[company_name, company_description]
402
  )