|
import os |
|
import re |
|
import json |
|
import gradio as gr |
|
import requests |
|
from typing import List, Dict |
|
from googlesearch import search |
|
import google.generativeai as genai |
|
from google.generativeai.types import HarmCategory, HarmBlockThreshold |
|
|
|
def initialize_gemini(api_key: str):
    """Configure the Gemini SDK with ``api_key`` and build the classification model.

    Returns a ``genai.GenerativeModel`` for ``gemini-1.5-flash`` created with
    a low sampling temperature (0.2) and with every listed harm category set
    to ``BLOCK_NONE``.
    """
    genai.configure(api_key=api_key)

    # Sampling parameters handed to the model as its generation config.
    sampling = dict(
        temperature=0.2,
        top_p=0.8,
        top_k=40,
        max_output_tokens=1024,
    )

    # Every category below gets the same threshold, so build the map in one go.
    unfiltered_categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    filters = {
        category: HarmBlockThreshold.BLOCK_NONE
        for category in unfiltered_categories
    }

    return genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=sampling,
        safety_settings=filters,
    )
|
|
|
# Two-digit sector prefixes that open every valid NAICS code. Used to discard
# unrelated 6-digit numbers (zip fragments, colour codes, ids) found in pages.
_NAICS_SECTOR_PREFIXES = frozenset({
    "11", "21", "22", "23", "31", "32", "33", "42", "44", "45", "48", "49",
    "51", "52", "53", "54", "55", "56", "61", "62", "71", "72", "81", "92",
})


def google_search_naics(company_name: str, max_results: int = 5) -> List[str]:
    """Find candidate NAICS codes for a company by scraping Google results.

    Runs a Google search for the company, downloads each result page and
    collects every 6-digit number whose first two digits are a valid NAICS
    sector.

    Args:
        company_name: Company to search for.
        max_results: Maximum number of candidate codes to return.

    Returns:
        Up to ``max_results`` codes, most frequently mentioned first (ties
        keep first-seen order), so the result is deterministic for a given
        set of pages. Returns whatever was collected if the search fails
        part-way, and an empty list if nothing was found.
    """
    query = f"NAICS code 2022 for {company_name}"
    mentions: Dict[str, int] = {}

    try:
        for result_url in search(query, stop=5, pause=2):
            try:
                response = requests.get(result_url, timeout=5)
            except requests.RequestException as e:
                # One unreachable page must not abort the whole lookup.
                print(f"Error fetching {result_url}: {e}")
                continue

            if response.status_code != 200:
                continue

            for code in re.findall(r"\b\d{6}\b", response.text):
                if code[:2] in _NAICS_SECTOR_PREFIXES:
                    mentions[code] = mentions.get(code, 0) + 1
    except Exception as e:
        # Best effort: the search backend can fail in many ways (rate limits,
        # network errors); keep any codes gathered before the failure.
        print(f"Error performing Google search: {str(e)}")

    # sorted() is stable, so equally frequent codes stay in first-seen order.
    ranked = sorted(mentions, key=mentions.get, reverse=True)
    return ranked[:max(max_results, 0)]
|
|
|
# Section markers the model is asked to emit, in the order they appear.
_SECTION_LABELS = (
    "REASONING",
    "POSSIBILITY_1",
    "POSSIBILITY_2",
    "POSSIBILITY_3",
    "CONCLUSION",
)
_SECTION_FLAGS = re.DOTALL | re.IGNORECASE


def _build_naics_prompt(company_name: str, context: str, candidates: List[str]) -> str:
    """Assemble the classification prompt, mentioning search candidates only when given."""
    if candidates:
        basis = (
            "Based on the company information provided and the NAICS code "
            "candidates found from Google search"
        )
    else:
        basis = "Based on the company information provided"

    lines = [
        f"You are a NAICS code classification expert. {basis}, "
        "determine the most appropriate NAICS code.",
        "",
        f"Company Name: {company_name}",
        f"Context Information: {context}",
        "",
    ]
    if candidates:
        lines += [f"NAICS Code Candidates from Google Search: {candidates}", ""]
    lines += [
        "First, explain your reasoning for which industry this company belongs to.",
        "Then list 3 potential NAICS classifications with confidence percentages "
        "(must add up to 100%).",
        "Finally, provide your final conclusion.",
        "",
        "Your response should be in this format:",
        "REASONING: [Your detailed reasoning about the company's industry classification]",
        "",
        "POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence",
        "POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence",
        "POSSIBILITY_3: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence",
        "",
        "CONCLUSION: I am [XX]% confident this company is [industry description] "
        "which is NAICS code [6-digit code]",
    ]
    return "\n".join(lines)


def _extract_section(text: str, label: str, later_labels):
    """Return the text after ``label:`` up to the next marker, or None if the label is absent.

    The immediately following marker is tried first; if the model omitted it,
    each later marker is tried in turn and finally the end of the text, so one
    missing section does not hide the ones before it.
    """
    for stop in later_labels:
        match = re.search(rf"{label}:(.*?){stop}:", text, _SECTION_FLAGS)
        if match:
            return match.group(1).strip()
    match = re.search(rf"{label}:(.*)", text, _SECTION_FLAGS)
    return match.group(1).strip() if match else None


def get_naics_classification(model, company_name: str, context: str, candidates: List[str]) -> dict:
    """Ask the model to classify a company and parse its structured answer.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.
        company_name: Name of the company to classify.
        context: Free-text description of the company.
        candidates: NAICS codes found by the search step; may be empty, in
            which case the prompt does not mention candidates.

    Returns:
        A dict with keys ``naics_code`` (6-digit string, ``"000000"`` when no
        code could be extracted), ``reasoning``, ``possibilities`` (list of up
        to three strings) and ``conclusion``. If the model call or parsing
        raises, the error is printed and a dict with ``naics_code`` of
        ``"000000"``, the error message as ``reasoning``, an empty
        ``possibilities`` list and an empty ``conclusion`` is returned.
    """
    try:
        prompt = _build_naics_prompt(company_name, context, candidates)
        response_text = model.generate_content(prompt).text.strip()

        sections = {
            label: _extract_section(response_text, label, _SECTION_LABELS[position + 1:])
            for position, label in enumerate(_SECTION_LABELS)
        }

        reasoning = sections["REASONING"] or "No reasoning provided."
        possibilities = [
            sections[label] for label in _SECTION_LABELS[1:4] if sections[label]
        ]
        conclusion = sections["CONCLUSION"] or "No conclusion provided."

        # Prefer the code announced as "NAICS code NNNNNN"; otherwise accept
        # any standalone 6-digit number in the conclusion.
        code_match = (
            re.search(r"NAICS code (\d{6})\b", conclusion, re.IGNORECASE)
            or re.search(r"\b(\d{6})\b", conclusion)
        )
        naics_code = code_match.group(1) if code_match else "000000"

        return {
            "naics_code": naics_code,
            "reasoning": reasoning,
            "possibilities": possibilities,
            "conclusion": conclusion,
        }
    except Exception as e:
        # Boundary with the remote model: report the failure to the caller as
        # a sentinel result instead of raising.
        print(f"Error getting NAICS classification: {str(e)}")
        return {
            "naics_code": "000000",
            "reasoning": f"Error analyzing company: {str(e)}",
            "possibilities": [],
            "conclusion": "",
        }
|
|
|
def _format_naics_report(company_name, result, naics_candidates):
    """Render a classification result dict as the Markdown shown to the user."""
    parts = [
        f"## NAICS Code for {company_name}\n\n",
        f"**NAICS Code:** {result['naics_code']}\n\n",
        f"**Reasoning:**\n{result['reasoning']}\n\n",
    ]

    # These keys are optional in the result, so read them defensively.
    possibilities = result.get("possibilities")
    if possibilities:
        parts.append("**Possible Classifications:**\n\n")
        parts.extend(
            f"{rank}. {possibility}\n\n"
            for rank, possibility in enumerate(possibilities, 1)
        )

    conclusion = result.get("conclusion")
    if conclusion:
        parts.append(f"**Conclusion:**\n{conclusion}\n\n")

    if naics_candidates:
        parts.append(
            f"**Candidate NAICS Codes Found:**\n{', '.join(naics_candidates)}"
        )

    return "".join(parts)


def find_naics_code(api_key, company_name, company_description):
    """Look up the NAICS code for a company; entry point called by the Gradio UI.

    Args:
        api_key: Gemini API key entered by the user.
        company_name: Name of the company to classify.
        company_description: Optional free-text description used as context.

    Returns:
        A Markdown string with the chosen code, the reasoning, the alternative
        classifications and conclusion when available, and the candidate codes
        found by the search. Returns a prompt to fill in the inputs when the
        API key or company name is missing or blank, and ``"Error: ..."`` if
        any step raises.
    """
    # Treat None and whitespace-only input the same as an empty field.
    api_key = (api_key or "").strip()
    company_name = (company_name or "").strip()
    if not api_key or not company_name:
        return "Please provide both API key and company name."

    try:
        model = initialize_gemini(api_key)
        naics_candidates = google_search_naics(company_name)
        # An empty candidate list is passed through unchanged; the classifier
        # decides how to handle it.
        result = get_naics_classification(
            model, company_name, company_description or "", naics_candidates
        )
        return _format_naics_report(company_name, result, naics_candidates)
    except Exception as e:
        # UI boundary: show the failure in the result pane instead of raising.
        return f"Error: {str(e)}"
|
|
|
# Gradio UI: a two-column row with the inputs and submit button in the first
# column and the rendered Markdown result in the second.
# NOTE(review): nesting was reconstructed from a source that had lost its
# indentation - confirm the button belongs inside the first column.
with gr.Blocks(title="NAICS Code Finder") as app:
    gr.Markdown("# NAICS Code Finder")
    gr.Markdown(
        "This app helps you find the appropriate NAICS code for a company based on its name and description."
    )

    with gr.Row():
        with gr.Column():
            api_key = gr.Textbox(
                label="Google Gemini API Key",
                placeholder="Enter your Gemini API key here",
                type="password",
            )
            company_name = gr.Textbox(
                label="Company Name",
                placeholder="Enter the company name",
            )
            company_description = gr.Textbox(
                label="Company Description",
                placeholder="Enter a brief description of the company",
                lines=5,
            )

            submit_btn = gr.Button("Find NAICS Code")

        with gr.Column():
            output = gr.Markdown(label="Result")

    # Clicking the button runs the lookup and writes its Markdown into `output`.
    submit_btn.click(
        fn=find_naics_code,
        inputs=[api_key, company_name, company_description],
        outputs=output,
    )


if __name__ == "__main__":
    app.launch()