Update app.py
Browse files
app.py
CHANGED
@@ -59,7 +59,7 @@ def google_search_naics(company_name: str, company_description: str = "") -> Lis
|
|
59 |
print(f"Error performing Google search: {str(e)}")
|
60 |
return []
|
61 |
|
62 |
-
def get_naics_classification(model, company_name: str, context: str, candidates: List[str]) -> dict:
|
63 |
"""
|
64 |
Use Gemini AI to determine the most appropriate NAICS code from candidates
|
65 |
First provides reasoning, then multiple possibilities with confidence levels
|
@@ -72,17 +72,20 @@ You are a NAICS code classification expert. Based on the company information pro
|
|
72 |
|
73 |
Company Name: {company_name}
|
74 |
Context Information: {context}
|
75 |
-
|
76 |
NAICS Code Candidates from Google Search: {candidates}
|
77 |
|
78 |
-
First,
|
79 |
-
|
|
|
80 |
|
81 |
Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
|
82 |
Finally, provide your final conclusion.
|
83 |
|
84 |
Your response should be in this format:
|
85 |
-
|
|
|
|
|
86 |
|
87 |
POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
88 |
POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
@@ -97,15 +100,19 @@ You are a NAICS code classification expert. Based on the company information pro
|
|
97 |
|
98 |
Company Name: {company_name}
|
99 |
Context Information: {context}
|
|
|
100 |
|
101 |
-
First,
|
102 |
-
|
|
|
103 |
|
104 |
Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
|
105 |
Finally, provide your final conclusion.
|
106 |
|
107 |
Your response should be in this format:
|
108 |
-
|
|
|
|
|
109 |
|
110 |
POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
111 |
POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
@@ -116,6 +123,10 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
|
|
116 |
response = model.generate_content(prompt)
|
117 |
response_text = response.text.strip()
|
118 |
|
|
|
|
|
|
|
|
|
119 |
# Extract reasoning
|
120 |
reasoning_match = re.search(r'REASONING:(.*?)POSSIBILITY_1:', response_text, re.DOTALL | re.IGNORECASE)
|
121 |
reasoning = reasoning_match.group(1).strip() if reasoning_match else "No reasoning provided."
|
@@ -153,6 +164,7 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
|
|
153 |
|
154 |
return {
|
155 |
"naics_code": naics_code,
|
|
|
156 |
"reasoning": reasoning,
|
157 |
"possibilities": possibilities,
|
158 |
"conclusion": conclusion
|
@@ -161,6 +173,7 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
|
|
161 |
print(f"Error getting NAICS classification: {str(e)}")
|
162 |
return {
|
163 |
"naics_code": "000000",
|
|
|
164 |
"reasoning": f"Error analyzing company: {str(e)}",
|
165 |
"possibilities": [],
|
166 |
"conclusion": "Error in analysis"
|
@@ -182,12 +195,21 @@ def find_naics_code(api_key, company_name, company_description):
|
|
182 |
|
183 |
# Get classification
|
184 |
if not naics_candidates:
|
185 |
-
result = get_naics_classification(model, company_name, company_description, [])
|
186 |
else:
|
187 |
-
result = get_naics_classification(model, company_name, company_description, naics_candidates)
|
188 |
|
189 |
# Format the output with NAICS code at the end
|
190 |
output = f"## Analysis for {company_name}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
output += f"**Reasoning:**\n{result['reasoning']}\n\n"
|
192 |
|
193 |
# Add possibilities section
|
@@ -196,11 +218,6 @@ def find_naics_code(api_key, company_name, company_description):
|
|
196 |
for i, possibility in enumerate(result['possibilities'], 1):
|
197 |
output += f"{i}. {possibility}\n\n"
|
198 |
|
199 |
-
# Add Google search information
|
200 |
-
if naics_candidates:
|
201 |
-
output += f"**Candidate NAICS Codes Found from Google:**\n{', '.join(naics_candidates)}\n\n"
|
202 |
-
output += f"*Search query used: \"{search_query}\"*\n\n"
|
203 |
-
|
204 |
# Add conclusion
|
205 |
if 'conclusion' in result and result['conclusion']:
|
206 |
output += f"**Conclusion:**\n{result['conclusion']}\n\n"
|
|
|
59 |
print(f"Error performing Google search: {str(e)}")
|
60 |
return []
|
61 |
|
62 |
+
def get_naics_classification(model, company_name: str, context: str, candidates: List[str], search_query: str) -> dict:
|
63 |
"""
|
64 |
Use Gemini AI to determine the most appropriate NAICS code from candidates
|
65 |
First provides reasoning, then multiple possibilities with confidence levels
|
|
|
72 |
|
73 |
Company Name: {company_name}
|
74 |
Context Information: {context}
|
75 |
+
Google Search Query Used: {search_query}
|
76 |
NAICS Code Candidates from Google Search: {candidates}
|
77 |
|
78 |
+
First, start with a section titled "GOOGLE_FINDINGS:" where you describe what the Google search results suggest about this company based on the NAICS codes found.
|
79 |
+
|
80 |
+
Then, in a section titled "REASONING:", explain your reasoning for which industry this company belongs to.
|
81 |
|
82 |
Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
|
83 |
Finally, provide your final conclusion.
|
84 |
|
85 |
Your response should be in this format:
|
86 |
+
GOOGLE_FINDINGS: [Describe what the Google search results suggest about this company based on the NAICS codes found]
|
87 |
+
|
88 |
+
REASONING: [Your detailed reasoning about the company's industry classification]
|
89 |
|
90 |
POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
91 |
POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
|
|
100 |
|
101 |
Company Name: {company_name}
|
102 |
Context Information: {context}
|
103 |
+
Google Search Query Used: {search_query}
|
104 |
|
105 |
+
First, start with a section titled "GOOGLE_FINDINGS:" where you acknowledge that the Google search did not return any specific NAICS codes for this company.
|
106 |
+
|
107 |
+
Then, in a section titled "REASONING:", explain your reasoning for which industry this company belongs to based on the limited information available.
|
108 |
|
109 |
Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
|
110 |
Finally, provide your final conclusion.
|
111 |
|
112 |
Your response should be in this format:
|
113 |
+
GOOGLE_FINDINGS: No specific NAICS codes were found in the Google search results using the query "{search_query}".
|
114 |
+
|
115 |
+
REASONING: [Your detailed reasoning about the company's industry classification based on the limited information available]
|
116 |
|
117 |
POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
118 |
POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
|
|
|
123 |
response = model.generate_content(prompt)
|
124 |
response_text = response.text.strip()
|
125 |
|
126 |
+
# Extract Google findings
|
127 |
+
google_findings_match = re.search(r'GOOGLE_FINDINGS:(.*?)REASONING:', response_text, re.DOTALL | re.IGNORECASE)
|
128 |
+
google_findings = google_findings_match.group(1).strip() if google_findings_match else "No Google findings provided."
|
129 |
+
|
130 |
# Extract reasoning
|
131 |
reasoning_match = re.search(r'REASONING:(.*?)POSSIBILITY_1:', response_text, re.DOTALL | re.IGNORECASE)
|
132 |
reasoning = reasoning_match.group(1).strip() if reasoning_match else "No reasoning provided."
|
|
|
164 |
|
165 |
return {
|
166 |
"naics_code": naics_code,
|
167 |
+
"google_findings": google_findings,
|
168 |
"reasoning": reasoning,
|
169 |
"possibilities": possibilities,
|
170 |
"conclusion": conclusion
|
|
|
173 |
print(f"Error getting NAICS classification: {str(e)}")
|
174 |
return {
|
175 |
"naics_code": "000000",
|
176 |
+
"google_findings": "Error occurred during Google search.",
|
177 |
"reasoning": f"Error analyzing company: {str(e)}",
|
178 |
"possibilities": [],
|
179 |
"conclusion": "Error in analysis"
|
|
|
195 |
|
196 |
# Get classification
|
197 |
if not naics_candidates:
|
198 |
+
result = get_naics_classification(model, company_name, company_description, [], search_query)
|
199 |
else:
|
200 |
+
result = get_naics_classification(model, company_name, company_description, naics_candidates, search_query)
|
201 |
|
202 |
# Format the output with NAICS code at the end
|
203 |
output = f"## Analysis for {company_name}\n\n"
|
204 |
+
|
205 |
+
# Display search query prominently at the top
|
206 |
+
output += f"**Google Search Query Used:**\n`{search_query}`\n\n"
|
207 |
+
|
208 |
+
# Add Google findings first
|
209 |
+
if 'google_findings' in result and result['google_findings']:
|
210 |
+
output += f"**Google Search Findings:**\n{result['google_findings']}\n\n"
|
211 |
+
|
212 |
+
# Then reasoning
|
213 |
output += f"**Reasoning:**\n{result['reasoning']}\n\n"
|
214 |
|
215 |
# Add possibilities section
|
|
|
218 |
for i, possibility in enumerate(result['possibilities'], 1):
|
219 |
output += f"{i}. {possibility}\n\n"
|
220 |
|
|
|
|
|
|
|
|
|
|
|
221 |
# Add conclusion
|
222 |
if 'conclusion' in result and result['conclusion']:
|
223 |
output += f"**Conclusion:**\n{result['conclusion']}\n\n"
|