jzou1995 committed on
Commit
3a20bdf
·
verified ·
1 Parent(s): 2b57935

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -15
app.py CHANGED
@@ -59,7 +59,7 @@ def google_search_naics(company_name: str, company_description: str = "") -> Lis
59
  print(f"Error performing Google search: {str(e)}")
60
  return []
61
 
62
- def get_naics_classification(model, company_name: str, context: str, candidates: List[str]) -> dict:
63
  """
64
  Use Gemini AI to determine the most appropriate NAICS code from candidates
65
  First provides reasoning, then multiple possibilities with confidence levels
@@ -72,17 +72,20 @@ You are a NAICS code classification expert. Based on the company information pro
72
 
73
  Company Name: {company_name}
74
  Context Information: {context}
75
-
76
  NAICS Code Candidates from Google Search: {candidates}
77
 
78
- First, explain your reasoning for which industry this company belongs to.
79
- Start with "Based on the Google search results for this company and the provided context, I can suggest that..."
 
80
 
81
  Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
82
  Finally, provide your final conclusion.
83
 
84
  Your response should be in this format:
85
- REASONING: Based on the Google search results for this company and the provided context, I can suggest that... [Your detailed reasoning about the company's industry classification]
 
 
86
 
87
  POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
88
  POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
@@ -97,15 +100,19 @@ You are a NAICS code classification expert. Based on the company information pro
97
 
98
  Company Name: {company_name}
99
  Context Information: {context}
 
100
 
101
- First, explain your reasoning for which industry this company belongs to.
102
- Start with "Based on the limited information available and without specific Google search results, I can suggest that..."
 
103
 
104
  Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
105
  Finally, provide your final conclusion.
106
 
107
  Your response should be in this format:
108
- REASONING: Based on the limited information available and without specific Google search results, I can suggest that... [Your detailed reasoning about the company's industry classification]
 
 
109
 
110
  POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
111
  POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
@@ -116,6 +123,10 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
116
  response = model.generate_content(prompt)
117
  response_text = response.text.strip()
118
 
 
 
 
 
119
  # Extract reasoning
120
  reasoning_match = re.search(r'REASONING:(.*?)POSSIBILITY_1:', response_text, re.DOTALL | re.IGNORECASE)
121
  reasoning = reasoning_match.group(1).strip() if reasoning_match else "No reasoning provided."
@@ -153,6 +164,7 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
153
 
154
  return {
155
  "naics_code": naics_code,
 
156
  "reasoning": reasoning,
157
  "possibilities": possibilities,
158
  "conclusion": conclusion
@@ -161,6 +173,7 @@ CONCLUSION: I am [XX]% confident this company is [industry description] which is
161
  print(f"Error getting NAICS classification: {str(e)}")
162
  return {
163
  "naics_code": "000000",
 
164
  "reasoning": f"Error analyzing company: {str(e)}",
165
  "possibilities": [],
166
  "conclusion": "Error in analysis"
@@ -182,12 +195,21 @@ def find_naics_code(api_key, company_name, company_description):
182
 
183
  # Get classification
184
  if not naics_candidates:
185
- result = get_naics_classification(model, company_name, company_description, [])
186
  else:
187
- result = get_naics_classification(model, company_name, company_description, naics_candidates)
188
 
189
  # Format the output with NAICS code at the end
190
  output = f"## Analysis for {company_name}\n\n"
 
 
 
 
 
 
 
 
 
191
  output += f"**Reasoning:**\n{result['reasoning']}\n\n"
192
 
193
  # Add possibilities section
@@ -196,11 +218,6 @@ def find_naics_code(api_key, company_name, company_description):
196
  for i, possibility in enumerate(result['possibilities'], 1):
197
  output += f"{i}. {possibility}\n\n"
198
 
199
- # Add Google search information
200
- if naics_candidates:
201
- output += f"**Candidate NAICS Codes Found from Google:**\n{', '.join(naics_candidates)}\n\n"
202
- output += f"*Search query used: \"{search_query}\"*\n\n"
203
-
204
  # Add conclusion
205
  if 'conclusion' in result and result['conclusion']:
206
  output += f"**Conclusion:**\n{result['conclusion']}\n\n"
 
59
  print(f"Error performing Google search: {str(e)}")
60
  return []
61
 
62
+ def get_naics_classification(model, company_name: str, context: str, candidates: List[str], search_query: str) -> dict:
63
  """
64
  Use Gemini AI to determine the most appropriate NAICS code from candidates
65
  First provides reasoning, then multiple possibilities with confidence levels
 
72
 
73
  Company Name: {company_name}
74
  Context Information: {context}
75
+ Google Search Query Used: {search_query}
76
  NAICS Code Candidates from Google Search: {candidates}
77
 
78
+ First, start with a section titled "GOOGLE_FINDINGS:" where you describe what the Google search results suggest about this company based on the NAICS codes found.
79
+
80
+ Then, in a section titled "REASONING:", explain your reasoning for which industry this company belongs to.
81
 
82
  Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
83
  Finally, provide your final conclusion.
84
 
85
  Your response should be in this format:
86
+ GOOGLE_FINDINGS: [Describe what the Google search results suggest about this company based on the NAICS codes found]
87
+
88
+ REASONING: [Your detailed reasoning about the company's industry classification]
89
 
90
  POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
91
  POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
 
100
 
101
  Company Name: {company_name}
102
  Context Information: {context}
103
+ Google Search Query Used: {search_query}
104
 
105
+ First, start with a section titled "GOOGLE_FINDINGS:" where you acknowledge that the Google search did not return any specific NAICS codes for this company.
106
+
107
+ Then, in a section titled "REASONING:", explain your reasoning for which industry this company belongs to based on the limited information available.
108
 
109
  Then list 3 potential NAICS classifications with confidence percentages (must add up to 100%).
110
  Finally, provide your final conclusion.
111
 
112
  Your response should be in this format:
113
+ GOOGLE_FINDINGS: No specific NAICS codes were found in the Google search results using the query "{search_query}".
114
+
115
+ REASONING: [Your detailed reasoning about the company's industry classification based on the limited information available]
116
 
117
  POSSIBILITY_1: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
118
  POSSIBILITY_2: [Industry name] - NAICS Code [6-digit code] - [XX]% confidence
 
123
  response = model.generate_content(prompt)
124
  response_text = response.text.strip()
125
 
126
+ # Extract Google findings
127
+ google_findings_match = re.search(r'GOOGLE_FINDINGS:(.*?)REASONING:', response_text, re.DOTALL | re.IGNORECASE)
128
+ google_findings = google_findings_match.group(1).strip() if google_findings_match else "No Google findings provided."
129
+
130
  # Extract reasoning
131
  reasoning_match = re.search(r'REASONING:(.*?)POSSIBILITY_1:', response_text, re.DOTALL | re.IGNORECASE)
132
  reasoning = reasoning_match.group(1).strip() if reasoning_match else "No reasoning provided."
 
164
 
165
  return {
166
  "naics_code": naics_code,
167
+ "google_findings": google_findings,
168
  "reasoning": reasoning,
169
  "possibilities": possibilities,
170
  "conclusion": conclusion
 
173
  print(f"Error getting NAICS classification: {str(e)}")
174
  return {
175
  "naics_code": "000000",
176
+ "google_findings": "Error occurred during Google search.",
177
  "reasoning": f"Error analyzing company: {str(e)}",
178
  "possibilities": [],
179
  "conclusion": "Error in analysis"
 
195
 
196
  # Get classification
197
  if not naics_candidates:
198
+ result = get_naics_classification(model, company_name, company_description, [], search_query)
199
  else:
200
+ result = get_naics_classification(model, company_name, company_description, naics_candidates, search_query)
201
 
202
  # Format the output with NAICS code at the end
203
  output = f"## Analysis for {company_name}\n\n"
204
+
205
+ # Display search query prominently at the top
206
+ output += f"**Google Search Query Used:**\n`{search_query}`\n\n"
207
+
208
+ # Add Google findings first
209
+ if 'google_findings' in result and result['google_findings']:
210
+ output += f"**Google Search Findings:**\n{result['google_findings']}\n\n"
211
+
212
+ # Then reasoning
213
  output += f"**Reasoning:**\n{result['reasoning']}\n\n"
214
 
215
  # Add possibilities section
 
218
  for i, possibility in enumerate(result['possibilities'], 1):
219
  output += f"{i}. {possibility}\n\n"
220
 
 
 
 
 
 
221
  # Add conclusion
222
  if 'conclusion' in result and result['conclusion']:
223
  output += f"**Conclusion:**\n{result['conclusion']}\n\n"