jyo01 committed on
Commit f3f07d4 · verified · 1 Parent(s): 79e26b9

Update app.py

Files changed (1)
  1. app.py +64 -30
app.py CHANGED
@@ -19,6 +19,8 @@ import os
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+
 
 
 ############################################
@@ -134,39 +136,62 @@ def generate_prompt(query: str, context_snippets: list) -> str:
 
 # return answer
 
-def get_llm_response(prompt: str, model_name: str = "EleutherAI/gpt-neo-125M", max_new_tokens: int = None) -> str:
-    if max_new_tokens is None:
-        max_new_tokens = 256  # You can adjust this value as needed.
+# def get_llm_response(prompt: str, model_name: str = "EleutherAI/gpt-neo-125M", max_new_tokens: int = None) -> str:
+#     if max_new_tokens is None:
+#         max_new_tokens = 256  # You can adjust this value as needed.
 
-    torch.cuda.empty_cache()
+#     torch.cuda.empty_cache()
 
-    # Load the tokenizer and model for GPT-Neo 125M.
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        device_map="auto",
-        use_safetensors=False,
-        torch_dtype=torch.float32  # Using default precision since model is small.
-    )
+#     # Load the tokenizer and model for GPT-Neo 125M.
+#     tokenizer = AutoTokenizer.from_pretrained(model_name)
+#     model = AutoModelForCausalLM.from_pretrained(
+#         model_name,
+#         device_map="auto",
+#         use_safetensors=False,
+#         torch_dtype=torch.float32  # Using default precision since model is small.
+#     )
 
-    text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
-    outputs = text_gen(
-        prompt,
-        max_new_tokens=max_new_tokens,
-        do_sample=True,
-        temperature=0.9,  # Increased temperature
-        top_p=0.9,  # Using nucleus sampling
-        top_k=50  # Limit to top 50 tokens per step
-    )
-    full_response = outputs[0]['generated_text']
+#     text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
+#     outputs = text_gen(
+#         prompt,
+#         max_new_tokens=max_new_tokens,
+#         do_sample=True,
+#         temperature=0.9,  # Increased temperature
+#         top_p=0.9,  # Using nucleus sampling
+#         top_k=50  # Limit to top 50 tokens per step
+#     )
+#     full_response = outputs[0]['generated_text']
 
-    marker = "Answer:"
-    if marker in full_response:
-        answer = full_response.split(marker, 1)[1].strip()
-    else:
-        answer = full_response.strip()
-
-    return answer
+#     marker = "Answer:"
+#     if marker in full_response:
+#         answer = full_response.split(marker, 1)[1].strip()
+#     else:
+#         answer = full_response.strip()
+#
+#     return answer
+#
+def get_gemini_flash_response(prompt: str) -> str:
+    from google import genai
+    from google.genai import types
+
+    # Create a Gemini client using the API key from the environment.
+    client = genai.Client(api_key=GEMINI_API_KEY)
+
+    # Configure generation settings.
+    config = types.GenerateContentConfig(
+        max_output_tokens=500,  # Adjust as needed.
+        temperature=0.1  # Lower temperature for more deterministic responses.
+    )
+
+    # Send the prompt to the Gemini-2.0-flash model.
+    response = client.models.generate_content(
+        model="gemini-2.0-flash",
+        contents=[prompt],
+        config=config
+    )
+
+    return response.text
+
 
 
 ############################################
@@ -183,16 +208,25 @@ def get_file_content_for_choice(github_url: str, file_path: str):
     return content, file_path
 
 def chat_with_file(github_url: str, file_path: str, user_query: str):
+    # Retrieve file content using the file path directly.
     result = get_file_content_for_choice(github_url, file_path)
     if isinstance(result, str):
-        return result  # Error message
+        return result  # Return error message if one occurred.
     file_content, selected_file = result
+
+    # Preprocess file content and extract context.
     preprocessed = preprocess_text(file_content)
-    context_snippet = preprocessed[:1000]  # use first 1000 characters as context
+    context_snippet = preprocessed[:5000]  # Use first 5000 characters as context.
+
+    # Generate the prompt based on context and user query.
     prompt = generate_prompt(user_query, [context_snippet])
-    llm_response = get_llm_response(prompt)
+
+    # Use Gemini Flash to generate a response.
+    llm_response = get_gemini_flash_response(prompt)
+
    return f"File: {selected_file}\n\nLLM Response:\n{llm_response}"
 
+
 def load_repo_contents_backend(github_url: str):
     try:
         owner, repo = extract_repo_info(github_url)
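
Usage note (not part of the commit): a minimal standalone sketch of the same google-genai call pattern that the new get_gemini_flash_response helper uses, for quick verification outside the Space. It assumes the google-genai package is installed and GEMINI_API_KEY is set in the environment; the prompt string is only an illustrative placeholder.

# Standalone sketch mirroring get_gemini_flash_response from this diff.
# Assumes: `pip install google-genai` and GEMINI_API_KEY exported in the environment.
import os
from google import genai
from google.genai import types

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

config = types.GenerateContentConfig(
    max_output_tokens=500,  # same budget as the committed helper
    temperature=0.1,        # low temperature for deterministic answers
)

# Placeholder prompt shaped like the app's "context + Question/Answer" prompts.
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=["Question: What does os.environ.get return for a missing key?\nAnswer:"],
    config=config,
)
print(response.text)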