# GitHubHelper / agents.py
# Author: MingZ6
# Commit 0a7721c: Refactor load_config to support multiple environment
# variable names for API key and update default model
import warnings
import os
import json
warnings.filterwarnings("ignore")
from together import Together
def load_config():
    """Resolve the Together API key and model name.

    Environment variables take precedence (the Hugging Face deployment
    path); a config.json next to this file is the fallback for local
    development. Returns a dict with "together_ai_token" and "model" keys.
    """
    # Check both environment variable names deployments have used.
    api_key = os.environ.get("TOGETHER_API_KEY", "") or os.environ.get(
        "together_ai_token", ""
    )

    if not api_key:
        # Local development path: read credentials from config.json.
        try:
            config_path = os.path.join(os.path.dirname(__file__), "config.json")
            if os.path.exists(config_path):
                with open(config_path, "r") as f:
                    file_config = json.load(f)
                return {
                    "together_ai_token": file_config.get("together_ai_token", ""),
                    "model": file_config.get(
                        "model",
                        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
                    ),
                }
        except Exception as e:
            print(f"Error loading config.json: {str(e)}")

    # Debug aid for deployments where the key may be missing.
    print(f"API key found: {'Yes' if api_key else 'No'}")

    # NOTE(review): this hard-coded default differs from the config.json
    # fallback model above — presumably intentional, but worth confirming.
    return {
        "together_ai_token": api_key,
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo"  # Using the model from your config.json
    }
# Resolve configuration once at import time.
config = load_config()
your_api_key = config["together_ai_token"]
# Fall back to a default model if the config omitted one.
model = config.get("model", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8")

# Only construct the Together client when an API key is present;
# downstream code checks `client` before making API calls.
if your_api_key:
    client = Together(api_key=your_api_key)
else:
    client = None
def prompt_llm(prompt, show_cost=False):
    """Send a single user message to the Together chat-completions API.

    Returns the model's reply text, or an error string when the client is
    not initialized or the API call fails.
    """
    # Guard: the module-level client is None when no API key was configured.
    if not client:
        return "Error: Together API client not initialized. Please check your API key."

    if show_cost:
        # Rough token estimate (whitespace split) for a cost ballpark.
        tokens = len(prompt.split())
        print(f"\nNumber of tokens: {tokens}")
        cost = (0.1 / 1_000_000) * tokens
        print(f"Estimated cost for {model}: ${cost:.10f}\n")

    # Make the API call, degrading to an error string on failure.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling Together API: {str(e)}"
class SummarizerAgent:
    """Agent that condenses source code into a fixed 4-line summary."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def process(self, content):
        """Return an LLM-generated 4-line summary of `content`."""
        instructions = """SYSTEM: You are an expert code summarizer.
Your task is to condense the provided code into a clear, informative summary of exactly 4 lines.
INSTRUCTIONS:
• Identify and include only the most important functionality
• Explain what the code does and its purpose
• Ensure the summary is exactly 4 lines long
• Use concise, clear language
• Show output only - provide just the summary
• Do not include any other text or comments, show only the summary
* do not say "Here is a 4-line summary: " show the summary directly and nothing else
Code to summarize: {content}
Provide a 4-line summary:"""
        return prompt_llm(instructions.format(content=content))
class InsightAgent:
    """Agent that distills code summaries into three key insights."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def process_text(self, summaries):
        """Generate insights from a list of summary strings."""
        combined = "\n\n".join(summaries)
        return self._generate_insights(combined)

    def _generate_insights(self, all_summaries):
        """Ask the LLM for exactly 3 bullet-point insights over the summaries."""
        template = """SYSTEM: You are an expert code analyst who can identify key insights from code summaries.
INSTRUCTIONS:
• Review the provided code summaries
• Identify 3 key insights that represent the most important takeaways
• Consider code structure, patterns, and best practices
• Format your response as exactly 3 bullet points
• Each bullet point must be a single sentence
• Be concise, clear, and informative
• Do not include any introductory or concluding text
• Show only the 3 bullet points, nothing else
Summaries to analyze:
{summaries}
Provide exactly 3 bullet point insights:"""
        return prompt_llm(template.format(summaries=all_summaries))
class RecommenderAgent:
    """Agent that turns insights and summaries into actionable next steps."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def process(self, insights, summaries, user_goal, persona=""):
        """Return exactly 2 recommendations tailored to the goal and persona."""
        effective_persona = persona if persona else "General user"
        template = """SYSTEM: You are an expert code consultant who provides actionable recommendations.
INSTRUCTIONS:
• Review the provided insights and summaries about the code
• Consider the user's specific goal: {user_goal}
• Consider the user's persona: {persona}
• Recommend exactly 2 specific, actionable steps the user can take to improve or work with this codebase
• Each recommendation should be practical, specific, and directly related to the goal
• Format your response as exactly 2 bullet points
• Each bullet point should be 1-2 sentences
• Be concise, clear, and actionable
• Do not include any introductory or concluding text
• Show only the 2 bullet point recommendations, nothing else
Insights:
{insights}
Additional context from summaries:
{summaries}
User's goal: {user_goal}
User's persona: {persona}
Provide exactly 2 actionable recommendations:"""
        filled = template.format(
            insights=insights,
            summaries="\n\n".join(summaries),
            user_goal=user_goal,
            persona=effective_persona,
        )
        return prompt_llm(filled)

    def suggest_next_query(self, insights, summaries, user_goal, persona=""):
        """Generate a suggested next search query based on insights, summaries, and user goal."""
        effective_persona = persona if persona else "General user"
        prompt = f"""
Based on the following insights and summaries about code, and considering the user's goal and persona,
suggest ONE specific area of the codebase the user should explore next.
INSIGHTS:
{insights}
SUMMARIES:
{summaries}
USER'S GOAL:
{user_goal}
USER'S PERSONA:
{effective_persona}
Suggest a specific, focused area or component (5-10 words) that would help the user find additional
information to achieve their goal. This should guide their next exploration of the repository.
NEXT EXPLORATION AREA:
"""
        return prompt_llm(prompt)
class QuestionGeneratorAgent:
    """Agent that generates questions from content via a JSON prompt template."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def generate_questions(self, content, category, source):
        """Fill PromptTemplate.json's "prompt_template" and query the LLM.

        Args:
            content: Text the questions should be generated from.
            category: Category label substituted into the template.
            source: Source label substituted into the template.
        Returns:
            The LLM response string.
        Raises:
            FileNotFoundError / KeyError if the template file or its
            "prompt_template" key is missing.
        """
        prompt_template_path = os.path.join(
            os.path.dirname(__file__), "PromptTemplate.json"
        )
        # json is already imported at module level; the previous
        # function-local `import json` was redundant and has been removed.
        with open(prompt_template_path, "r") as f:
            prompt_data = json.load(f)
        prompt = prompt_data["prompt_template"].format(
            content=content,
            category=category,
            source=source,
        )
        return prompt_llm(prompt)
class CLISetupAgent:
    """Agent that produces CLI environment-setup instructions for a repo."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def generate_setup_instructions(self, repo_content, repo_metadata):
        """Generate step-by-step CLI instructions to set up the environment for a repository."""
        language = repo_metadata.get("language", "")
        repo_name = repo_metadata.get("name", "")
        repo_url = repo_metadata.get("url", "")

        # Gather the files that typically drive environment setup.
        known_setup_names = [
            "requirements.txt", "package.json", "setup.py", "Dockerfile",
            "docker-compose.yml", ".env.example", "Makefile", "README.md"
        ]
        script_suffixes = (".yml", ".yaml", ".sh", ".bat")
        setup_files = {
            name: body
            for name, body in repo_content.items()
            if name in known_setup_names or name.endswith(script_suffixes)
        }

        # Generic fallback used whenever better instructions can't be made.
        default_steps = f"""
1. Clone the repository:
```
git clone {repo_url}
cd {repo_name}
```
2. Check the repository structure:
```
ls -la
```
3. Read the README file for specific instructions:
```
cat README.md
```
"""
        # Nothing recognizable to work from: return the basic instructions.
        if not setup_files:
            return default_steps

        lang_label = language if language else "unknown language"
        lang_field = language if language else "Not specified"
        newline = chr(10)
        file_sections = newline.join(
            f"--- {name} ---{newline}{body[:300]}..."
            for name, body in setup_files.items()
        )
        prompt = f"""
SYSTEM: You are an expert DevOps engineer who provides clear CLI setup instructions.
INSTRUCTIONS:
• Generate step-by-step CLI instructions to set up a development environment for the given repository
• The repository is named "{repo_name}" and primarily uses {lang_label}
• Include commands for cloning, installing dependencies, and basic configuration
• Format your response as a numbered list with clear command-line instructions
• Include comments explaining what each command does
• Focus on practical, executable commands that work on both macOS/Linux and Windows where possible
• If different platforms require different commands, clearly indicate which is for which
• Mention any prerequisites that need to be installed (like Python, Node.js, Docker, etc.)
REPOSITORY INFORMATION:
Name: {repo_name}
Primary Language: {lang_field}
URL: {repo_url}
RELEVANT SETUP FILES:
{file_sections}
Provide a step-by-step CLI setup guide with exactly 5-10 commands:
"""
        try:
            instructions = prompt_llm(prompt)
            # Guard against empty or degenerate LLM output.
            if not instructions or len(instructions.strip()) < 10:
                return default_steps
            return instructions
        except Exception as e:
            print(f"Error generating CLI setup: {str(e)}")
            return default_steps
class ChatbotAgent:
    """Agent for answering questions about GitHub repositories."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def answer_question(self, question, repo_content, repo_metadata, summaries=None, insights=None):
        """
        Answer a question about a GitHub repository based on its content and analysis.
        Args:
            question: The user's question about the repository
            repo_content: Dictionary of repository files and their content
            repo_metadata: Repository metadata like name, description, etc.
            summaries: Optional dictionary of file summaries
            insights: Optional insights about the repository
        Returns:
            A string containing the answer to the question
        """
        # Extract key repository information
        repo_name = repo_metadata.get("name", "Unknown repository")
        repo_description = repo_metadata.get("description", "No description available")
        repo_language = repo_metadata.get("language", "Unknown")
        # Create a context from the repository information
        context = f"Repository: {repo_name}\nDescription: {repo_description}\nLanguage: {repo_language}\n\n"
        # Add insights if available
        if insights:
            context += f"Key insights:\n{insights}\n\n"
        # Add summaries if available
        if summaries:
            context += "File summaries:\n"
            for filename, summary in summaries.items():
                # BUG FIX: interpolate the actual filename; the loop variable
                # was previously unused and a literal placeholder was emitted.
                context += f"- {filename}: {summary}\n"
            context += "\n"
        # Select relevant files for the question to avoid token limit issues
        relevant_files = self._select_relevant_files(question, repo_content, max_files=5)
        # Add content of relevant files
        if relevant_files:
            context += "Relevant files:\n"
            for filename, content in relevant_files.items():
                # Truncate long files, labelling each section with its filename
                # (same BUG FIX as above: use the loop variable, not a literal).
                if len(content) > 1000:
                    context += f"--- {filename} (truncated) ---\n{content[:1000]}...\n\n"
                else:
                    context += f"--- {filename} ---\n{content}\n\n"
        # Create the prompt for the LLM
        prompt = f"""SYSTEM: You are a GitHub repository expert assistant. You provide accurate, helpful answers
about code repositories based on their content, structure, and analysis. Draw upon the
provided context to answer the question. If you don't know the answer, say so honestly.
CONTEXT INFORMATION:
{context}
USER QUESTION:
{question}
Provide a clear, concise answer to the question based only on the information provided above.
Include code snippets or commands when relevant. Be specific and informative.
"""
        return prompt_llm(prompt)

    def _select_relevant_files(self, question, repo_content, max_files=5):
        """Select files from the repository that are most relevant to the question."""
        # If there are only a few files, return all of them
        if len(repo_content) <= max_files:
            return repo_content
        relevant_files = {}
        # Ask the LLM which file patterns/extensions matter for this question.
        file_selection_prompt = f"""SYSTEM: You are a code repository expert. Given a question about a repository,
identify what types of files would be most relevant to answer it.
QUESTION: {question}
Based on this question, list ONLY 3-5 file patterns or extensions that would be most relevant
for answering it. For example: 'README.md', '.py', 'package.json', 'Dockerfile', etc.
Just list the patterns, one per line, without any explanation or additional text.
"""
        try:
            file_patterns_response = prompt_llm(file_selection_prompt)
            file_patterns = [pattern.strip().lower() for pattern in file_patterns_response.split('\n') if pattern.strip()]
            # Keep files whose lowercased name matches any suggested pattern.
            for filename, content in repo_content.items():
                filename_lower = filename.lower()
                if any(pattern in filename_lower for pattern in file_patterns):
                    relevant_files[filename] = content
                    if len(relevant_files) >= max_files:
                        break
            # Top up with conventionally important files.
            if len(relevant_files) < max_files:
                important_files = ['readme.md', 'setup.py', 'requirements.txt', 'package.json', 'dockerfile']
                for filename, content in repo_content.items():
                    if filename.lower() in important_files and filename not in relevant_files:
                        relevant_files[filename] = content
                        if len(relevant_files) >= max_files:
                            break
            # Finally pad with arbitrary remaining files.
            remaining_slots = max_files - len(relevant_files)
            if remaining_slots > 0:
                for filename, content in repo_content.items():
                    if filename not in relevant_files:
                        relevant_files[filename] = content
                        remaining_slots -= 1
                        if remaining_slots <= 0:
                            break
        except Exception as e:
            # Any failure (e.g. the LLM call) falls back to the first
            # max_files entries so the chatbot still gets some context.
            print(f"Error selecting relevant files: {str(e)}")
            relevant_files = dict(list(repo_content.items())[:max_files])
        return relevant_files
class PRReviewAgent:
    """Agent for reviewing GitHub Pull Requests and providing professional code feedback."""

    def __init__(self):
        # Shared module-level Together client.
        self.client = client

    def review_pr(self, pr_details, target_branch_code):
        """
        Review a GitHub pull request and provide professional code suggestions.
        Args:
            pr_details: Dictionary containing PR files, metadata, and changes
            target_branch_code: Dictionary of target branch files and their content
        Returns:
            A dictionary containing code suggestions and optimization recommendations
        """
        # Extract PR information
        pr_title = pr_details.get("title", "Untitled PR")
        pr_description = pr_details.get("description", "No description")
        changed_files = pr_details.get("changed_files", [])

        # Prepare context for the review
        context = f"Pull Request: {pr_title}\nDescription: {pr_description}\n\n"

        # Add changed files info
        if changed_files:
            context += "Files changed in this PR:\n"
            for file_info in changed_files:
                filename = file_info.get("filename", "unknown")
                changes = file_info.get("patch", "No changes available")
                # BUG FIX: label each diff with its actual filename; the
                # variable was previously unused and a literal placeholder
                # was emitted instead.
                context += f"--- {filename} ---\n{changes}\n\n"

        # Add target branch context for the files that were changed
        relevant_target_files = {}
        for file_info in changed_files:
            filename = file_info.get("filename", "")
            if filename in target_branch_code:
                relevant_target_files[filename] = target_branch_code[filename]

        if relevant_target_files:
            context += "Relevant files in target branch:\n"
            for filename, content in relevant_target_files.items():
                # Truncate long files to keep the prompt within token limits
                # (same BUG FIX: interpolate the filename, not a literal).
                if len(content) > 1000:
                    truncated_content = content[:1000] + "..."
                    context += f"--- {filename} (truncated) ---\n{truncated_content}\n\n"
                else:
                    context += f"--- {filename} ---\n{content}\n\n"

        # Generate code review
        code_review_prompt = f"""SYSTEM: You are a senior software developer reviewing a GitHub Pull Request.
Provide professional, constructive feedback on the code changes. Focus on:
1. Code style and adherence to best practices
2. Potential bugs or issues
3. Architecture and design considerations
4. Performance implications
CONTEXT INFORMATION:
{context}
Provide your code review in the following format:
## Overall Assessment
[A brief 2-3 sentence assessment of the PR]
## Code Quality Suggestions
- [Specific suggestion 1 with code example if applicable]
- [Specific suggestion 2 with code example if applicable]
- [Add more if necessary, at least 3 suggestions]
## Optimization Opportunities
- [Specific optimization 1 with code example if applicable]
- [Specific optimization 2 with code example if applicable]
- [Add more if necessary, at least 2 suggestions]
Your review should be professional, specific, and actionable. Provide code examples where appropriate.
"""
        review_result = prompt_llm(code_review_prompt)
        return {
            "review": review_result
        }