# NOTE(review): page residue captured with the source ("Spaces: Runtime error"); not part of the program.
import ast
import glob
import importlib.util
import os
import re
from collections import defaultdict
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple

import huggingface_hub
import pkg_resources
from transformers import pipeline
class CodeAnalyzer2:
    """Profile a source tree and summarize it with a code-summarization model.

    The analyzer reads source files from disk, derives a technology stack and
    simple size/complexity metrics from the Python files via ``ast``, and uses
    a Hugging Face ``summarization`` pipeline to produce free-text summaries.
    """

    # Character cap applied to text before it is handed to the summarizer.
    _MAX_INPUT_CHARS = 4000

    # Extensions collected by read_code_files, in collection order.
    _CODE_EXTENSIONS = ('.py', '.java', '.jsx', '.js', '.ts', '.tsx')

    # File extension -> language label reported in the tech stack.
    _LANGUAGE_BY_EXTENSION = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.jsx': 'React/JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'React/TypeScript',
        '.java': 'Java',
    }

    # Lower-cased top-level module name -> framework display name.
    _FRAMEWORK_BY_MODULE = {
        'django': 'Django',
        'flask': 'Flask',
        'fastapi': 'FastAPI',
        'react': 'React',
        'angular': 'Angular',
        'vue': 'Vue.js',
        'spring': 'Spring',
        'tensorflow': 'TensorFlow',
        'torch': 'PyTorch',
        'pandas': 'Pandas',
        'numpy': 'NumPy',
    }

    # (exclusive upper bound on complexity score, label, RAM in GB, CPU cores)
    _HARDWARE_TIERS = ((10, "Low", 2, 1), (30, "Medium", 4, 2))
    # Tier used when the score is at or above every bound in _HARDWARE_TIERS.
    _TOP_HARDWARE_TIER = ("High", 8, 4)

    def __init__(self):
        # One summarization model backs every summary this class produces.
        self.summarizer = pipeline("summarization", model="Graverman/t5-code-summary")

    @staticmethod
    def _parse_python(source: str) -> Optional[ast.Module]:
        """Return the AST for *source*, or ``None`` when it cannot be parsed."""
        try:
            return ast.parse(source)
        except (SyntaxError, ValueError, RecursionError):
            # Best effort: an unparsable file is skipped, but interpreter-level
            # signals such as KeyboardInterrupt are no longer swallowed.
            return None

    def _iter_python_trees(self, code_files: Dict[str, str]) -> Iterator[Tuple[str, ast.Module]]:
        """Yield ``(source, tree)`` for every parsable ``.py`` entry of *code_files*."""
        for file_path, content in code_files.items():
            if not file_path.endswith('.py'):
                continue
            tree = self._parse_python(content)
            if tree is not None:
                yield content, tree

    def detect_technologies(self, code_files: Dict[str, str]) -> Dict[str, Any]:
        """Detect the languages, frameworks and dependencies used in the project.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            A dict with the keys ``"languages"``, ``"frameworks"`` and
            ``"dependencies"``, each holding a sorted list of strings.
        """
        languages = set()
        for file_path in code_files:
            extension = os.path.splitext(file_path)[1]
            if extension in self._LANGUAGE_BY_EXTENSION:
                languages.add(self._LANGUAGE_BY_EXTENSION[extension])

        dependencies = set()
        for _, tree in self._iter_python_trees(code_files):
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    dependencies.update(alias.name.split('.')[0] for alias in node.names)
                elif isinstance(node, ast.ImportFrom):
                    # level > 0 marks a relative import, i.e. project-internal
                    # code rather than an external dependency.
                    if node.module and node.level == 0:
                        dependencies.add(node.module.split('.')[0])

        frameworks = {
            self._FRAMEWORK_BY_MODULE[dep.lower()]
            for dep in dependencies
            if dep.lower() in self._FRAMEWORK_BY_MODULE
        }

        # Sorted so that repeated runs over the same input give the same output.
        return {
            "languages": sorted(languages),
            "frameworks": sorted(frameworks),
            "dependencies": sorted(dependencies),
        }

    def analyze_code_complexity(self, code_files: Dict[str, str]) -> Dict[str, Any]:
        """Compute size and complexity metrics over the parsable Python files.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            A dict with the integer counters ``total_lines``, ``code_lines``
            (non-blank lines not starting with ``#``), ``class_count``,
            ``function_count`` (sync and async) and ``complexity_score``.
        """
        metrics = {
            "total_lines": 0,
            "code_lines": 0,
            "class_count": 0,
            "function_count": 0,
            "complexity_score": 0,
        }
        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
        # The score is a plain count of branching, looping and try statements;
        # nesting depth is not taken into account.
        branching_types = (ast.If, ast.For, ast.AsyncFor, ast.While, ast.Try)

        for content, tree in self._iter_python_trees(code_files):
            nodes = list(ast.walk(tree))
            metrics["class_count"] += sum(isinstance(node, ast.ClassDef) for node in nodes)
            metrics["function_count"] += sum(isinstance(node, function_types) for node in nodes)
            metrics["complexity_score"] += sum(isinstance(node, branching_types) for node in nodes)

            lines = content.split('\n')
            metrics["total_lines"] += len(lines)
            metrics["code_lines"] += sum(
                1 for line in lines if line.strip() and not line.strip().startswith('#')
            )
        return metrics

    def identify_objective(self, code_files: Dict[str, str]) -> str:
        """Summarize the project's objective from its Python docstrings.

        Module, class and function (sync and async) docstrings are joined with
        spaces, capped at ``_MAX_INPUT_CHARS`` characters and summarized.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            The generated summary, or a fixed fallback message when no
            docstring was found.
        """
        documented_types = (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
        docstrings = []
        for _, tree in self._iter_python_trees(code_files):
            for node in ast.walk(tree):
                if isinstance(node, documented_types):
                    docstring = ast.get_docstring(node)
                    if docstring:
                        docstrings.append(docstring)

        combined_docs = " ".join(docstrings)
        if not combined_docs:
            return "Unable to determine project objective from available documentation"

        # Same cap as generate_summary, so a large project does not send an
        # unbounded input to the model.
        combined_docs = combined_docs[:self._MAX_INPUT_CHARS]
        return self.summarizer(combined_docs, max_length=100, min_length=30, do_sample=False)[0]['summary_text']

    def read_code_files(self, directory: str) -> Dict[str, str]:
        """Read all code files below *directory*, recursively.

        Files that cannot be read are reported on stdout and skipped.

        Args:
            directory: Root directory to search.

        Returns:
            Mapping of file path to file content, decoded as UTF-8.
        """
        code_files = {}
        # Escape the root so that glob metacharacters in the directory name
        # (e.g. "proj[1]") are matched literally.
        root = glob.escape(directory)
        for ext in self._CODE_EXTENSIONS:
            for file_path in glob.glob(f"{root}/**/*{ext}", recursive=True):
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        code_files[file_path] = f.read()
                except Exception as e:
                    print(f"Error reading {file_path}: {e}")
        return code_files

    def generate_summary(self, code: str, context: str = "") -> str:
        """Generate a summary for the given code with optional context.

        Args:
            code: Source text to summarize; only the first
                ``_MAX_INPUT_CHARS`` characters are used.
            context: Optional text placed on its own line before the code.

        Returns:
            The generated summary, or ``"No code provided"`` for blank input.
        """
        if not code.strip():
            return "No code provided"

        code = code[:self._MAX_INPUT_CHARS]
        prompt = f"{context}\n{code}" if context else code
        return self.summarizer(prompt, max_length=150, min_length=40, do_sample=False)[0]['summary_text']

    @staticmethod
    def _architecture_answer(tech_stack: Dict[str, Any], metrics: Dict[str, Any]) -> str:
        """Describe languages, frameworks and component counts."""
        answer = f"Project Architecture:\n- Languages: {', '.join(tech_stack['languages'])}\n"
        if tech_stack['frameworks']:
            answer += f"- Frameworks: {', '.join(tech_stack['frameworks'])}\n"
        answer += f"- Components: {metrics['class_count']} classes, {metrics['function_count']} functions"
        return answer

    @staticmethod
    def _software_answer(tech_stack: Dict[str, Any]) -> str:
        """List dependencies and frameworks as software requirements."""
        # Sorted so the answer is stable between runs.
        requirements = sorted(set(tech_stack['dependencies']) | set(tech_stack['frameworks']))
        return f"Software Requirements:\n- Python environment\n- Dependencies: {', '.join(requirements)}"

    def _hardware_answer(self, complexity_score: int) -> str:
        """Map the complexity score onto a hardware recommendation tier."""
        label, ram_gb, cores = self._TOP_HARDWARE_TIER
        for limit, tier_label, tier_ram_gb, tier_cores in self._HARDWARE_TIERS:
            if complexity_score < limit:
                label, ram_gb, cores = tier_label, tier_ram_gb, tier_cores
                break
        return (
            f"Hardware Requirements:\n- Complexity: {label}\n"
            f"- Minimum RAM: {ram_gb}GB\n- CPU: {cores}+ cores recommended"
        )

    def _answer_question(self, question: str, tech_stack: Dict[str, Any], metrics: Dict[str, Any],
                         objective: str, combined_code: str) -> str:
        """Answer one question by keyword, falling back to a contextual summary."""
        question_lower = question.lower()
        if 'abstract' in question_lower:
            return objective
        if 'architecture' in question_lower:
            return self._architecture_answer(tech_stack, metrics)
        if 'software' in question_lower and 'requirement' in question_lower:
            return self._software_answer(tech_stack)
        if 'hardware' in question_lower and 'requirement' in question_lower:
            return self._hardware_answer(metrics['complexity_score'])
        # For other questions, generate a contextual summary.
        return self.generate_summary(combined_code, f"Context: {question}")

    def analyze_project(self, project_dir: str, questions_file: str) -> Dict[str, Any]:
        """Analyze a project and answer the questions listed in a text file.

        Args:
            project_dir: Root directory of the project to analyze.
            questions_file: Path to a UTF-8 text file with one question per
                line; blank lines are ignored.

        Returns:
            A dict with the keys ``project_summary``, ``tech_stack``,
            ``metrics``, ``objective`` and ``answers`` (question -> answer).
            When no code files are found, a placeholder result is returned and
            the questions file is not opened.

        Raises:
            OSError: If the questions file cannot be opened.
        """
        code_files = self.read_code_files(project_dir)
        if not code_files:
            return {
                "project_summary": "No code files found",
                "tech_stack": {},
                "metrics": {},
                "objective": "No code files to analyze",
                "answers": {},
            }

        # Read the questions before the model-backed analysis so that a
        # missing or unreadable file fails fast.
        with open(questions_file, 'r', encoding='utf-8') as f:
            questions = [line.strip() for line in f if line.strip()]

        tech_stack = self.detect_technologies(code_files)
        metrics = self.analyze_code_complexity(code_files)
        objective = self.identify_objective(code_files)

        combined_code = "\n\n".join(code_files.values())
        summary = self.generate_summary(combined_code)

        answers = {}
        for question in questions:
            answers[question] = self._answer_question(
                question, tech_stack, metrics, objective, combined_code
            )

        return {
            "project_summary": summary,
            "tech_stack": tech_stack,
            "metrics": metrics,
            "objective": objective,
            "answers": answers,
        }
# if __name__ == "__main__":
#     analyzer = CodeAnalyzer2()
#     # Example usage
#     results = analyzer.analyze_project(
#         "./example_project",
#         "./questions.txt"
#     )
#     print("\nProject Objective:", results["objective"])
#     print("\nTechnology Stack:")
#     for category, items in results["tech_stack"].items():
#         print(f"- {category.title()}: {', '.join(items)}")
#     print("\nCode Metrics:")
#     for metric, value in results["metrics"].items():
#         print(f"- {metric.replace('_', ' ').title()}: {value}")
#     print("\nAnswers to Questions:")
#     for q, a in results["answers"].items():
#         print(f"\n{q}:\n{a}")