"""Static and model-assisted analysis of a source-code project."""

import ast
import glob
import importlib.util
import os
import re
from collections import defaultdict
from typing import List, Dict, Set, Any

import huggingface_hub
import pkg_resources
from transformers import pipeline


class CodeAnalyzer2:
    """Inspect a project directory and answer documentation-style questions.

    Static facts (languages, imports, size and branching metrics) are derived
    with ``ast`` and file extensions; free-text summaries are produced by a
    Hugging Face summarization pipeline stored in ``self.summarizer``.
    """

    # Extensions that are read from disk, in the order files are collected.
    SOURCE_EXTENSIONS = ('.py', '.java', '.jsx', '.js', '.ts', '.tsx')

    # File extension -> language label reported in the tech stack.
    EXTENSION_LANGUAGES = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.jsx': 'React/JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'React/TypeScript',
        '.java': 'Java',
    }

    # Lower-cased top-level package name -> framework display name.
    FRAMEWORK_INDICATORS = {
        'django': 'Django',
        'flask': 'Flask',
        'fastapi': 'FastAPI',
        'react': 'React',
        'angular': 'Angular',
        'vue': 'Vue.js',
        'spring': 'Spring',
        'tensorflow': 'TensorFlow',
        'torch': 'PyTorch',
        'pandas': 'Pandas',
        'numpy': 'NumPy',
    }

    # Upper bound (in characters) on any text handed to the summarizer.
    MAX_SUMMARY_INPUT_CHARS = 4000

    # (exclusive complexity upper bound, label, minimum RAM in GB, CPU cores).
    # Scores at or above the last bound fall through to _HARDWARE_FALLBACK.
    _HARDWARE_TIERS = (
        (10, "Low", 2, 1),
        (30, "Medium", 4, 2),
    )
    _HARDWARE_FALLBACK = ("High", 8, 4)

    # Node types that each add one point to the complexity score.
    _BRANCH_NODES = (ast.If, ast.For, ast.AsyncFor, ast.While, ast.Try)
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)
    _DOCSTRING_NODES = (
        ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef,
    )

    def __init__(self):
        """Load the summarization model used for every free-text summary."""
        # A single summarization model backs all text generation here.
        self.summarizer = pipeline(
            "summarization", model="Graverman/t5-code-summary"
        )

    @staticmethod
    def _parse_python(content: str):
        """Return the AST for *content*, or ``None`` if it cannot be parsed.

        Only parse failures are suppressed, so KeyboardInterrupt, SystemExit
        and genuine programming errors still propagate.
        """
        try:
            return ast.parse(content)
        except (SyntaxError, ValueError, RecursionError):
            # ValueError: source containing null bytes on older interpreters.
            # RecursionError: pathologically nested source.
            return None

    def _python_trees(self, code_files: Dict[str, str]):
        """Yield ``(content, tree)`` for every parseable ``.py`` file."""
        for file_path, content in code_files.items():
            if not file_path.endswith('.py'):
                continue
            tree = self._parse_python(content)
            if tree is not None:
                yield content, tree

    def detect_technologies(self, code_files: Dict[str, str]) -> Dict[str, Any]:
        """Detect technologies used in the project.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            A dict with the keys ``"languages"``, ``"frameworks"`` and
            ``"dependencies"``, each holding an alphabetically sorted list of
            strings.
        """
        languages = set()
        for file_path in code_files:
            ext = os.path.splitext(file_path)[1]
            if ext in self.EXTENSION_LANGUAGES:
                languages.add(self.EXTENSION_LANGUAGES[ext])

        dependencies = set()
        for _, tree in self._python_trees(code_files):
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        dependencies.add(alias.name.split('.')[0])
                elif isinstance(node, ast.ImportFrom):
                    # level > 0 marks a relative import, i.e. a module of the
                    # project itself rather than an external dependency.
                    if node.module and not node.level:
                        dependencies.add(node.module.split('.')[0])

        frameworks = {
            self.FRAMEWORK_INDICATORS[dep.lower()]
            for dep in dependencies
            if dep.lower() in self.FRAMEWORK_INDICATORS
        }

        # Sorted so that results (and answers built from them) are stable
        # from run to run instead of following set iteration order.
        return {
            "languages": sorted(languages),
            "frameworks": sorted(frameworks),
            "dependencies": sorted(dependencies),
        }

    def analyze_code_complexity(self, code_files: Dict[str, str]) -> Dict[str, Any]:
        """Analyze code complexity metrics of the Python files.

        Files that are not ``.py`` or that fail to parse contribute nothing.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            A dict with integer totals for ``"total_lines"``,
            ``"code_lines"`` (non-blank lines not starting with ``#``),
            ``"class_count"``, ``"function_count"`` (sync and async defs) and
            ``"complexity_score"`` (number of if/for/while/try nodes).
        """
        metrics = {
            "total_lines": 0,
            "code_lines": 0,
            "class_count": 0,
            "function_count": 0,
            "complexity_score": 0,
        }

        for content, tree in self._python_trees(code_files):
            # One traversal per file gathers all node-based counters.
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef):
                    metrics["class_count"] += 1
                elif isinstance(node, self._FUNCTION_NODES):
                    metrics["function_count"] += 1
                elif isinstance(node, self._BRANCH_NODES):
                    metrics["complexity_score"] += 1

            lines = content.split('\n')
            metrics["total_lines"] += len(lines)
            for line in lines:
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    metrics["code_lines"] += 1

        return metrics

    def identify_objective(self, code_files: Dict[str, str]) -> str:
        """Identify the main objective of the project.

        Collects module, class and function docstrings from the parseable
        Python files and summarizes them with ``self.summarizer``.

        Args:
            code_files: Mapping of file path to file content.

        Returns:
            The generated summary, or a fixed fallback message when no
            docstrings were found.
        """
        all_docs = []
        for _, tree in self._python_trees(code_files):
            for node in ast.walk(tree):
                if isinstance(node, self._DOCSTRING_NODES):
                    docstring = ast.get_docstring(node)
                    if docstring:
                        all_docs.append(docstring)

        combined_docs = " ".join(all_docs)
        if not combined_docs:
            return "Unable to determine project objective from available documentation"

        # Same input cap as generate_summary, so a large project cannot send
        # an unbounded prompt to the model.
        combined_docs = combined_docs[:self.MAX_SUMMARY_INPUT_CHARS]
        result = self.summarizer(
            combined_docs, max_length=100, min_length=30, do_sample=False
        )
        return result[0]['summary_text']

    def read_code_files(self, directory: str) -> Dict[str, str]:
        """Read all code files from the given directory (recursively).

        Files are collected extension by extension in ``SOURCE_EXTENSIONS``
        order and alphabetically within each extension. Unreadable files are
        reported on stdout and skipped.

        Args:
            directory: Root directory to search.

        Returns:
            Mapping of file path to decoded (UTF-8) file content.
        """
        code_files = {}
        # Escape the root so glob metacharacters in the directory name
        # ('[', '*', '?') are matched literally.
        root = glob.escape(str(directory))

        for ext in self.SOURCE_EXTENSIONS:
            pattern = f"{root}/**/*{ext}"
            for file_path in sorted(glob.glob(pattern, recursive=True)):
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        code_files[file_path] = f.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading {file_path}: {e}")

        return code_files

    def generate_summary(self, code: str, context: str = "") -> str:
        """Generate a summary for the given code with optional context.

        Args:
            code: Source text to summarize; only the first
                ``MAX_SUMMARY_INPUT_CHARS`` characters are used.
            context: Optional text placed on its own line before the code.

        Returns:
            The generated summary, or ``"No code provided"`` when *code* is
            empty or whitespace only.
        """
        if not code.strip():
            return "No code provided"

        # Truncate input if too long
        code = code[:self.MAX_SUMMARY_INPUT_CHARS]
        prompt = f"{context}\n{code}" if context else code

        result = self.summarizer(
            prompt, max_length=150, min_length=40, do_sample=False
        )
        return result[0]['summary_text']

    @staticmethod
    def _read_questions(questions_file: str) -> List[str]:
        """Return the non-blank, stripped lines of *questions_file*."""
        with open(questions_file, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]

    def _hardware_answer(self, complexity_score: int) -> str:
        """Build the hardware-requirements answer for a complexity score."""
        complexity, ram_gb, cores = self._HARDWARE_FALLBACK
        for upper_bound, label, tier_ram, tier_cores in self._HARDWARE_TIERS:
            if complexity_score < upper_bound:
                complexity, ram_gb, cores = label, tier_ram, tier_cores
                break
        return (
            f"Hardware Requirements:\n- Complexity: {complexity}\n"
            f"- Minimum RAM: {ram_gb}GB\n"
            f"- CPU: {cores}+ cores recommended"
        )

    def _answer_question(self, question: str, tech_stack: Dict[str, Any],
                         metrics: Dict[str, Any], objective: str,
                         combined_code: str) -> str:
        """Answer one question from the analysis results by keyword match."""
        question_lower = question.lower()

        if 'abstract' in question_lower:
            return objective

        if 'architecture' in question_lower:
            arch_summary = (
                f"Project Architecture:\n"
                f"- Languages: {', '.join(tech_stack['languages'])}\n"
            )
            if tech_stack['frameworks']:
                arch_summary += f"- Frameworks: {', '.join(tech_stack['frameworks'])}\n"
            arch_summary += (
                f"- Components: {metrics['class_count']} classes, "
                f"{metrics['function_count']} functions"
            )
            return arch_summary

        if 'requirement' in question_lower:
            if 'software' in question_lower:
                # Sorted union keeps the listing stable between runs.
                req_list = sorted(
                    set(tech_stack['dependencies']) | set(tech_stack['frameworks'])
                )
                return (
                    f"Software Requirements:\n- Python environment\n"
                    f"- Dependencies: {', '.join(req_list)}"
                )
            if 'hardware' in question_lower:
                return self._hardware_answer(metrics['complexity_score'])

        # For other questions, generate a contextual summary
        return self.generate_summary(combined_code, f"Context: {question}")

    def analyze_project(self, project_dir: str, questions_file: str) -> Dict[str, Any]:
        """Analyze project and answer questions.

        Args:
            project_dir: Root directory of the project to analyze.
            questions_file: Path to a UTF-8 text file with one question per
                line; blank lines are ignored.

        Returns:
            A dict with the keys ``"project_summary"``, ``"tech_stack"``,
            ``"metrics"``, ``"objective"`` and ``"answers"`` (question text ->
            answer text). When no code files are found, placeholder values
            are returned and the questions file is not read.

        Raises:
            OSError: If the questions file cannot be opened.
        """
        code_files = self.read_code_files(project_dir)
        if not code_files:
            return {
                "project_summary": "No code files found",
                "tech_stack": {},
                "metrics": {},
                "objective": "No code files to analyze",
                "answers": {},
            }

        # Read the questions before any model inference so that a missing or
        # unreadable file fails fast instead of after the expensive work.
        questions = self._read_questions(questions_file)

        # Perform various analyses
        tech_stack = self.detect_technologies(code_files)
        metrics = self.analyze_code_complexity(code_files)
        objective = self.identify_objective(code_files)

        # Generate overall summary
        combined_code = "\n\n".join(code_files.values())
        summary = self.generate_summary(combined_code)

        # Generate targeted answers based on analysis results
        answers = {
            question: self._answer_question(
                question, tech_stack, metrics, objective, combined_code
            )
            for question in questions
        }

        return {
            "project_summary": summary,
            "tech_stack": tech_stack,
            "metrics": metrics,
            "objective": objective,
            "answers": answers,
        }


# Example usage (kept disabled):
#
# if __name__ == "__main__":
#     analyzer = CodeAnalyzer2()
#     results = analyzer.analyze_project(
#         "./example_project",
#         "./questions.txt"
#     )
#     print("\nProject Objective:", results["objective"])
#     print("\nTechnology Stack:")
#     for category, items in results["tech_stack"].items():
#         print(f"- {category.title()}: {', '.join(items)}")
#     print("\nCode Metrics:")
#     for metric, value in results["metrics"].items():
#         print(f"- {metric.replace('_', ' ').title()}: {value}")
#     print("\nAnswers to Questions:")
#     for q, a in results["answers"].items():
#         print(f"\n{q}:\n{a}")