Spaces:

vijayvizag
/

code-to-doc-streamlit

Runtime error

App Files Files Community

code-to-doc-streamlit / code_analyzer2.py

vijayvizag

readme update

bcb80f2 9 days ago

raw

history blame

9.8 kB

	from transformers import pipeline
	import os
	import glob
	import ast
	import re
	from typing import List, Dict, Set, Any
	import pkg_resources
	import importlib.util
	from collections import defaultdict
	import huggingface_hub

	class CodeAnalyzer2:
	    """Analyze a source-code project and answer documentation questions.

	    Python files are inspected with the ``ast`` module (imports, class and
	    function definitions, branching constructs, docstrings). Free-form text
	    (project summary, objective, answers to open questions) is produced by
	    the summarization pipeline stored in ``self.summarizer``.
	    """

	    # Upper bound on the number of characters sent to the summarizer per call.
	    MAX_INPUT_CHARS = 4000

	    # Extensions collected by read_code_files, in the order they are scanned.
	    CODE_EXTENSIONS = ('.py', '.java', '.jsx', '.js', '.ts', '.tsx')

	    # File extension -> language label reported by detect_technologies.
	    LANGUAGE_BY_EXTENSION = {
	        '.py': 'Python',
	        '.js': 'JavaScript',
	        '.jsx': 'React/JavaScript',
	        '.ts': 'TypeScript',
	        '.tsx': 'React/TypeScript',
	        '.java': 'Java',
	    }

	    # Lower-cased top-level import name -> framework label.
	    FRAMEWORK_BY_IMPORT = {
	        'django': 'Django',
	        'flask': 'Flask',
	        'fastapi': 'FastAPI',
	        'react': 'React',
	        'angular': 'Angular',
	        'vue': 'Vue.js',
	        'spring': 'Spring',
	        'tensorflow': 'TensorFlow',
	        'torch': 'PyTorch',
	        'pandas': 'Pandas',
	        'numpy': 'NumPy',
	    }

	    def __init__(self):
	        """Load the summarization pipeline used for every generated text."""
	        self.summarizer = pipeline("summarization", model="Graverman/t5-code-summary")

	    @staticmethod
	    def _parse_python(content: str):
	        """Return the AST of *content*, or None when it cannot be parsed."""
	        try:
	            return ast.parse(content)
	        except (SyntaxError, ValueError, RecursionError, MemoryError):
	            # Analysis is best-effort: an unparsable file is skipped, not fatal.
	            return None

	    def _summarize(self, text: str, max_length: int, min_length: int) -> str:
	        """Run the summarizer on *text* and return the summary string."""
	        result = self.summarizer(
	            text, max_length=max_length, min_length=min_length, do_sample=False
	        )
	        return result[0]['summary_text']

	    def detect_technologies(self, code_files: Dict[str, str]) -> Dict[str, Any]:
	        """Detect languages, dependencies and frameworks used in the project.

	        Args:
	            code_files: Mapping of file path to file content.

	        Returns:
	            A dict with the keys ``"languages"``, ``"frameworks"`` and
	            ``"dependencies"``, each holding a sorted list of strings.
	            Languages come from file extensions; dependencies are the
	            top-level names of absolute imports found in parsable ``.py``
	            files; frameworks are the dependencies with a known label.
	        """
	        languages = set()
	        dependencies = set()

	        for file_path, content in code_files.items():
	            language = self.LANGUAGE_BY_EXTENSION.get(os.path.splitext(file_path)[1])
	            if language:
	                languages.add(language)

	            if not file_path.endswith('.py'):
	                continue
	            tree = self._parse_python(content)
	            if tree is None:
	                continue

	            for node in ast.walk(tree):
	                if isinstance(node, ast.Import):
	                    dependencies.update(alias.name.split('.')[0] for alias in node.names)
	                elif isinstance(node, ast.ImportFrom):
	                    # level > 0 is a relative import, i.e. project-local code
	                    # rather than an external dependency.
	                    if node.module and node.level == 0:
	                        dependencies.add(node.module.split('.')[0])

	        frameworks = {
	            self.FRAMEWORK_BY_IMPORT[dep.lower()]
	            for dep in dependencies
	            if dep.lower() in self.FRAMEWORK_BY_IMPORT
	        }

	        # Sorted lists keep the report stable from one run to the next.
	        return {
	            "languages": sorted(languages),
	            "frameworks": sorted(frameworks),
	            "dependencies": sorted(dependencies),
	        }

	    def analyze_code_complexity(self, code_files: Dict[str, str]) -> Dict[str, Any]:
	        """Compute simple size and complexity metrics over Python files.

	        Only files whose path ends in ``.py`` and that parse successfully
	        contribute to the totals.

	        Args:
	            code_files: Mapping of file path to file content.

	        Returns:
	            A dict with ``total_lines``, ``code_lines`` (non-blank lines that
	            do not start with ``#``), ``class_count``, ``function_count``
	            (sync and async) and ``complexity_score`` (number of ``if``,
	            ``for``, ``while`` and ``try`` statements).
	        """
	        metrics = {
	            "total_lines": 0,
	            "code_lines": 0,
	            "class_count": 0,
	            "function_count": 0,
	            "complexity_score": 0,
	        }

	        for file_path, content in code_files.items():
	            if not file_path.endswith('.py'):
	                continue
	            tree = self._parse_python(content)
	            if tree is None:
	                continue

	            # One walk of the tree collects every counter.
	            for node in ast.walk(tree):
	                if isinstance(node, ast.ClassDef):
	                    metrics["class_count"] += 1
	                elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
	                    metrics["function_count"] += 1
	                elif isinstance(node, (ast.If, ast.For, ast.While, ast.Try)):
	                    metrics["complexity_score"] += 1

	            lines = content.split('\n')
	            metrics["total_lines"] += len(lines)
	            metrics["code_lines"] += sum(
	                1 for line in lines
	                if line.strip() and not line.strip().startswith('#')
	            )

	        return metrics

	    def identify_objective(self, code_files: Dict[str, str]) -> str:
	        """Summarize the project's purpose from its Python docstrings.

	        Args:
	            code_files: Mapping of file path to file content.

	        Returns:
	            The summarizer output for the concatenated module, class and
	            function docstrings (truncated to ``MAX_INPUT_CHARS``), or a
	            fixed fallback message when no docstring is found.
	        """
	        docstrings = []
	        documented = (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)

	        for file_path, content in code_files.items():
	            if not file_path.endswith('.py'):
	                continue
	            tree = self._parse_python(content)
	            if tree is None:
	                continue
	            for node in ast.walk(tree):
	                if isinstance(node, documented):
	                    doc = ast.get_docstring(node)
	                    if doc:
	                        docstrings.append(doc)

	        combined_docs = " ".join(docstrings)
	        if not combined_docs:
	            return "Unable to determine project objective from available documentation"

	        # Same input bound as generate_summary, so large projects do not
	        # hand the model an arbitrarily long text.
	        return self._summarize(
	            combined_docs[:self.MAX_INPUT_CHARS], max_length=100, min_length=30
	        )

	    def read_code_files(self, directory: str) -> Dict[str, str]:
	        """Read every supported code file under *directory*, recursively.

	        Args:
	            directory: Root directory of the project.

	        Returns:
	            Mapping of file path to file content (decoded as UTF-8). Files
	            that cannot be read are reported on stdout and skipped.
	        """
	        code_files = {}
	        # Escape the root so glob metacharacters in its name are literal.
	        root = glob.escape(directory)

	        for ext in self.CODE_EXTENSIONS:
	            # "**" with recursive=True matches the root and all subfolders.
	            pattern = os.path.join(root, "**", f"*{ext}")
	            for file_path in sorted(glob.glob(pattern, recursive=True)):
	                try:
	                    with open(file_path, 'r', encoding='utf-8') as f:
	                        code_files[file_path] = f.read()
	                except (OSError, UnicodeDecodeError) as e:
	                    print(f"Error reading {file_path}: {e}")

	        return code_files

	    def generate_summary(self, code: str, context: str = "") -> str:
	        """Generate a summary for the given code with optional context.

	        Args:
	            code: Source text to summarize; only the first
	                ``MAX_INPUT_CHARS`` characters are used.
	            context: Optional text placed on its own line before the code.

	        Returns:
	            The summary text, or ``"No code provided"`` for blank input.
	        """
	        if not code.strip():
	            return "No code provided"

	        code = code[:self.MAX_INPUT_CHARS]
	        prompt = f"{context}\n{code}" if context else code
	        return self._summarize(prompt, max_length=150, min_length=40)

	    def _answer_question(self, question: str, tech_stack: Dict[str, Any],
	                         metrics: Dict[str, Any], objective: str,
	                         combined_code: str) -> str:
	        """Build the answer to one question from the analysis results."""
	        question_lower = question.lower()

	        if 'abstract' in question_lower:
	            return objective

	        if 'architecture' in question_lower:
	            languages = ', '.join(tech_stack['languages'])
	            arch_summary = f"Project Architecture:\n- Languages: {languages}\n"
	            if tech_stack['frameworks']:
	                frameworks = ', '.join(tech_stack['frameworks'])
	                arch_summary += f"- Frameworks: {frameworks}\n"
	            arch_summary += (
	                f"- Components: {metrics['class_count']} classes, "
	                f"{metrics['function_count']} functions"
	            )
	            return arch_summary

	        if 'software' in question_lower and 'requirement' in question_lower:
	            requirements = sorted(
	                set(tech_stack['dependencies']) | set(tech_stack['frameworks'])
	            )
	            return (
	                "Software Requirements:\n- Python environment\n"
	                f"- Dependencies: {', '.join(requirements)}"
	            )

	        if 'hardware' in question_lower and 'requirement' in question_lower:
	            score = metrics['complexity_score']
	            if score < 10:
	                complexity, ram_gb, cores = "Low", 2, 1
	            elif score < 30:
	                complexity, ram_gb, cores = "Medium", 4, 2
	            else:
	                complexity, ram_gb, cores = "High", 8, 4
	            return (
	                f"Hardware Requirements:\n- Complexity: {complexity}\n"
	                f"- Minimum RAM: {ram_gb}GB\n"
	                f"- CPU: {cores}+ cores recommended"
	            )

	        # Any other question: summarize the code with the question as context.
	        return self.generate_summary(combined_code, f"Context: {question}")

	    def analyze_project(self, project_dir: str, questions_file: str) -> Dict[str, Any]:
	        """Analyze a project and answer the questions listed in a file.

	        Args:
	            project_dir: Root directory of the project to analyze.
	            questions_file: Path of a UTF-8 text file with one question per
	                line; blank lines are ignored.

	        Returns:
	            A dict with the keys ``project_summary``, ``tech_stack``,
	            ``metrics``, ``objective`` and ``answers`` (question -> answer).
	            When no code file is found, placeholder values are returned and
	            the questions file is not opened.

	        Raises:
	            OSError: If code files exist but the questions file cannot be
	                opened.
	        """
	        code_files = self.read_code_files(project_dir)

	        if not code_files:
	            return {
	                "project_summary": "No code files found",
	                "tech_stack": {},
	                "metrics": {},
	                "objective": "No code files to analyze",
	                "answers": {},
	            }

	        tech_stack = self.detect_technologies(code_files)
	        metrics = self.analyze_code_complexity(code_files)
	        objective = self.identify_objective(code_files)

	        combined_code = "\n\n".join(code_files.values())
	        summary = self.generate_summary(combined_code)

	        with open(questions_file, 'r', encoding='utf-8') as f:
	            questions = [line.strip() for line in f if line.strip()]

	        answers = {
	            question: self._answer_question(
	                question, tech_stack, metrics, objective, combined_code
	            )
	            for question in questions
	        }

	        return {
	            "project_summary": summary,
	            "tech_stack": tech_stack,
	            "metrics": metrics,
	            "objective": objective,
	            "answers": answers,
	        }

	# if __name__ == "__main__":
	# analyzer = CodeAnalyzer2()
	# # Example usage
	# results = analyzer.analyze_project(
	# "./example_project",
	# "./questions.txt"
	# )
	# print("\nProject Objective:", results["objective"])
	# print("\nTechnology Stack:")
	# for category, items in results["tech_stack"].items():
	# print(f"- {category.title()}: {', '.join(items)}")

	# print("\nCode Metrics:")
	# for metric, value in results["metrics"].items():
	# print(f"- {metric.replace('_', ' ').title()}: {value}")

	# print("\nAnswers to Questions:")
	# for q, a in results["answers"].items():
	# print(f"\n{q}:\n{a}")