Spaces:

vijayvizag
/

code-to-doc-streamlit

Runtime error

App Files Files Community

vijayvizag committed 10 days ago

Commit

be94910

1 Parent(s): a3e70bc

initial code commit

Browse files

Files changed (20) hide show

.gitattributes +0 -2
.gitignore +10 -0
Procfile +1 -0
README.md +37 -29
WELCOME.md +30 -0
app.py +127 -20
code_analyzer.py +231 -0
example_project/calculator.py +20 -0
headings.txt +0 -27
packages.txt +1 -0
project_presentation.pptx +0 -0
project_report.docx +0 -0
questions.txt +4 -0
requirements.txt +8 -8
sample_code/sample.py +0 -3
uploaded_code/sample.py +0 -3
utils/__pycache__/doc_generator.cpython-311.pyc +0 -0
utils/__pycache__/summarizer.cpython-311.pyc +0 -0
utils/doc_generator.py +0 -19
utils/summarizer.py +0 -20

.gitattributes CHANGED Viewed

@@ -23,9 +23,7 @@
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,10 @@

+__pycache__/
+*.py[cod]
+*$py.class
+.env
+.venv
+env/
+venv/
+ENV/
+.streamlit/
+.DS_Store

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: streamlit run app.py

README.md CHANGED Viewed

@@ -1,36 +1,44 @@
----
-title: Code To Doc Streamlit
-emoji: 🚀
-colorFrom: gray
-colorTo: red
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-license: apache-2.0
-short_description: doc gen
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-# Code to Project Document Generator
-This is a Streamlit app that takes code files and a heading structure, and generates:
-- A DOCX project report
-- A PPTX presentation
-It uses lightweight models like T5 and tools like python-docx and pptx. Deployable on Hugging Face Spaces!
-## Usage
-1. Upload your code files (Python or React)
-2. Upload a `headings.txt` file with your report headings
-3. Click Generate
-## Output
-- `project_report.docx`
-- `project_presentation.pptx`
-## Run Locally
 ```bash
 pip install -r requirements.txt
 streamlit run app.py
 ```

+# Code Analyzer
+[![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://huggingface.co/spaces/vijayvizag/code-to-doc-streamlit)
+This tool analyzes code projects and generates descriptive summaries along with answers to specific questions about the codebase.
+## Features
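+- Accepts Python, Java, JavaScript/TypeScript and React source files (dependency detection, code metrics and docstring extraction currently run on Python files only)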
+- Detects technology stack and dependencies
+- Measures code complexity metrics
+- Generates project summaries using transformer models
+- Interactive Streamlit interface with visualizations
+- Provides targeted answers to specific questions about the codebase
+## Demo
+You can try the live demo on [Hugging Face Spaces](https://huggingface.co/spaces/vijayvizag/code-to-doc-streamlit)
+## Local Setup
+1. Install dependencies:
 ```bash
 pip install -r requirements.txt
+```
+2. Run the Streamlit app:
+```bash
 streamlit run app.py
 ```
+## Usage
+1. Upload your code files (supported: .py, .java, .js, .jsx, .ts, .tsx)
+2. Enter your analysis questions (or use the default ones)
+3. Click "Analyze Code" to get insights about your project
+## Analysis Capabilities
+- Technology stack detection (languages, frameworks, dependencies)
+- Code metrics (lines of code, class/function count, complexity)
+- Project objective identification from documentation
+- Customizable question-answering system
+- Interactive visualizations of code metrics
+## Requirements
+- Python 3.8+
+- 4GB+ RAM
+- CUDA-capable GPU (optional, for faster processing)

WELCOME.md ADDED Viewed

	@@ -0,0 +1,30 @@

+# 🔍 Code Project Analyzer
+Welcome to the Code Project Analyzer! This tool helps you analyze your code projects using AI-powered insights.
+## 🚀 Features
+- Analyze Python, Java, and React code files
+- Detect technology stack and dependencies
+- Measure code complexity metrics
+- Generate project summaries
+- Get answers to specific questions about your code
+## 📝 How to Use
+1. Upload your code files (supported: .py, .java, .js, .jsx, .ts, .tsx)
+2. Enter your analysis questions or use the default ones
+3. Click "Analyze Code" to get insights about your project
+## 🎯 Example Questions
+- What is the project's abstract?
+- What is the system architecture?
+- What are the software requirements?
+- What are the hardware requirements?
+## 🔄 Results
+You'll get:
+- Project objective and tech stack overview
+- Interactive code metrics visualization
+- Detailed answers to your questions
+- Complexity assessment
+Made with ❤️ using Streamlit and Hugging Face

app.py CHANGED Viewed

@@ -1,29 +1,136 @@
 import streamlit as st
-from utils.summarizer import summarize_code
-from utils.doc_generator import generate_document, generate_pptx
 import os
-st.title("📄 Code to Project Document Generator")
-st.write("Upload your code files and a headings.txt file.")
-uploaded_files = st.file_uploader("Upload code files (Python/React)", accept_multiple_files=True)
-headings_file = st.file_uploader("Upload headings.txt", type="txt")
-if st.button("Generate Document") and uploaded_files and headings_file:
-    with open("headings.txt", "wb") as f:
-        f.write(headings_file.read())
-    code_dir = "uploaded_code"
-    os.makedirs(code_dir, exist_ok=True)
-    for file in uploaded_files:
-        with open(os.path.join(code_dir, file.name), "wb") as f:
-            f.write(file.read())
-    sections = summarize_code(code_dir, "headings.txt")
-    generate_document(sections)
-    generate_pptx(sections)
-    st.success("Documents generated!")
-    st.download_button("Download DOCX", data=open("project_report.docx", "rb"), file_name="project_report.docx")
-    st.download_button("Download PPTX", data=open("project_presentation.pptx", "rb"), file_name="project_presentation.pptx")

 import streamlit as st
 import os
+import tempfile
+import shutil
+from code_analyzer import CodeAnalyzer
+import plotly.express as px
+import pandas as pd
+# Configure the browser tab title, the tab icon and the wide page layout.
+st.set_page_config(
+    page_title="Code Analyzer",
+    page_icon="🔍",
+    layout="wide"
+)
+# Page heading and the one-line introduction rendered at the top of the app.
+st.title("🔍 Code Project Analyzer")
+st.write("Upload your code files and analyze them with AI-powered insights")
+def create_metrics_chart(metrics):
+    """Build a Plotly bar chart with one bar per entry of ``metrics``.
+
+    The mapping's keys become the x-axis categories ("Metric") and its
+    values the bar heights ("Value"). The figure is returned, not rendered.
+    """
+    names = list(metrics.keys())
+    values = list(metrics.values())
+    frame = pd.DataFrame({'Metric': names, 'Value': values})
+    return px.bar(frame, x='Metric', y='Value', title='Code Metrics')
+def display_tech_stack(tech_stack):
+    """Render the detected technology stack as three side-by-side columns.
+
+    Args:
+        tech_stack: Mapping that may contain the keys "languages",
+            "frameworks" and "dependencies", each holding an iterable of
+            display names. A missing or empty entry is shown as
+            "No ... detected" instead of raising KeyError, so a partial or
+            empty mapping is safe to pass.
+    """
+    st.subheader("🛠️ Technology Stack")
+    # (column title, key in tech_stack, message shown when nothing was found)
+    sections = (
+        ("Languages", "languages", "No languages detected"),
+        ("Frameworks", "frameworks", "No frameworks detected"),
+        ("Dependencies", "dependencies", "No dependencies detected"),
+    )
+    for column, (title, key, empty_message) in zip(st.columns(len(sections)), sections):
+        with column:
+            st.write(f"**{title}**")
+            items = tech_stack.get(key) or []
+            if not items:
+                st.write(empty_message)
+                continue
+            for item in items:
+                st.write(f"- {item}")
+def save_uploaded_files(uploaded_files):
+    """Write every uploaded file into a fresh temporary directory.
+
+    Args:
+        uploaded_files: Iterable of objects exposing ``name`` (the
+            client-supplied file name) and ``getbuffer()`` (the file bytes).
+
+    Returns:
+        Path of the newly created temporary directory. The caller owns it
+        and is responsible for deleting it.
+    """
+    temp_dir = tempfile.mkdtemp()
+    for uploaded_file in uploaded_files:
+        # The name comes from the client: keep only its last path component
+        # so something like "../../x.py" cannot be written outside temp_dir.
+        safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
+        if not safe_name:
+            continue
+        with open(os.path.join(temp_dir, safe_name), "wb") as f:
+            f.write(uploaded_file.getbuffer())
+    return temp_dir
+# File upload section: only the listed source-file extensions are accepted.
+uploaded_files = st.file_uploader(
+    "Upload your code files",
+    accept_multiple_files=True,
+    type=['py', 'java', 'js', 'jsx', 'ts', 'tsx']
+)
+# Questions input: one question per line, pre-filled with the defaults.
+st.subheader("📝 Analysis Questions")
+default_questions = """What is the project's abstract?
+What is the system architecture?
+What are the software requirements?
+What are the hardware requirements?"""
+questions = st.text_area(
+    "Enter your questions (one per line)",
+    value=default_questions,
+    height=150
+)
+analyze_button = st.button("🔍 Analyze Code")
+if analyze_button and uploaded_files:
+    with st.spinner("Analyzing your code..."):
+        # Save uploaded files
+        temp_dir = save_uploaded_files(uploaded_files)
+        # Everything after the directory exists runs under try/finally so the
+        # directory is removed even when writing the questions file fails.
+        try:
+            # Save questions to a temporary file next to the uploaded code
+            questions_file = os.path.join(temp_dir, "questions.txt")
+            with open(questions_file, "w") as f:
+                f.write(questions)
+            # Run analysis
+            analyzer = CodeAnalyzer()
+            results = analyzer.analyze_project(temp_dir, questions_file)
+            metrics = results["metrics"]
+            # Display results in tabs
+            tab1, tab2, tab3 = st.tabs(["📊 Overview", "💻 Code Metrics", "❓ Q&A"])
+            with tab1:
+                st.subheader("🎯 Project Objective")
+                st.write(results["objective"])
+                display_tech_stack(results["tech_stack"])
+            with tab2:
+                st.subheader("📊 Code Metrics")
+                if metrics:
+                    st.plotly_chart(create_metrics_chart(metrics), use_container_width=True)
+                    # Complexity assessment; a missing score counts as 0
+                    # instead of raising KeyError.
+                    score = metrics.get("complexity_score", 0)
+                    if score < 10:
+                        complexity = "Low"
+                    elif score < 30:
+                        complexity = "Medium"
+                    else:
+                        complexity = "High"
+                    st.info(f"Project Complexity: {complexity}")
+                else:
+                    st.info("No code metrics available")
+            with tab3:
+                st.subheader("❓ Analysis Results")
+                for question, answer in results["answers"].items():
+                    with st.expander(question):
+                        st.write(answer)
+        except Exception as e:
+            st.error(f"An error occurred during analysis: {str(e)}")
+        finally:
+            # Cleanup; a failed delete must not replace the analysis outcome.
+            shutil.rmtree(temp_dir, ignore_errors=True)
+elif analyze_button:
+    st.warning("Please upload some code files first!")

code_analyzer.py ADDED Viewed

	@@ -0,0 +1,231 @@

+from transformers import pipeline
+import os
+import glob
+import ast
+import re
+from typing import List, Dict, Set, Any
+import pkg_resources
+import importlib.util
+from collections import defaultdict
+class CodeAnalyzer:
+    def __init__(self):
+        """Create the summarization pipeline stored on ``self.summarizer``."""
+        # A single model (facebook/bart-large-cnn) is created here; the
+        # summarizing methods of this class all call it via self.summarizer.
+        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    def detect_technologies(self, code_files: Dict[str, str]) -> Dict[str, Any]:
+        """Detect languages, imported packages and known frameworks.
+
+        Languages are derived from file extensions (case-insensitively).
+        Dependencies are the top-level names of absolute imports found in
+        Python files; relative imports are skipped because they point inside
+        the project itself. Frameworks are the dependencies that appear in a
+        fixed table of well-known packages.
+
+        Args:
+            code_files: Mapping of file path to file content.
+
+        Returns:
+            Dict with the keys "languages", "frameworks" and "dependencies",
+            each a sorted list of strings (sorted so the output is stable
+            between runs).
+        """
+        extensions_map = {
+            '.py': 'Python',
+            '.js': 'JavaScript',
+            '.jsx': 'React/JavaScript',
+            '.ts': 'TypeScript',
+            '.tsx': 'React/TypeScript',
+            '.java': 'Java'
+        }
+        framework_indicators = {
+            'django': 'Django',
+            'flask': 'Flask',
+            'fastapi': 'FastAPI',
+            'react': 'React',
+            'angular': 'Angular',
+            'vue': 'Vue.js',
+            'spring': 'Spring',
+            'tensorflow': 'TensorFlow',
+            'torch': 'PyTorch',
+            'pandas': 'Pandas',
+            'numpy': 'NumPy'
+        }
+        languages: Set[str] = set()
+        dependencies: Set[str] = set()
+        for file_path, content in code_files.items():
+            ext = os.path.splitext(file_path)[1].lower()
+            language = extensions_map.get(ext)
+            if language:
+                languages.add(language)
+            if ext != '.py':
+                continue
+            try:
+                tree = ast.parse(content)
+            except (SyntaxError, ValueError):
+                # Unparseable file: it still counts as Python above but
+                # contributes no dependencies.
+                continue
+            for node in ast.walk(tree):
+                if isinstance(node, ast.Import):
+                    dependencies.update(alias.name.split('.')[0] for alias in node.names)
+                elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
+                    dependencies.add(node.module.split('.')[0])
+        frameworks = {
+            framework_indicators[dep.lower()]
+            for dep in dependencies
+            if dep.lower() in framework_indicators
+        }
+        return {
+            "languages": sorted(languages),
+            "frameworks": sorted(frameworks),
+            "dependencies": sorted(dependencies)
+        }
+    def analyze_code_complexity(self, code_files: Dict[str, str]) -> Dict[str, Any]:
+        """Compute simple size and branching metrics over the Python files.
+
+        Only paths ending in '.py' that parse successfully contribute; other
+        files and unparseable Python files are skipped entirely.
+
+        Args:
+            code_files: Mapping of file path to file content.
+
+        Returns:
+            Dict with "total_lines", "code_lines" (non-blank lines that do not
+            start with '#'), "class_count", "function_count" (sync and async
+            definitions) and "complexity_score" (number of if/for/while/try
+            nodes, including ``async for``).
+        """
+        metrics = {
+            "total_lines": 0,
+            "code_lines": 0,
+            "class_count": 0,
+            "function_count": 0,
+            "complexity_score": 0
+        }
+        function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
+        branch_nodes = (ast.If, ast.For, ast.AsyncFor, ast.While, ast.Try)
+        for file_path, content in code_files.items():
+            if not file_path.endswith('.py'):
+                continue
+            try:
+                tree = ast.parse(content)
+            except (SyntaxError, ValueError):
+                continue
+            # One walk classifies every node; the three categories are
+            # disjoint node types, so elif is safe.
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    metrics["class_count"] += 1
+                elif isinstance(node, function_nodes):
+                    metrics["function_count"] += 1
+                elif isinstance(node, branch_nodes):
+                    metrics["complexity_score"] += 1
+            lines = content.split('\n')
+            metrics["total_lines"] += len(lines)
+            metrics["code_lines"] += sum(
+                1 for line in lines if line.strip() and not line.strip().startswith('#')
+            )
+        return metrics
+    def identify_objective(self, code_files: Dict[str, str]) -> str:
+        """Summarize the project's purpose from its Python docstrings.
+
+        Docstrings of modules, classes and (sync or async) functions in every
+        parseable '.py' file are joined with spaces and passed to
+        ``self.summarizer``.
+
+        Args:
+            code_files: Mapping of file path to file content.
+
+        Returns:
+            The summarizer's 'summary_text', or a fixed fallback sentence when
+            no docstring was found.
+        """
+        documented_nodes = (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
+        all_docs = []
+        for file_path, content in code_files.items():
+            if not file_path.endswith('.py'):
+                continue
+            try:
+                tree = ast.parse(content)
+            except (SyntaxError, ValueError):
+                continue
+            for node in ast.walk(tree):
+                if isinstance(node, documented_nodes):
+                    docstring = ast.get_docstring(node)
+                    if docstring:
+                        all_docs.append(docstring)
+        combined_docs = " ".join(all_docs)
+        if not combined_docs:
+            return "Unable to determine project objective from available documentation"
+        # The joined docstrings have no length bound; truncation=True asks the
+        # pipeline to clip the input to the model's limit instead of failing.
+        result = self.summarizer(
+            combined_docs, max_length=100, min_length=30, do_sample=False, truncation=True
+        )
+        return result[0]['summary_text']
+    def read_code_files(self, directory: str) -> Dict[str, str]:
+        """Load the text of every supported source file below ``directory``.
+
+        The tree is searched recursively once per supported extension. Files
+        that cannot be opened or decoded as UTF-8 are reported on stdout and
+        left out of the result.
+
+        Returns:
+            Mapping of file path (as produced by glob) to file content.
+        """
+        suffixes = ('.py', '.java', '.jsx', '.js', '.ts', '.tsx')
+        contents = {}
+        for suffix in suffixes:
+            pattern = f"{directory}/**/*{suffix}"
+            for file_path in glob.glob(pattern, recursive=True):
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as handle:
+                        text = handle.read()
+                except Exception as e:
+                    print(f"Error reading {file_path}: {e}")
+                else:
+                    contents[file_path] = text
+        return contents
+    def generate_summary(self, code: str, context: str = "") -> str:
+        """Summarize ``code``, optionally prefixed by a line of context.
+
+        Args:
+            code: Source text to summarize; only the first 4000 characters
+                are used.
+            context: Optional text placed on its own line before the code.
+
+        Returns:
+            The summarizer's 'summary_text', or "No code provided" when
+            ``code`` is empty or whitespace only.
+        """
+        if not code.strip():
+            return "No code provided"
+        # Cheap character-level pre-trim to keep the prompt small.
+        code = code[:4000]
+        prompt = f"{context}\n{code}" if context else code
+        # A character cap does not bound the token count (code tokenizes
+        # densely), so also let the pipeline truncate to the model's limit
+        # rather than fail on an over-long input.
+        result = self.summarizer(
+            prompt, max_length=150, min_length=40, do_sample=False, truncation=True
+        )
+        return result[0]['summary_text']
+    def analyze_project(self, project_dir: str, questions_file: str) -> Dict[str, Any]:
+        """Analyze a project directory and answer the questions in a file.
+
+        Args:
+            project_dir: Directory searched recursively for source files.
+            questions_file: Text file with one question per line; blank
+                lines are ignored. It is only opened when code files exist.
+
+        Returns:
+            Dict with the keys "project_summary", "tech_stack", "metrics",
+            "objective" and "answers". "tech_stack" and "metrics" always
+            carry their full key set (empty lists / zeros when no code file
+            was found), so callers can index them without checking first.
+        """
+        # Read code files
+        code_files = self.read_code_files(project_dir)
+        if not code_files:
+            return {
+                "project_summary": "No code files found",
+                "tech_stack": {"languages": [], "frameworks": [], "dependencies": []},
+                "metrics": {
+                    "total_lines": 0,
+                    "code_lines": 0,
+                    "class_count": 0,
+                    "function_count": 0,
+                    "complexity_score": 0
+                },
+                "objective": "No code files to analyze",
+                "answers": {}
+            }
+        # Perform various analyses
+        tech_stack = self.detect_technologies(code_files)
+        metrics = self.analyze_code_complexity(code_files)
+        objective = self.identify_objective(code_files)
+        # Generate overall summary
+        combined_code = "\n\n".join(code_files.values())
+        summary = self.generate_summary(combined_code)
+        # Read questions
+        with open(questions_file, 'r') as f:
+            questions = [line.strip() for line in f if line.strip()]
+        # Map the branch count to a coarse level and the hardware figures
+        # quoted for it; computed once instead of per question.
+        score = metrics['complexity_score']
+        if score < 10:
+            complexity, ram_gb, cpu_cores = "Low", 2, 1
+        elif score < 30:
+            complexity, ram_gb, cpu_cores = "Medium", 4, 2
+        else:
+            complexity, ram_gb, cpu_cores = "High", 8, 4
+        # Generate targeted answers based on analysis results
+        answers = {}
+        for question in questions:
+            question_lower = question.lower()
+            if 'abstract' in question_lower:
+                answers[question] = objective
+            elif 'architecture' in question_lower:
+                arch_summary = f"Project Architecture:\n- Languages: {', '.join(tech_stack['languages'])}\n"
+                if tech_stack['frameworks']:
+                    arch_summary += f"- Frameworks: {', '.join(tech_stack['frameworks'])}\n"
+                arch_summary += f"- Components: {metrics['class_count']} classes, {metrics['function_count']} functions"
+                answers[question] = arch_summary
+            elif 'software' in question_lower and 'requirement' in question_lower:
+                # Sorted so the listed requirements come out in a stable order.
+                req_list = sorted(set(tech_stack['dependencies']) | set(tech_stack['frameworks']))
+                answers[question] = f"Software Requirements:\n- Python environment\n- Dependencies: {', '.join(req_list)}"
+            elif 'hardware' in question_lower and 'requirement' in question_lower:
+                answers[question] = f"Hardware Requirements:\n- Complexity: {complexity}\n- Minimum RAM: {ram_gb}GB\n- CPU: {cpu_cores}+ cores recommended"
+            else:
+                # For other questions, generate a contextual summary
+                answers[question] = self.generate_summary(combined_code, f"Context: {question}")
+        return {
+            "project_summary": summary,
+            "tech_stack": tech_stack,
+            "metrics": metrics,
+            "objective": objective,
+            "answers": answers
+        }
+if __name__ == "__main__":
+    # Command-line demo: analyze the bundled example project with the bundled
+    # questions file and print each section of the report.
+    report = CodeAnalyzer().analyze_project("./example_project", "./questions.txt")
+    print("\nProject Objective:", report["objective"])
+    print("\nTechnology Stack:")
+    for category in report["tech_stack"]:
+        joined = ', '.join(report["tech_stack"][category])
+        print(f"- {category.title()}: {joined}")
+    print("\nCode Metrics:")
+    for metric in report["metrics"]:
+        label = metric.replace('_', ' ').title()
+        print(f"- {label}: {report['metrics'][metric]}")
+    print("\nAnswers to Questions:")
+    for question, answer in report["answers"].items():
+        print(f"\n{question}:\n{answer}")

example_project/calculator.py ADDED Viewed

	@@ -0,0 +1,20 @@

+class Calculator:
+    """Stateless calculator offering the four basic arithmetic operations."""
+    def add(self, x: float, y: float) -> float:
+        """Return the sum of ``x`` and ``y``."""
+        total = x + y
+        return total
+    def subtract(self, x: float, y: float) -> float:
+        """Return ``x`` minus ``y``."""
+        difference = x - y
+        return difference
+    def multiply(self, x: float, y: float) -> float:
+        """Return the product of ``x`` and ``y``."""
+        product = x * y
+        return product
+    def divide(self, x: float, y: float) -> float:
+        """Return ``x`` divided by ``y``.
+
+        Raises:
+            ValueError: If ``y`` equals zero.
+        """
+        if y != 0:
+            return x / y
+        raise ValueError("Cannot divide by zero")

headings.txt DELETED Viewed

@@ -1,27 +0,0 @@
-ABSTRACT
-Introduction
-Python
-Machine Learning
-Deep Learning
-Image Classification
-Working of Image Classification
-Applications of Image Classification
-Architecture
-Data Flow Diagram
-Image Classification Techniques
-Aim of the Project
-Scope
-System Requirements
-Hardware Requirements
-Software Requirements
-Setup Instructions
-Algorithms
-Performance Evaluation
-Comparison of Base Line Models
-Error Analysis
-Methodology
-Results
-Discussion
-CONCLUSION
-REFERENCES
-FUTURE WORK

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python-dev

project_presentation.pptx DELETED Viewed

Binary file (51.7 kB)

project_report.docx DELETED Viewed

Binary file (37 kB)

questions.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+What is the project's abstract?
+What is the system architecture?
+What are the software requirements?
+What are the hardware requirements?

requirements.txt CHANGED Viewed

@@ -1,8 +1,8 @@
-streamlit
-transformers
-sentencepiece
-python-docx
-python-pptx
-plantuml
-huggingface-hub
-torch

+transformers[torch]==4.35.0
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch>=2.0.0
+numpy>=1.24.0
+pandas>=2.0.0
+streamlit>=1.30.0
+plotly>=5.18.0
+altair>=5.2.0

sample_code/sample.py DELETED Viewed

@@ -1,3 +0,0 @@
-# Sample Python file
-def greet(name):
-    return f"Hello, {name}!"

uploaded_code/sample.py DELETED Viewed

@@ -1,3 +0,0 @@
-# Sample Python file
-def greet(name):
-    return f"Hello, {name}!"

utils/__pycache__/doc_generator.cpython-311.pyc DELETED Viewed

Binary file (1.64 kB)

utils/__pycache__/summarizer.cpython-311.pyc DELETED Viewed

Binary file (1.94 kB)

utils/doc_generator.py DELETED Viewed

@@ -1,19 +0,0 @@
-from docx import Document
-from pptx import Presentation
-from pptx.util import Inches
-def generate_document(sections):
-    doc = Document()
-    doc.add_heading("Project Report", 0)
-    for title, content in sections.items():
-        doc.add_heading(title, level=1)
-        doc.add_paragraph(content)
-    doc.save("project_report.docx")
-def generate_pptx(sections):
-    prs = Presentation()
-    for title, content in sections.items():
-        slide = prs.slides.add_slide(prs.slide_layouts[1])
-        slide.shapes.title.text = title
-        slide.placeholders[1].text = content
-    prs.save("project_presentation.pptx")

utils/summarizer.py DELETED Viewed

@@ -1,20 +0,0 @@
-from transformers import pipeline
-import os
-summarizer = pipeline("summarization", model="t5-small")
-def summarize_code(code_dir, headings_path):
-    sections = {}
-    with open(headings_path, "r") as hfile:
-        headings = [line.strip() for line in hfile if line.strip()]
-    for heading in headings:
-        combined_code = ""
-        for root, _, files in os.walk(code_dir):
-            for file in files:
-                with open(os.path.join(root, file), "r", encoding="utf-8", errors="ignore") as f:
-                    combined_code += f.read() + "\n"
-        summary = summarizer(combined_code[:1000], max_length=120, min_length=30, do_sample=False)[0]["summary_text"]
-        sections[heading] = summary
-    return sections