code-to-doc-streamlit / utils /summarizer.py
vijayvizag's picture
Working local version - pushing to HF Spaces
a3e70bc
raw
history blame
741 Bytes
from transformers import pipeline
import os
# Shared summarization pipeline built with the `transformers` `pipeline`
# factory, using the "t5-small" model. It is created once, as a module-level
# side effect when this file is imported, and reused by summarize_code().
# NOTE(review): presumably this loads (and may download) the model weights at
# import time — confirm against the transformers documentation.
summarizer = pipeline("summarization", model="t5-small")
def _read_code_prefix(code_dir, max_chars):
    """Return the first ``max_chars`` characters of all files under ``code_dir``.

    Files are concatenated with a trailing newline after each one. Reading
    stops as soon as the character budget is filled, so large trees are not
    loaded into memory just to be truncated afterwards.
    """
    parts = []
    remaining = max_chars
    for root, dirs, files in os.walk(code_dir):
        # Sort in place so the traversal order (and therefore the text that
        # ends up being summarized) is reproducible across filesystems.
        dirs.sort()
        for name in sorted(files):
            if remaining <= 0:
                return "".join(parts)
            path = os.path.join(root, name)
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                # Read at most the characters still needed, then add the
                # per-file separator and trim in case it overshoots.
                chunk = (f.read(remaining) + "\n")[:remaining]
            parts.append(chunk)
            remaining -= len(chunk)
    return "".join(parts)


def summarize_code(code_dir, headings_path, max_chars=1000):
    """Summarize the code under ``code_dir`` for every heading in a file.

    The headings file is read line by line; blank lines are skipped and
    surrounding whitespace is stripped. The heading text is not used to
    select or condition the summarized code: the same leading slice of the
    concatenated source is summarized once and that summary is assigned to
    each heading.

    Args:
        code_dir: Directory that is walked recursively for files to read.
        headings_path: Path to a text file with one heading per line.
        max_chars: Number of leading characters of the concatenated code
            passed to the summarizer. Expected to be non-negative.
            Defaults to 1000.

    Returns:
        A dict mapping each heading (in file order) to the summary text.
        An empty dict is returned when the file contains no headings; in
        that case neither the code directory nor the model is touched.

    Raises:
        OSError: If the headings file or a visited code file cannot be opened.
    """
    with open(headings_path, "r", encoding="utf-8") as hfile:
        headings = [line.strip() for line in hfile if line.strip()]

    # Nothing to label: skip the directory walk and the model call entirely.
    if not headings:
        return {}

    # The input text and the greedy (do_sample=False) generation settings are
    # the same for every heading, so run the expensive inference only once.
    # If no text was collected the summarizer is still called with "".
    snippet = _read_code_prefix(code_dir, max_chars)
    summary = summarizer(
        snippet, max_length=120, min_length=30, do_sample=False
    )[0]["summary_text"]

    return {heading: summary for heading in headings}