File size: 1,465 Bytes
fa6dfc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


class CodeT5:
    """Thin wrapper around the Salesforce CodeT5 seq2seq model for analyzing
    source files fetched from a GitHub repository listing."""

    # File extensions treated as analyzable source code.
    SOURCE_EXTENSIONS = (".py", ".js", ".java", ".c", ".cpp")

    def __init__(self):
        # BUGFIX: transformers provides no `CodeT5ForCodeGeneration` or
        # `CodeT5Tokenizer` classes — the original import raised ImportError.
        # CodeT5 is published as a T5-style seq2seq checkpoint with a
        # RoBERTa-style tokenizer, so the Auto* factories resolve correctly.
        self.tokenizer = AutoTokenizer.from_pretrained('Salesforce/codet5-base')
        self.model = AutoModelForSeq2SeqLM.from_pretrained('Salesforce/codet5-base')

    def analyze(self, repo_data, github_api):
        """Run the model over every source file in *repo_data*.

        Args:
            repo_data: list of GitHub content-API dicts (keys used: "type",
                "name", "download_url"), or an error string propagated from
                the API layer.
            github_api: object exposing `get_file_content(download_url)`,
                returning file text or an "Error..."-prefixed string.

        Returns:
            One "name: result" line per analyzed file, joined with newlines;
            or *repo_data* unchanged when it is already an error string.
        """
        if isinstance(repo_data, str):  # upstream error string from github_api
            return repo_data
        optimization_results = []
        for file in repo_data:
            if file["type"] == "file" and file["name"].endswith(self.SOURCE_EXTENSIONS):
                content = github_api.get_file_content(file["download_url"])
                # Per-file fetch errors are reported inline, not raised,
                # so one bad file does not abort the whole run.
                if isinstance(content, str) and content.startswith("Error"):
                    optimization_results.append(f"{file['name']}: {content}")
                    continue
                try:
                    inputs = self.tokenizer.encode(content, return_tensors="pt", max_length=512, truncation=True)
                    # Inference only: skip autograd bookkeeping (the original
                    # imported torch but never used it; generation without
                    # no_grad needlessly tracks gradients).
                    with torch.no_grad():
                        outputs = self.model.generate(inputs, max_length=256)
                    decoded_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
                    optimization_results.append(f"{file['name']}: {decoded_output}")
                except Exception as e:
                    # Best-effort per file: record the failure and continue.
                    optimization_results.append(f"{file['name']}: Error analyzing - {e}")
        return "\n".join(optimization_results)