---
library_name: transformers
license: apache-2.0
base_model:
- Qwen/Qwen2.5-Coder-3B-Instruct
---

# Model Card for Model ID

Generates and edits minimal multi-file Python code. Right now it consistently generates up to 2-3 files together with a bash runner script (`runner.sh` / `run.sh`) that orchestrates them. Maintains PEP 8 style.

## Model Details

### Model Description

- **Developed by:** Reshinth Adithyan
- **License:** Apache 2.0

### Model Sources [optional]

- **Repository:** https://github.com/reshinthadithyan/repo-level-code/tree/main

### Generated Format

The model generates the repository in the following format. Code to parse it and build the repository on disk is given below.

<!-- NOTE(review): the angle-bracket tags in this example were lost in a previous
     export and have been reconstructed. <fileN> and <bashfile> are confirmed by the
     parser's regex; <libs>, <planning>, <requirements>, <output> and <content> (and
     the extra '>' after <requirements>) should be confirmed against real model output. -->

```txt
<libs>pytorch,wandb</libs>
<planning>PLANNING AS MARKDOWN FORMAT</planning>
<requirements>>CONTENT FOR THE REQS FILE HERE</requirements>
<output><file1>src/dataset.py<content>YOUR PYTHON CODE HERE</content></file1>
<file2>src/model.py<content>YOUR PYTHON CODE HERE</content></file2>
<bashfile>run.sh<content>python3 src/model.py</content></bashfile></output>
```

## Example

An example of generated output is given [here](https://huggingface.co/reshinthadith/local-repo-coder-v0/blob/main/example.txt). The script below turns it into the following repository:

```text
Repository generated at: ./output_dir/demo2
demo2/
    run.sh
    src/
        visualize_timeseries.py
```

### Usage

```python
import os
import re
from pathlib import Path

import fire
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): the "output" and "content" tag names were reconstructed after the
# markup was stripped from this snippet; confirm them against real model output.
_OUTPUT_RE = re.compile(r"<output>(.*?)</output>", re.DOTALL)
# Matches <fileN> / <bashfile> entries: tag, file name, then the wrapped content.
_FILE_RE = re.compile(
    r"<(file\d+|bashfile)>([^<]+?)<content>(.*?)</content>\s*</\1>",
    re.DOTALL,
)


def generate_repo_from_string(input_str: str, output_dir: str) -> None:
    """
    Parse the tagged model output and write each file below ``output_dir``.

    - Searches for the <output>...</output> section.
    - Within that, finds all <fileN> or <bashfile> entries:
        <file1>path/to/file.ext<content>...file content...</content></file1>
        <bashfile>script.sh<content>...script content...</content></bashfile>

    Args:
        input_str: The full string containing markup.
        output_dir: Directory where files will be created. Existing files
            will be overwritten.

    Raises:
        ValueError: If no <output> section is present, or if a file name
            would resolve to a location outside ``output_dir``.
    """
    out_match = _OUTPUT_RE.search(input_str)
    if not out_match:
        raise ValueError("No <output> section found in input.")
    output_section = out_match.group(1)

    root = Path(output_dir).resolve()
    for _tag, filename, content in _FILE_RE.findall(output_section):
        file_path = (root / filename.strip()).resolve()
        # File names come from the model, so refuse absolute paths and "../"
        # tricks that would write outside the target directory.
        if root not in file_path.parents:
            raise ValueError(
                f"Refusing to write outside the output directory: {filename!r}"
            )
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Strip only one leading newline, so intentional blank lines at the
        # top of a file are preserved.
        text = content[1:] if content.startswith("\n") else content
        file_path.write_text(text, encoding="utf-8")

    print(f"Repository generated at: {output_dir}")


def load_model(model_path: str):
    """
    Load the model and tokenizer from the specified path.

    The model is moved to the first CUDA device when one is available and
    stays on the CPU otherwise.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto").to(device)
    model.eval()
    return model, tokenizer


def list_files(startpath: str) -> None:
    """Print the directory tree below ``startpath``, indented 4 spaces per level."""
    for root, dirs, files in os.walk(startpath):
        dirs.sort()  # in-place sort makes the traversal order deterministic
        rel = os.path.relpath(root, startpath)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = " " * 4 * level
        print(f"{indent}{os.path.basename(os.path.normpath(root))}/")
        subindent = " " * 4 * (level + 1)
        for name in sorted(files):
            print(f"{subindent}{name}")


def main(
    model_path: str = "./models_dir/repo_coder_v1",
    prompt: str = "Generate a small python repo for matplotlib to visualize timeseries data to read from timeseries.csv file using polars.",
    output_path: str = "./output_dir/demo2",
) -> None:
    """
    Generate a repository from ``prompt`` and write it to ``output_path``.

    Args:
        model_path: Local path or hub id of the fine-tuned model.
        prompt: Natural-language description of the repository to generate.
        output_path: Directory in which the generated files are created.
    """
    input_prompt = f"###Instruction: {prompt}"

    model, tokenizer = load_model(model_path)
    print(f"Loaded model from {model_path}.")

    model_inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_length counts the prompt tokens as well as the generated ones.
        output = model.generate(
            **model_inputs,
            max_length=1024,
            do_sample=True,
            temperature=0.9,
            top_p=0.95,
            top_k=50,
        )
    generated_code_repo = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Generated code repo: {generated_code_repo}")

    Path(output_path).mkdir(parents=True, exist_ok=True)
    generate_repo_from_string(generated_code_repo, output_path)
    list_files(output_path)


if __name__ == "__main__":
    fire.Fire(main)
```