Spaces:

shekkari21
/

codereviewer

Running

File size: 5,450 Bytes

2a21e9f

from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
from pydantic import BaseModel
from typing import List
from clearml import Model
import torch
from configs import add_args
from models import build_or_load_gen_model
import argparse
from argparse import Namespace
import os
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig

MAX_SOURCE_LENGTH = 512

def pad_assert(tokenizer, source_ids):
    source_ids = source_ids[:MAX_SOURCE_LENGTH - 2]
    source_ids = [tokenizer.bos_id] + source_ids + [tokenizer.eos_id]
    pad_len = MAX_SOURCE_LENGTH - len(source_ids)
    source_ids += [tokenizer.pad_id] * pad_len
    assert len(source_ids) == MAX_SOURCE_LENGTH, "Not equal length."
    return source_ids

# Encode code content and comment into model input
def encode_diff(tokenizer, code, comment):
    # Tokenize code file content
    code_ids = tokenizer.encode(code, max_length=MAX_SOURCE_LENGTH, truncation=True)[1:-1]
    # Tokenize comment
    comment_ids = tokenizer.encode(comment, max_length=MAX_SOURCE_LENGTH, truncation=True)[1:-1]
    # Concatenate: [BOS] + code + [EOS] + [msg_id] + comment
    source_ids = [tokenizer.bos_id] + code_ids + [tokenizer.eos_id]
    source_ids += [tokenizer.msg_id] + comment_ids
    # Pad/truncate to fixed length
    source_ids = source_ids[:MAX_SOURCE_LENGTH - 2]
    source_ids = [tokenizer.bos_id] + source_ids + [tokenizer.eos_id]
    pad_len = MAX_SOURCE_LENGTH - len(source_ids)
    source_ids += [tokenizer.pad_id] * pad_len
    assert len(source_ids) == MAX_SOURCE_LENGTH, "Not equal length."
    return source_ids

# Load base model architecture and tokenizer from HuggingFace
BASE_MODEL_NAME = "microsoft/codereviewer"
args = Namespace(
    model_name_or_path=BASE_MODEL_NAME,
    load_model_path=None,
    # Add other necessary default arguments if build_or_load_gen_model requires them
)
print(f"Loading base model architecture and tokenizer from: {BASE_MODEL_NAME}")
config, base_model, tokenizer = build_or_load_gen_model(args)
print("Base model architecture and tokenizer loaded.")

# Download the fine-tuned weights from ClearML
CLEARML_MODEL_ID = "34e25deb24c64b74b29c8519ed15fe3e"
model_obj = Model(model_id=CLEARML_MODEL_ID)
finetuned_weights_path = model_obj.get_local_copy()
adapter_dir = os.path.dirname(finetuned_weights_path)

print(f"Fine-tuned adapter weights downloaded to directory: {adapter_dir}")

# Create LoRA configuration matching the fine-tuned checkpoint
lora_cfg = LoraConfig(
    r=64,
    lora_alpha=128,
    target_modules=["q", "wo", "wi", "v", "o", "k"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)
# Wrap base model with PEFT LoRA
peft_model = get_peft_model(base_model, lora_cfg)
# Load adapter-only weights and merge into base
adapter_state = torch.load(finetuned_weights_path, map_location="cpu")
peft_model.load_state_dict(adapter_state, strict=False)
model = peft_model.merge_and_unload()
print("Merged base model with LoRA adapters.")

model.to("cpu")
model.eval()
print("Model ready for inference.")

app = FastAPI()

last_payload = {"comment": "", "files": []}
last_infer_result = {"generated_code": ""}

class FileContent(BaseModel):
    filename: str
    content: str

class PRPayload(BaseModel):
    comment: str
    files: List[FileContent]

class InferenceRequest(BaseModel):
    comment: str
    files: List[FileContent]


@app.get("/")
def root():
    return {"message": "FastAPI PR comment service is running"}

@app.post("/pr-comments")
async def receive_pr_comment(payload: PRPayload):
    global last_payload
    last_payload = payload.dict()
    # Return the received payload as JSON and also redirect to /show
    return JSONResponse(content={"status": "received", "payload": last_payload, "redirect": "/show"})

@app.get("/show", response_class=HTMLResponse)
def show_last_comment():
    html = f"<h2>Received Comment</h2><p>{last_payload['comment']}</p><hr>"
    for file in last_payload["files"]:
        html += f"<h3>{file['filename']}</h3><pre>{file['content']}</pre><hr>"
    return html

@app.post("/infer")
async def infer(request: InferenceRequest):
    global last_infer_result
    print("[DEBUG] Received /infer request with:", request.dict())
    
    code = request.files[0].content if request.files else ""
    source_ids = encode_diff(tokenizer, code, request.comment)
    # print("[DEBUG] source_ids:", source_ids)
    #tokens = [tokenizer.decode([sid], skip_special_tokens=False) for sid in source_ids]
    #print("[DEBUG] tokens:", tokens)
    inputs = torch.tensor([source_ids], dtype=torch.long)
    inputs_mask = inputs.ne(tokenizer.pad_id)
    
    preds = model.generate(
        inputs,
        attention_mask=inputs_mask,
        use_cache=True,
        num_beams=5,
        early_stopping=True,
        max_length=100,
        num_return_sequences=1
    )
    
    pred = preds[0].cpu().numpy()
    pred_nl = tokenizer.decode(pred[2:], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    last_infer_result = {"generated_code": pred_nl}
    return last_infer_result

@app.get("/show-infer", response_class=HTMLResponse)
def show_infer_result():
    html = f"<h2>Generated Message</h2><pre>{last_infer_result['generated_code']}</pre>"
    return html

if __name__ == "__main__":
    # Place any CLI/training logic here if needed
    # This block is NOT executed when running with uvicorn
    pass