Spaces:
Paused
Paused
from fastapi import FastAPI, HTTPException | |
from pydantic import BaseModel, Field | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
from langchain_core.prompts import ChatPromptTemplate | |
import json | |
from firecrawl import FirecrawlApp | |
import gspread | |
import os | |
from dotenv import load_dotenv | |
import json | |
# Load variables from a .env file (when one exists) into the environment
# before any of the settings below are read.
load_dotenv()

# API keys and the spreadsheet key come from the environment; os.getenv
# yields None for any variable that is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET")

# The service-account credentials are expected as a single JSON document in
# the `cred_dict` environment variable.
cred_dic = os.getenv("cred_dict")
if not cred_dic:
    # json.loads(None) would otherwise fail with an opaque TypeError.
    raise RuntimeError(
        "Environment variable 'cred_dict' is not set; it must contain the "
        "service-account credentials as a JSON object."
    )
cred_dict = json.loads(cred_dic)

# Set up the Google Sheets connection: authenticate with the credentials
# dict, open the spreadsheet by its key, then select the worksheet "S1".
gc = gspread.service_account_from_dict(cred_dict)
sh = gc.open_by_key(SHEET_ID)
worksheet = sh.worksheet("S1")
def url_scrape(url: str):
    """Scrape ``url`` through Firecrawl, requesting markdown output.

    Args:
        url: Address of the page to scrape.

    Returns:
        The value returned by ``FirecrawlApp.scrape_url`` for this request,
        passed through unmodified.

    Raises:
        Any exception raised by the Firecrawl client propagates to the caller.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    # No try/except here: a handler around a bare `return` can never fire,
    # and scrape failures are meant to reach the caller rather than be hidden.
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
# Structured-output schema for job description extraction; fastapi_func binds
# it to the language model via with_structured_output.
# NOTE: documented with `#` comments on purpose -- pydantic would surface a
# class docstring as the schema description, changing what the model receives.
class JDE(BaseModel):
    # Job title.
    Role: str = Field(
        description="Title of the job",
    )
    # Hiring company.
    Company: str = Field(
        description="Name of the company",
    )
    # Skills / tech stack; written to the sheet by fastapi_func.
    Requirements: str = Field(
        description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.",
    )
    # Industry category; written to the sheet by fastapi_func.
    Industry: str = Field(
        description="Type of Industry the job belongs to",
    )
    # Working arrangement.
    Type: str = Field(
        description="Working style (Remote, Hybrid, Onsite)",
    )
    # Company location.
    Location: str = Field(
        description="Location of the company",
    )
# System message of the extraction prompt.
_JD_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
)

# Human message template. `{job_description}` is filled in at invoke time;
# the doubled braces are escapes that render as literal braces.
_JD_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""


def fastapi_func(
    links: str,
    company: str,
    role: str,
    one_liner: str,
    reward: str,
    locations: str,
    tech_stack: str,
    workplace: str,
    salary: str,
    equity: str,
    yoe: str,
    team_size: str,
    funding: str,
    website: str,
) -> "JDE":
    """Scrape a job posting, extract structured details, and log a sheet row.

    The page at ``links`` is scraped with ``url_scrape`` and the result is
    given to a Gemini chat model configured to return a ``JDE`` object. One
    row is then appended to the module-level ``worksheet``: the fourteen
    arguments in signature order, followed by the extracted ``Requirements``
    and ``Industry``.

    Args:
        links: URL of the job posting; scraped and stored as the first column.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Values written to
            the sheet unchanged; they are not sent to the language model.

    Returns:
        The ``JDE`` instance produced by the structured-output chain.

    Raises:
        ValueError: If the chain yields no structured result.
        Exception: Errors from scraping, the language model, or the Sheets
            append propagate unchanged.
    """
    # Scrape the job description from the provided link.
    jd = url_scrape(links)

    query_prompt = ChatPromptTemplate.from_messages([
        ("system", _JD_SYSTEM_PROMPT),
        ("human", _JD_HUMAN_PROMPT),
    ])

    # Bind the JDE schema so the model's reply is parsed into a JDE object.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    extraction_chain = query_prompt | llm.with_structured_output(JDE)

    q = extraction_chain.invoke({"job_description": jd})
    if q is None:
        # NOTE(review): presumably the chain can return None when the model
        # gives no parsable structured reply -- confirm against LangChain docs.
        # Failing here is clearer than an AttributeError on `q.Requirements`.
        raise ValueError("The language model returned no structured job details.")

    # Caller-supplied columns first, then the two extracted fields.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, q.Requirements, q.Industry,
    ]
    worksheet.append_row(row)
    return q
# Request body accepted by create_job. Every field is a required string, and
# create_job forwards each one to fastapi_func under the same keyword name.
class JobInput(BaseModel):
    # URL of the job posting (the value fastapi_func scrapes).
    links: str

    # Caller-supplied posting details, stored as given.
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str

    # Compensation / company details, stored as given.
    salary: str
    equity: str
    yoe: str
    team_size: str
    funding: str
    website: str
# FastAPI application instance.
app = FastAPI()


# NOTE(review): no route decorator (e.g. `@app.post(...)`) is visible on this
# function in this view, so as shown it is never registered on `app` --
# confirm the intended path and method.
def create_job(job: JobInput):
    """Process one job submission and return the extracted job details.

    Args:
        job: Validated request payload; each field is forwarded to
            ``fastapi_func`` under the same keyword name.

    Returns:
        Whatever ``fastapi_func`` returns for this job.

    Raises:
        HTTPException: With status 500 and the error text as ``detail`` when
            ``fastapi_func`` raises any exception.
    """
    try:
        result = fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except Exception as e:
        # Chain the original error so the root cause stays on the traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return result