# Paraform_data / app.py
# Author: ak0601
# Last commit: "Update app.py" (504c667, verified)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
import json
from firecrawl import FirecrawlApp
import gspread
import os
from dotenv import load_dotenv
import json
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# API keys handed to the Gemini chat model and the Firecrawl client below.
# Either may be None when the corresponding variable is unset.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")

# Key (ID) of the target Google spreadsheet, read from the SHEET variable.
SHEET_ID = os.getenv("SHEET")

# The service-account credentials are supplied as a single JSON document in
# the `cred_dict` environment variable.
cred_dic = os.getenv("cred_dict")
if not cred_dic:
    # Without this guard json.loads(None) fails with an opaque TypeError.
    raise RuntimeError(
        "Environment variable 'cred_dict' is not set; it must contain the "
        "Google service-account credentials as a JSON document."
    )
cred_dict = json.loads(cred_dic)

if not SHEET_ID:
    raise RuntimeError(
        "Environment variable 'SHEET' is not set; it must contain the key "
        "of the Google spreadsheet to append rows to."
    )

# Set up the Google Sheets connection. The spreadsheet is opened by its key
# (not by its title); rows are appended to the worksheet named "S1".
gc = gspread.service_account_from_dict(cred_dict)
sh = gc.open_by_key(SHEET_ID)
worksheet = sh.worksheet("S1")
# Define your URL scraping function
def url_scrape(url):
    """Scrape *url* with Firecrawl and return the client's response as-is.

    A new ``FirecrawlApp`` client is built on every call from
    ``FIRECRAWL_API_KEY`` and the page is requested in markdown format.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FirecrawlApp.scrape_url`` returns; it is not post-processed.

    Raises:
        Any exception raised while constructing the client or scraping
        propagates to the caller unchanged.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
# Structured-output schema for job description extraction. Each field's
# description is passed to the language model through `with_structured_output`,
# so the description strings are part of the program's behaviour.
class JDE(BaseModel):
    # What the position is and who is offering it.
    Role: str = Field(..., description="Title of the job")
    Company: str = Field(..., description="Name of the company")

    # Free-text summary of the skills / tech stack the job asks for.
    Requirements: str = Field(
        ...,
        description=(
            "Requirements of the job. Provide a detailed overview of the "
            "ideal skills or tech stack required."
        ),
    )

    # Classification and logistics of the position.
    Industry: str = Field(..., description="Type of Industry the job belongs to")
    Type: str = Field(..., description="Working style (Remote, Hybrid, Onsite)")
    Location: str = Field(..., description="Location of the company")
# System message for the extraction prompt.
_JD_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
)

# Human message for the extraction prompt. Doubled braces are literal braces
# in the chat prompt template; {job_description} is the only placeholder.
_JD_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""


# The core function that processes the job input and appends data to Google Sheets
def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details and log a sheet row.

    The page at ``links`` is scraped with ``url_scrape``, the result is sent
    to a Gemini chat model configured for structured output (``JDE``), and
    one row is appended to the module-level ``worksheet``. The row holds the
    fourteen caller-supplied values in parameter order, followed by the
    extracted ``Requirements`` and ``Industry``.

    Args:
        links: URL of the job posting to scrape.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Values written to
            the sheet unchanged; they are not sent to the language model.

    Returns:
        The ``JDE`` instance produced by the language model.

    Raises:
        ValueError: If the structured-output chain returns no result.
        Exception: Errors from scraping, the model call, or the sheet
            append propagate unchanged.
    """
    # Scrape the job description from the provided link.
    # NOTE(review): the whole scrape response (not just a markdown field) is
    # interpolated into the prompt below - confirm that this is intended.
    jd = url_scrape(links)

    query_prompt = ChatPromptTemplate.from_messages([
        ("system", _JD_SYSTEM_PROMPT),
        ("human", _JD_HUMAN_PROMPT),
    ])

    # Initialize the language model and bind it to the JDE output schema.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    extraction_chain = query_prompt | llm.with_structured_output(JDE)

    # Invoke the language model to extract structured job details.
    extracted = extraction_chain.invoke({"job_description": jd})
    if extracted is None:
        # Presumably happens when the model answers without filling the
        # schema; fail with a clear message instead of an AttributeError.
        raise ValueError("The language model returned no structured job details.")

    # Caller-supplied columns first, then the two extracted fields.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, extracted.Requirements, extracted.Industry,
    ]
    worksheet.append_row(row)
    return extracted
# Request body accepted by the /create-job endpoint. Every field is a
# required string, and the field order matches the column order of the row
# that gets appended to the sheet.
class JobInput(BaseModel):
    # Link to the job posting; this is the URL that gets scraped.
    links: str

    # Headline information about the position.
    company: str
    role: str
    one_liner: str
    reward: str

    # Where and how the work happens.
    locations: str
    tech_stack: str
    workplace: str

    # Compensation and experience.
    salary: str
    equity: str
    yoe: str

    # Company background.
    team_size: str
    funding: str
    website: str
# Create the FastAPI app instance
app = FastAPI()


@app.post("/create-job")
def create_job(job: JobInput):
    """Handle POST /create-job: process one job posting and return its details.

    Forwards every field of ``job`` to ``fastapi_func`` and returns that
    function's result as the response body.

    Args:
        job: Validated request payload.

    Returns:
        The structured job details returned by ``fastapi_func``.

    Raises:
        HTTPException: With status 500 and the error text as ``detail`` when
            processing fails; an ``HTTPException`` raised during processing
            is passed through unchanged.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        return fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except HTTPException:
        # Preserve a deliberately chosen status code instead of masking it as 500.
        raise
    except Exception as e:
        # Record the traceback server-side; the client only receives the message.
        logging.getLogger(__name__).exception("create-job failed")
        raise HTTPException(status_code=500, detail=str(e)) from e