Spaces:
Paused
Paused
File size: 5,978 Bytes
71d720b afcae86 504c667 71d720b 504c667 71d720b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
import json
from firecrawl import FirecrawlApp
import gspread
import os
from dotenv import load_dotenv
import json
# Pull settings from a local .env file (when one exists) into the environment
# before any os.getenv call below reads them.
load_dotenv()
# API keys and the spreadsheet key come from environment variables; os.getenv
# yields None for any variable that is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET")
# The "cred_dict" variable carries the Google service-account credentials as a
# single JSON string, decoded here into a dict.
# NOTE(review): when "cred_dict" is unset, json.loads(None) raises TypeError at
# import time — confirm this fail-fast behaviour is intended.
cred_dic = os.getenv("cred_dict")
cred_dict = json.loads(cred_dic)
# Disabled alternative: assemble the credentials dict from one environment
# variable per field instead of a single JSON blob.
# genai ={
# "type": str(os.getenv("type")),
# "project_id": str(os.getenv("project_id")),
# "private_key_id": str(os.getenv("private_key_id")),
# "private_key": str(os.getenv("private_key")),
# "client_email": str(os.getenv("client_email")),
# "client_id": str(os.getenv("client_id")),
# "auth_uri": str(os.getenv("auth_uri")),
# "token_uri": str(os.getenv("token_uri")),
# "auth_provider_x509_cert_url": str(os.getenv("auth_provider_x509_cert_url")),
# "client_x509_cert_url": str(os.getenv("client_x509_cert_url")),
# "universe_domain": str(os.getenv("universe_domain"))
# }
# Authenticate with the decoded service-account credentials; this and the two
# lookups below run once, when the module is imported.
gc = gspread.service_account_from_dict(cred_dict)
sh = gc.open_by_key(SHEET_ID) # SHEET_ID is the spreadsheet key (ID), not its title
worksheet = sh.worksheet("S1") # Tab named "S1"; fastapi_func appends its rows here
# Define your URL scraping function
def url_scrape(url):
    """Scrape a web page through Firecrawl and return the raw response.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FirecrawlApp.scrape_url`` returns for a request that asks
        for the ``markdown`` format; the response is passed through untouched.

    Raises:
        Any exception raised by the Firecrawl client propagates unchanged.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    # No try/except here on purpose: returning a value cannot fail, and errors
    # from the scrape call itself are left for the caller to handle (the
    # /create-job endpoint turns them into an HTTP 500).
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})
# Define the structured output model for job description extraction
class JDE(BaseModel):
    # Schema handed to the language model for structured output: every field
    # is a required string, and each description tells the model what to put
    # in that field.
    # NOTE: documented with comments rather than a class docstring because
    # pydantic copies a model docstring into the generated schema description.
    Role: str = Field(
        ...,
        description="Title of the job",
    )
    Company: str = Field(
        ...,
        description="Name of the company",
    )
    Requirements: str = Field(
        ...,
        description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.",
    )
    Industry: str = Field(
        ...,
        description="Type of Industry the job belongs to",
    )
    Type: str = Field(
        ...,
        description="Working style (Remote, Hybrid, Onsite)",
    )
    Location: str = Field(
        ...,
        description="Location of the company",
    )
# The core function that processes the job input and appends data to Google Sheets
# System message sent with every extraction request.
_JD_SYSTEM_PROMPT = (
    "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
)

# Human message template. Doubled braces are literal braces for the prompt
# template; {job_description} is the only substituted variable.
_JD_HUMAN_PROMPT = """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
"Role": "Title of the job",
"Company": "Name of the company the job is about",
"Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
"Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
"Type": "Working style (Remote, Hybrid, Onsite)",
"Location": "Location of the company"
}}
Job Description: {job_description}
"""


def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details, and log a sheet row.

    The page at ``links`` is scraped, the scrape result is given to the
    language model to fill in a ``JDE`` record, and one row is appended to the
    module-level ``worksheet``.

    Args:
        links: URL of the job posting; scraped and stored as the first column.
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Caller-supplied
            values written to the row verbatim, in this order.

    Returns:
        The ``JDE`` instance produced by the language model.

    Raises:
        ValueError: If the model returns no structured result.
        Exception: Errors from scraping, the model call, or the sheet append
            propagate unchanged.
    """
    # Scrape the job description from the provided link.
    jd = url_scrape(links)

    query_prompt = ChatPromptTemplate.from_messages([
        ("system", _JD_SYSTEM_PROMPT),
        ("human", _JD_HUMAN_PROMPT),
    ])

    # Bind the model to the JDE schema so the chain yields a JDE instance.
    # NOTE(review): temperature 0.81 is high for an extraction task — confirm.
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    extraction_chain = query_prompt | llm.with_structured_output(JDE)

    extracted = extraction_chain.invoke({"job_description": jd})
    if extracted is None:
        # Fail with a clear message instead of an AttributeError on the
        # attribute reads below.
        raise ValueError("Language model returned no structured job details")

    # Caller-supplied columns first, then the two model-extracted fields.
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, extracted.Requirements, extracted.Industry,
    ]
    worksheet.append_row(row)
    return extracted
# Define a Pydantic model for the API input
class JobInput(BaseModel):
    # Request body for POST /create-job. Every field is a required string;
    # the endpoint forwards each one to fastapi_func under the same name.

    # URL of the job posting; this is the value that gets scraped.
    links: str

    # Remaining values are stored in the spreadsheet row as given.
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str
    salary: str
    equity: str
    yoe: str
    team_size: str
    funding: str
    website: str
# Create the FastAPI app instance
app = FastAPI()


@app.post("/create-job")
def create_job(job: JobInput):
    """Process a job posting and record it in the spreadsheet."""
    # Any failure in the pipeline (scrape, model call, sheet append) is
    # reported to the client as HTTP 500 with the error text as the detail.
    try:
        return fastapi_func(
            links=job.links,
            company=job.company,
            role=job.role,
            one_liner=job.one_liner,
            reward=job.reward,
            locations=job.locations,
            tech_stack=job.tech_stack,
            workplace=job.workplace,
            salary=job.salary,
            equity=job.equity,
            yoe=job.yoe,
            team_size=job.team_size,
            funding=job.funding,
            website=job.website,
        )
    except Exception as e:
        # Chain explicitly so the original error stays attached as __cause__
        # in server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
|