File size: 5,978 Bytes
71d720b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afcae86
504c667
 
71d720b
504c667
 
 
 
 
 
 
 
 
 
 
 
 
 
71d720b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
import json
from firecrawl import FirecrawlApp
import gspread
import os   
from dotenv import load_dotenv
import json

# Pull variables from a local .env file (if present) into the process
# environment before any of them are read below.
load_dotenv()

# API keys and the spreadsheet key are taken from the environment.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
SHEET_ID = os.getenv("SHEET")

# The Google service-account credentials are expected as one JSON document
# stored in the `cred_dict` environment variable.
cred_dic = os.getenv("cred_dict")
if not cred_dic:
    # Without this guard json.loads(None) fails with an opaque TypeError that
    # does not mention which setting is missing.
    raise RuntimeError(
        "Environment variable 'cred_dict' is not set; it must contain the "
        "Google service-account credentials as a JSON string."
    )
cred_dict = json.loads(cred_dic)

# Google Sheets connection: authenticate with the service account, open the
# spreadsheet identified by SHEET_ID, and select the worksheet named "S1".
gc = gspread.service_account_from_dict(cred_dict)
sh = gc.open_by_key(SHEET_ID)
worksheet = sh.worksheet("S1")

# URL scraping helper
def url_scrape(url):
    """Scrape *url* with Firecrawl and return the scrape response unchanged.

    A new ``FirecrawlApp`` client is created on every call using
    ``FIRECRAWL_API_KEY``; the page is requested in the ``markdown`` format.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``FirecrawlApp.scrape_url`` returns, without post-processing.

    Raises:
        Any exception raised by the Firecrawl client propagates to the caller.
    """
    app_scraper = FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    # No try/except here on purpose: the previous handler only guarded a bare
    # `return` and could never fire, so scrape errors already propagated.
    return app_scraper.scrape_url(url=url, params={'formats': ['markdown']})

# Structured-output model for job description extraction.
# `fastapi_func` passes this class to `llm.with_structured_output(JDE)`, so the
# field names and the `description` strings below are part of the schema the
# language model is asked to fill in. Every field is a required string.
# NOTE(review): documented with `#` comments rather than a class docstring on
# the assumption that a docstring would be emitted into that schema — confirm.
class JDE(BaseModel):
    Role: str = Field(description="Title of the job")
    Company: str = Field(description="Name of the company")
    # Read by `fastapi_func` and appended to the sheet row.
    Requirements: str = Field(description="Requirements of the job. Provide a detailed overview of the ideal skills or tech stack required.")
    # Read by `fastapi_func` and appended to the sheet row.
    Industry: str = Field(description="Type of Industry the job belongs to")
    Type: str = Field(description="Working style (Remote, Hybrid, Onsite)")
    Location: str = Field(description="Location of the company")

# The core function that processes the job input and appends data to Google Sheets
def fastapi_func(links, company, role, one_liner, reward, locations, tech_stack, workplace, salary, equity, yoe, team_size, funding, website):
    """Scrape a job posting, extract structured details, and append a sheet row.

    The page at ``links`` is scraped with ``url_scrape``; the scrape result is
    interpolated into a chat prompt and sent to a Gemini model configured to
    return a ``JDE`` instance. The caller-supplied values plus the extracted
    ``Requirements`` and ``Industry`` are appended as one row to ``worksheet``.

    Args:
        links: URL of the job posting to scrape (also stored in the row).
        company, role, one_liner, reward, locations, tech_stack, workplace,
        salary, equity, yoe, team_size, funding, website: Values written to
            the sheet row unchanged, in this order, after ``links``.

    Returns:
        The ``JDE`` instance produced by the language model.

    Raises:
        ValueError: If the model returns no structured result.
        Exception: Errors from scraping, the model call, or the Sheets API
            propagate unchanged.
    """
    # Scrape the job description from the provided link.
    # NOTE(review): `jd` is whatever `url_scrape` returns and is formatted into
    # the prompt as-is; confirm whether only the markdown text should be sent.
    jd = url_scrape(links)

    # Create the prompt for the language model
    system = (
        "You are an expert job description writer. Your task is to structure the given web-scraped text into a properly sorted text and extract relevant information from it."
    )
    query_prompt = ChatPromptTemplate.from_messages([
        ("system", system),
        ("human", """
You are an expert job description writer. Your task is to restructure the given job description and extract relevant information.
Try to return your answer in JSON format based on the following structure:
{{
    "Role": "Title of the job",
    "Company": "Name of the company the job is about",
    "Requirements": "Ideal skills or tech stack required. Provide a detailed overview.",
    "Industry": "Industry of the job (Tech, Finance, Management, Commerce, Engineering, etc)",
    "Type": "Working style (Remote, Hybrid, Onsite)",
    "Location": "Location of the company"
}}
Job Description: {job_description}
    """)
    ])

    # Initialize the language model and set it up for structured output using the JDE model
    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.81)
    str_llm = llm.with_structured_output(JDE)
    JDE_re = query_prompt | str_llm

    # Invoke the language model to extract structured job details
    q = JDE_re.invoke({"job_description": jd})
    if q is None:
        # Fail with a clear message instead of an AttributeError on
        # `q.Requirements` below, and before anything is written to the sheet.
        raise ValueError("The language model did not return structured job details.")

    # Prepare the row with all the data (the two extracted fields go at the end)
    row = [
        links, company, role, one_liner, reward, locations,
        tech_stack, workplace, salary, equity, yoe, team_size,
        funding, website, q.Requirements, q.Industry,
    ]
    worksheet.append_row(row)

    return q

# Request body model for the POST /create-job endpoint.
# All fields are required strings; `create_job` forwards each one to
# `fastapi_func` under the same name.
class JobInput(BaseModel):
    # Passed to `url_scrape` as the page to scrape, and stored in the row.
    links: str
    # The remaining fields are written to the sheet row unchanged, in the
    # order they are declared here.
    company: str
    role: str
    one_liner: str
    reward: str
    locations: str
    tech_stack: str
    workplace: str
    salary: str
    equity: str
    yoe: str
    team_size: str
    funding: str
    website: str

# FastAPI application instance; the route handlers below register on it
# through the `@app.post` decorator.
app = FastAPI()

# POST /create-job: forward every JobInput field to `fastapi_func` as a keyword
# argument and return its result. Any exception is reported to the client as an
# HTTP 500 whose detail is the exception text.
# (Kept as `#` comments: FastAPI publishes handler docstrings in the OpenAPI
# description, which would change the generated API docs.)
@app.post("/create-job")
def create_job(job: JobInput):
    forwarded_fields = (
        "links",
        "company",
        "role",
        "one_liner",
        "reward",
        "locations",
        "tech_stack",
        "workplace",
        "salary",
        "equity",
        "yoe",
        "team_size",
        "funding",
        "website",
    )
    try:
        # Keyword names match fastapi_func's parameters one-to-one.
        call_kwargs = {field: getattr(job, field) for field in forwarded_fields}
        return fastapi_func(**call_kwargs)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))