Skip to content

Instantly share code, notes, and snippets.

@hamedn
Created July 28, 2025 17:48
Show Gist options
  • Save hamedn/b8bfc56afa91a3f397d8725e74596cf2 to your computer and use it in GitHub Desktop.
Save hamedn/b8bfc56afa91a3f397d8725e74596cf2 to your computer and use it in GitHub Desktop.
HiringCafe Prompt
/**
 * Function/JSON-schema definitions used to extract structured job data.
 *
 * The single entry, `process_job_data`, describes the fields of one job
 * posting: company, role basics, compensation, content signals, and
 * provenance metadata.
 *
 * Schema conventions (needed for OpenAI strict structured outputs):
 * - Every key in `properties` is listed in `required`. "Optional" values are
 *   expressed as nullable types (`["string", "null"]`) instead of being
 *   omitted, and arrays may be returned empty.
 * - Every object sets `additionalProperties: false`.
 * - A nullable property that also has an `enum` lists `null` in that enum;
 *   otherwise `null` could never validate.
 */
export const gpt_functions_param_jobs_fetching = [
  {
    name: "process_job_data",
    description: "Process job data and extract core fields for a scraper.",
    parameters: {
      type: "object",
      additionalProperties: false,
      properties: {
        // Company
        company_name: {
          type: ["string", "null"],
          description: "Organization posting the job."
        },
        company_website: {
          type: ["string", "null"],
          description: "Base website URL (protocol + domain)."
        },
        // Role basics
        title: {
          type: "string",
          description: "Canonical job title (no seniority or location descriptors)."
        },
        category: {
          type: ["string", "null"],
          description: "High-level function of the role.",
          // `null` is listed explicitly so the nullable type is actually satisfiable.
          enum: [
            "Software Development",
            "Engineering",
            "Information Technology",
            "Product Management",
            "Project and Program Management",
            "Design",
            "Data and Analytics",
            "Sales",
            "Marketing",
            "Customer Service",
            "Business Operations",
            "Finance and Accounting",
            "Human Resources",
            "Legal and Compliance",
            "Healthcare",
            "Other",
            null
          ]
        },
        employment_type: {
          type: "string",
          description: "Employment commitment.",
          enum: [
            "Full Time",
            "Part Time",
            "Contract",
            "Internship",
            "Temporary",
            "Seasonal",
            "Volunteer"
          ]
        },
        workplace_type: {
          type: "string",
          description: "Remote/Onsite/Hybrid.",
          enum: ["Remote", "Onsite", "Hybrid"]
        },
        locations: {
          type: "array",
          description: "Free-text location names (e.g., 'San Francisco, CA, US').",
          items: { type: "string" }
        },
        // Compensation (if present)
        salary: {
          type: "object",
          additionalProperties: false,
          properties: {
            min: { type: ["number", "null"], description: "Low end of range." },
            max: { type: ["number", "null"], description: "High end of range." },
            currency: {
              type: ["string", "null"],
              description: "ISO 4217 code (e.g., 'USD')."
            },
            frequency: {
              type: ["string", "null"],
              description: "Pay period.",
              enum: ["Yearly", "Monthly", "Bi-Weekly", "Weekly", "Daily", "Hourly", null]
            }
          },
          required: ["min", "max", "currency", "frequency"]
        },
        // Content & signals
        description_text: {
          type: ["string", "null"],
          description: "Plain-text job description."
        },
        requirements_summary: {
          type: ["string", "null"],
          description: "≤250 char summary of explicit requirements."
        },
        skills: {
          type: "array",
          description: "Normalized tools/skills mentioned.",
          items: { type: "string" }
        },
        experience_min_years: {
          type: ["number", "null"],
          description: "Minimum years of experience, if stated."
        },
        // Metadata & provenance
        posted_at: {
          type: ["string", "null"],
          description: "ISO 8601 date the job was posted, if available."
        },
        apply_url: {
          type: ["string", "null"],
          description: "Direct application URL, if available."
        },
        source_url: {
          type: "string",
          description: "Canonical URL of the job posting."
        },
        source_name: {
          type: ["string", "null"],
          description: "Site or board the job was scraped from (e.g., 'Greenhouse')."
        },
        language: {
          type: ["string", "null"],
          description: "ISO 639-1 language code of the original posting."
        },
        employer_type: {
          type: ["string", "null"],
          description: "Internal vs. external recruiter.",
          enum: ["Internal", "External", null]
        },
        job_id: {
          type: ["string", "null"],
          description: "Publisher/job-board identifier if present."
        }
      },
      // Strict mode requires every property key to be listed here; nullable
      // types above carry the "not available" case.
      required: [
        "company_name",
        "company_website",
        "title",
        "category",
        "employment_type",
        "workplace_type",
        "locations",
        "salary",
        "description_text",
        "requirements_summary",
        "skills",
        "experience_min_years",
        "posted_at",
        "apply_url",
        "source_url",
        "source_name",
        "language",
        "employer_type",
        "job_id"
      ]
    }
  }
];
// Ask the model to turn the raw posting payload (`jdData`) into JSON that
// conforms to the `process_job_data` parameter schema.
//
// - Declared with `const`: this file is an ES module (it uses `export`), so
//   assigning to an undeclared identifier would throw a ReferenceError.
// - Awaited: `create()` returns a promise; without `await` the binding would
//   hold the pending promise rather than the completion, and a rejection
//   would go unhandled.
// NOTE(review): `openai` and `jdData` are expected to be defined elsewhere;
// confirm `jdData` is already serialized to a string before interpolation.
const structured_data = await openai.chat.completions.create({
  model: "gpt-4o-mini",
  messages: [
    {
      role: "system",
      content:
        "You are an advanced AI that takes potentially incomplete or inconsistent job posting data and outputs clean, structured JSON for use in a job search engine, using job_information.job_description as the source of truth. Extract exact matches when possible, synthesize when context is needed, make educated guesses where required, resolve contradictions wisely, and follow all field-specific formatting and inference guidelines.",
    },
    {
      role: "user",
      content: `Process job posting data:\n\n${jdData}\n\n\nNote: job_information.job_description is the most truthful field. If conflicting information is found, prioritize job_information.job_description.`,
    },
  ],
  // Structured-output mode: the reply must validate against the schema below.
  response_format: {
    type: "json_schema",
    json_schema: {
      name: "job_posting_schema",
      strict: true,
      schema: gpt_functions_param_jobs_fetching[0].parameters,
    },
  },
  // Zero temperature to keep extraction as repeatable as possible.
  temperature: 0,
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment