Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pleabargain/48811538e97f4283f936692ff91d88bb to your computer and use it in GitHub Desktop.
Save pleabargain/48811538e97f4283f936692ff91d88bb to your computer and use it in GitHub Desktop.
n8n nodes for scraping LinkedIn profile URLs, using OpenAI to parse the query and Google Sheets to store the results
{
"meta": {
"instanceId": "5970330cf84a96bb271f539d4bac6134d8132cbad51d8237733e8f17982e0bdc"
},
"nodes": [
{
"parameters": {},
"id": "bb2d4060-f43f-4393-800b-55bf0f44ddb0",
"name": "Execute Workflow Trigger",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"typeVersion": 1,
"position": [
820,
320
]
},
{
"parameters": {
"url": "https://www.google.com/search",
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "q",
"value": "=site:linkedin.com/in/ {{ $json.message.content.jobTitle }} + {{ $json.message.content.industry }} + {{ $json.message.content.location }}"
}
]
},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "user-agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36"
},
{}
]
},
"options": {}
},
"id": "082e178f-69f7-4253-a75a-9a70b7a819eb",
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
1380,
320
]
},
{
"parameters": {
"modelId": {
"__rl": true,
"value": "gpt-3.5-turbo",
"mode": "list",
"cachedResultName": "GPT-3.5-TURBO"
},
"messages": {
"values": [
{
"content": "=Parse the JSON \"query\" and output the following parameters as separate JSON keys, spelled exactly as shown:\n\njobTitle\ncompany\nindustry\nlocation\n",
"role": "system"
},
{
"content": "={{JSON.stringify ($json.query) }}"
}
]
},
"jsonOutput": true,
"options": {}
},
"id": "323b4e32-526f-4ffe-b5e1-ecf6bca39e8a",
"name": "OpenAI1",
"type": "@n8n/n8n-nodes-langchain.openAi",
"typeVersion": 1.5,
"position": [
1000,
320
],
"credentials": {
"openAiApi": {
"id": "yN1DQVBvF8QW7j77",
"name": "OpenAi account"
}
}
},
{
"parameters": {
"jsCode": "// 'items' is the input provided by n8n, containing data from the previous node (HTTP Request in this case)\n\n// Assuming the HTTP Request data is available in items[0].json.data\n// Modify this if your actual structure is different\nconst httpRequestData = items[0].json.data;\n\n// Regular expression to match LinkedIn URLs\nconst linkedinUrlPattern = /(https?:\\/\\/[a-zA-Z0-9.-]*linkedin\\.com\\/in\\/[a-zA-Z0-9-]+)/g;\n\n// Function to extract unique LinkedIn URLs from the request data\nfunction extractUniqueUrls(data) {\n const matches = [...data.matchAll(linkedinUrlPattern)]; // Use matchAll to get all matches\n const urls = matches.map(match => match[0]); // Extract full URL (group 0)\n const uniqueUrls = [...new Set(urls)]; // Remove duplicates using Set\n return uniqueUrls;\n}\n\n// Extract unique URLs from the HTTP request data\nconst linkedinUrls = extractUniqueUrls(httpRequestData);\n\n// Prepare the output in n8n's expected format: an array of objects with unique URLs\nconst output = linkedinUrls.map(url => {\n return { json: { url } }; // each URL becomes an object with a `json` key\n});\n\n// Return the output array to n8n\nreturn output;\n"
},
"id": "bb6b77a0-94ec-4270-b3d8-e1419a462ede",
"name": "Code",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [
1600,
320
]
},
{
"parameters": {
"operation": "append",
"documentId": {
"__rl": true,
"value": "1GHALx9nA0MTB7gP16rxj8R6afH-tFb-Qvo3DK34M_uI",
"mode": "id"
},
"sheetName": {
"__rl": true,
"value": "Sheet1",
"mode": "name"
},
"columns": {
"mappingMode": "autoMapInputData",
"value": {},
"matchingColumns": [],
"schema": [
{
"id": "URLs",
"displayName": "URLs",
"required": false,
"defaultMatch": false,
"display": true,
"type": "string",
"canBeUsedToMatch": true,
"removed": false
},
{
"id": "url",
"displayName": "url",
"required": false,
"defaultMatch": false,
"display": true,
"type": "string",
"canBeUsedToMatch": true,
"removed": false
}
]
},
"options": {}
},
"id": "d85eed9f-5b35-4d32-b11d-a50be855da0a",
"name": "Google Sheets",
"type": "n8n-nodes-base.googleSheets",
"typeVersion": 4.5,
"position": [
1820,
320
],
"credentials": {
"googleSheetsOAuth2Api": {
"id": "Eomtk076fGT5DGhI",
"name": "Google Sheets account"
}
}
},
{
"parameters": {
"assignments": {
"assignments": [
{
"id": "e9931689-079e-40ae-9d2d-aca3255afa78",
"name": "response",
"value": "done",
"type": "string"
}
]
},
"options": {}
},
"id": "d9f4c124-6fb7-4dd0-b72e-42c086733243",
"name": "Edit Fields",
"type": "n8n-nodes-base.set",
"typeVersion": 3.4,
"position": [
2040,
320
]
}
],
"connections": {
"Execute Workflow Trigger": {
"main": [
[
{
"node": "OpenAI1",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "Code",
"type": "main",
"index": 0
}
]
]
},
"OpenAI1": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Code": {
"main": [
[
{
"node": "Google Sheets",
"type": "main",
"index": 0
}
]
]
},
"Google Sheets": {
"main": [
[
{
"node": "Edit Fields",
"type": "main",
"index": 0
}
]
]
}
},
"pinData": {
"Execute Workflow Trigger": [
{
"query": "jobTitle=&industry=&location=&"
}
]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment