Created
October 7, 2024 11:12
-
-
Save pleabargain/48811538e97f4283f936692ff91d88bb to your computer and use it in GitHub Desktop.
n8n nodes for scraping LinkedIn profile URLs using OpenAI and saving them to Google Sheets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"meta": { | |
"instanceId": "5970330cf84a96bb271f539d4bac6134d8132cbad51d8237733e8f17982e0bdc" | |
}, | |
"nodes": [ | |
{ | |
"parameters": {}, | |
"id": "bb2d4060-f43f-4393-800b-55bf0f44ddb0", | |
"name": "Execute Workflow Trigger", | |
"type": "n8n-nodes-base.executeWorkflowTrigger", | |
"typeVersion": 1, | |
"position": [ | |
820, | |
320 | |
] | |
}, | |
{ | |
"parameters": { | |
"url": "https://www.google.com/search", | |
"sendQuery": true, | |
"queryParameters": { | |
"parameters": [ | |
{ | |
"name": "q", | |
"value": "=site:linkedin.com/in/ {{ $json.message.content.jobTitle }} + {{ $json.message.content.industry }} + {{ $json.message.content.location }}" | |
} | |
] | |
}, | |
"sendHeaders": true, | |
"headerParameters": { | |
"parameters": [ | |
{ | |
"name": "user-agent", | |
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36" | |
}, | |
{} | |
] | |
}, | |
"options": {} | |
}, | |
"id": "082e178f-69f7-4253-a75a-9a70b7a819eb", | |
"name": "HTTP Request", | |
"type": "n8n-nodes-base.httpRequest", | |
"typeVersion": 4.2, | |
"position": [ | |
1380, | |
320 | |
] | |
}, | |
{ | |
"parameters": { | |
"modelId": { | |
"__rl": true, | |
"value": "gpt-3.5-turbo", | |
"mode": "list", | |
"cachedResultName": "GPT-3.5-TURBO" | |
}, | |
"messages": { | |
"values": [ | |
{ | |
"content": "=Parse the JSON \"query\" and output the following parameters separately: \n\njobTitle \ncompany\nindustry \nlocation\n", | |
"role": "system" | |
}, | |
{ | |
"content": "={{JSON.stringify ($json.query) }}" | |
} | |
] | |
}, | |
"jsonOutput": true, | |
"options": {} | |
}, | |
"id": "323b4e32-526f-4ffe-b5e1-ecf6bca39e8a", | |
"name": "OpenAI1", | |
"type": "@n8n/n8n-nodes-langchain.openAi", | |
"typeVersion": 1.5, | |
"position": [ | |
1000, | |
320 | |
], | |
"credentials": { | |
"openAiApi": { | |
"id": "yN1DQVBvF8QW7j77", | |
"name": "OpenAi account" | |
} | |
} | |
}, | |
{ | |
"parameters": { | |
"jsCode": "// 'items' is the input provided by n8n, containing data from the previous node (HTTP Request in this case)\n\n// Assuming the HTTP Request data is available in items[0].json.data\n// Modify this if your actual structure is different\nconst httpRequestData = items[0].json.data;\n\n// Regular expression to match LinkedIn URLs\nconst linkedinUrlPattern = /(https?:\\/\\/[a-zA-Z0-9.-]*linkedin\\.com\\/in\\/[a-zA-Z0-9-]+)/g;\n\n// Function to extract unique LinkedIn URLs from the request data\nfunction extractUniqueUrls(data) {\n const matches = [...data.matchAll(linkedinUrlPattern)]; // Use matchAll to get all matches\n const urls = matches.map(match => match[0]); // Extract full URL (group 0)\n const uniqueUrls = [...new Set(urls)]; // Remove duplicates using Set\n return uniqueUrls;\n}\n\n// Extract unique URLs from the HTTP request data\nconst linkedinUrls = extractUniqueUrls(httpRequestData);\n\n// Prepare the output in n8n's expected format: an array of objects with unique URLs\nconst output = linkedinUrls.map(url => {\n return { json: { url } }; // each URL becomes an object with a `json` key\n});\n\n// Return the output array to n8n\nreturn output;\n" | |
}, | |
"id": "bb6b77a0-94ec-4270-b3d8-e1419a462ede", | |
"name": "Code", | |
"type": "n8n-nodes-base.code", | |
"typeVersion": 2, | |
"position": [ | |
1600, | |
320 | |
] | |
}, | |
{ | |
"parameters": { | |
"operation": "append", | |
"documentId": { | |
"__rl": true, | |
"value": "1GHALx9nA0MTB7gP16rxj8R6afH-tFb-Qvo3DK34M_uI", | |
"mode": "id" | |
}, | |
"sheetName": { | |
"__rl": true, | |
"value": "Sheet1", | |
"mode": "name" | |
}, | |
"columns": { | |
"mappingMode": "autoMapInputData", | |
"value": {}, | |
"matchingColumns": [], | |
"schema": [ | |
{ | |
"id": "URLs", | |
"displayName": "URLs", | |
"required": false, | |
"defaultMatch": false, | |
"display": true, | |
"type": "string", | |
"canBeUsedToMatch": true, | |
"removed": false | |
}, | |
{ | |
"id": "url", | |
"displayName": "url", | |
"required": false, | |
"defaultMatch": false, | |
"display": true, | |
"type": "string", | |
"canBeUsedToMatch": true, | |
"removed": false | |
} | |
] | |
}, | |
"options": {} | |
}, | |
"id": "d85eed9f-5b35-4d32-b11d-a50be855da0a", | |
"name": "Google Sheets", | |
"type": "n8n-nodes-base.googleSheets", | |
"typeVersion": 4.5, | |
"position": [ | |
1820, | |
320 | |
], | |
"credentials": { | |
"googleSheetsOAuth2Api": { | |
"id": "Eomtk076fGT5DGhI", | |
"name": "Google Sheets account" | |
} | |
} | |
}, | |
{ | |
"parameters": { | |
"assignments": { | |
"assignments": [ | |
{ | |
"id": "e9931689-079e-40ae-9d2d-aca3255afa78", | |
"name": "response", | |
"value": "done", | |
"type": "string" | |
} | |
] | |
}, | |
"options": {} | |
}, | |
"id": "d9f4c124-6fb7-4dd0-b72e-42c086733243", | |
"name": "Edit Fields", | |
"type": "n8n-nodes-base.set", | |
"typeVersion": 3.4, | |
"position": [ | |
2040, | |
320 | |
] | |
} | |
], | |
"connections": { | |
"Execute Workflow Trigger": { | |
"main": [ | |
[ | |
{ | |
"node": "OpenAI1", | |
"type": "main", | |
"index": 0 | |
} | |
] | |
] | |
}, | |
"HTTP Request": { | |
"main": [ | |
[ | |
{ | |
"node": "Code", | |
"type": "main", | |
"index": 0 | |
} | |
] | |
] | |
}, | |
"OpenAI1": { | |
"main": [ | |
[ | |
{ | |
"node": "HTTP Request", | |
"type": "main", | |
"index": 0 | |
} | |
] | |
] | |
}, | |
"Code": { | |
"main": [ | |
[ | |
{ | |
"node": "Google Sheets", | |
"type": "main", | |
"index": 0 | |
} | |
] | |
] | |
}, | |
"Google Sheets": { | |
"main": [ | |
[ | |
{ | |
"node": "Edit Fields", | |
"type": "main", | |
"index": 0 | |
} | |
] | |
] | |
} | |
}, | |
"pinData": { | |
"Execute Workflow Trigger": [ | |
{ | |
"query": "jobTitle=&industry=&location=&" | |
} | |
] | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment