Created
August 26, 2024 05:14
-
-
Save devAgam/1fa1c31a6c1849889965ede4f6436029 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Number of scraping tasks meant to be in flight at the same time.
 * NOTE(review): presumably the cap on simultaneously processed stories —
 * confirm that the batch runner actually honours it.
 */
const MAX_CONCURRENT_TASKS = 5;
/**
 * Requests the stories that still lack scraped content from the local backend.
 *
 * @returns {Promise<any>} Parsed JSON body of the response; callers use it as
 *   an array of story objects.
 * @throws {Error} When the backend answers with a non-2xx status, so an error
 *   payload is never mistaken for a list of stories.
 */
async function fetchStories() {
  const response = await fetch(
    "http://localhost:8000/get-where-no-story-content"
  );
  if (!response.ok) {
    throw new Error(`Failed to fetch stories: HTTP ${response.status}`);
  }
  return response.json();
}
/**
 * Reports the scraping outcome for one story to the local backend.
 *
 * @param {string} _id - Identifier of the story; placed in the request path.
 * @param {string|null|undefined} content - Scraped HTML. An empty string,
 *   `null` or `undefined` all mean "nothing was scraped".
 * @returns {Promise<any>} Parsed JSON body of the backend's response.
 */
async function updateStoryContent(_id, content) {
  // A failed scrape hands over null/undefined; reading `.length` on that would
  // throw, so normalise it to an empty string first.
  const sourceBody = content ?? "";
  const hasContent = sourceBody.length > 0;

  const response = await fetch(
    `http://localhost:8000/update-story-content/${encodeURIComponent(_id)}`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        sourceBody,
        gotStoryContent: hasContent,
        errorScraping: !hasContent,
      }),
    }
  );
  return response.json();
}
/**
 * Runs a script in the given tab that reads the inner HTML of the first
 * `.story-content` element.
 *
 * @param {number} tabId - Id of the tab to inject the script into.
 * @param {string} url - Not used by this function; kept for call compatibility.
 * @returns {Promise<string>} The element's inner HTML.
 * @throws {Error} Rejects when the injection reports `chrome.runtime.lastError`
 *   or when no non-empty story content is found.
 */
async function scrapeContent(tabId, url) {
  return new Promise((resolve, reject) => {
    chrome.scripting.executeScript(
      {
        target: { tabId },
        func: () => document.querySelector(".story-content")?.innerHTML || null,
      },
      (results) => {
        const lastError = chrome.runtime.lastError;
        if (lastError) {
          // lastError is a plain object; wrap it so callers always get an Error.
          reject(
            new Error(lastError.message ?? String(lastError), {
              cause: lastError,
            })
          );
          return;
        }

        const html = results?.[0]?.result;
        if (html) {
          resolve(html);
        } else {
          reject(new Error("No story content found"));
        }
      }
    );
  });
}
/**
 * Processes one story: opens its link in a background tab, checks whether the
 * link redirects, scrapes the story content from the tab when it does not,
 * reports the outcome to the backend and closes the tab.
 *
 * The returned promise always resolves (never rejects); failures are logged and
 * reported to the backend as empty content.
 *
 * @param {{ _id: string, link: string }} story - Story to process.
 * @returns {Promise<void>} Resolves once the story has been handled.
 */
async function processSingleStory(story) {
  const { _id, link } = story;

  return new Promise((resolve) => {
    chrome.tabs.create({ url: link, active: false }, async (tab) => {
      // lastError is only valid synchronously inside the callback.
      const createError = chrome.runtime.lastError;
      // Empty content is what gets reported for redirects and failures alike.
      let content = "";

      try {
        if (createError || !tab) {
          throw new Error(createError?.message ?? "Tab could not be created");
        }

        const response = await fetch(link, { redirect: "manual" });

        // With redirect: "manual" a redirect surfaces as an "opaqueredirect"
        // response. The URL comparison is kept as a fallback, but against the
        // normalised link without its fragment, so that normalisation alone
        // is not mistaken for a redirect.
        const requested = new URL(link);
        requested.hash = "";
        const wasRedirected =
          response.type === "opaqueredirect" ||
          response.redirected === true ||
          response.url !== requested.href;

        if (wasRedirected) {
          console.log(
            "Redirect detected:",
            link,
            "redirected to",
            response.url
          );
        } else {
          content = await scrapeContent(tab.id, link);
        }
      } catch (error) {
        console.error("Error scraping content:", error);
      }

      try {
        await updateStoryContent(_id, content);
      } catch (error) {
        console.error("Error updating story content:", error);
      } finally {
        if (tab?.id !== undefined) {
          // Reading lastError in the callback avoids an "unchecked lastError"
          // warning if the tab is already gone.
          chrome.tabs.remove(tab.id, () => void chrome.runtime.lastError);
        }
        resolve(); // Always settle so a batch waiting on this story cannot hang.
      }
    });
  });
}
/**
 * Processes the given stories in chunks of at most `maxConcurrent` at a time,
 * then keeps asking the backend for more stories until it returns none.
 *
 * The returned promise resolves only after every batch has been processed.
 * NOTE(review): if the backend keeps returning the same stories, this loops
 * indefinitely — confirm the backend marks processed stories.
 *
 * @param {Array<{ _id: string, link: string }>} stories - First batch to process.
 * @param {number} [maxConcurrent=MAX_CONCURRENT_TASKS] - Upper bound on stories
 *   processed at the same time; values below 1 or non-numeric fall back to 1.
 * @returns {Promise<void>}
 */
async function processStoriesInParallel(
  stories,
  maxConcurrent = MAX_CONCURRENT_TASKS
) {
  const limit = Math.max(1, Math.floor(Number(maxConcurrent)) || 1);
  let batch = stories;

  do {
    // Work through the batch chunk by chunk so that no more than `limit`
    // stories are in flight at once.
    for (let start = 0; start < batch.length; start += limit) {
      const chunk = batch.slice(start, start + limit);
      await Promise.all(chunk.map((story) => processSingleStory(story)));
    }

    // Check if there are more stories to process.
    batch = await fetchStories();
  } while (batch.length > 0);

  console.log("All stories processed.");
}
// Starts a scraping run when a message with action "startScraping" arrives.
chrome.runtime.onMessage.addListener((message) => {
  // Ignore unrelated (or empty) messages instead of throwing on them.
  if (message?.action !== "startScraping") {
    return;
  }

  fetchStories()
    .then(processStoriesInParallel)
    .catch((error) => {
      // Without this handler a failed run would be an unhandled rejection.
      console.error("Scraping run failed:", error);
    });
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Not Needed Here |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"manifest_version": 3, | |
"name": "ISS Web Scraper Extension", | |
"version": "1.0", | |
"permissions": ["activeTab", "scripting", "storage"], | |
"background": { | |
"service_worker": "background.js" | |
}, | |
"host_permissions": ["https://www.[website you want to scrape]/*"], | |
"action": { | |
"default_popup": "popup.html" | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8" /> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
<title>Web Scraper Extension</title> | |
</head> | |
<body> | |
<button id="start-scraping">Start Scraping</button> | |
<script src="popup.js"></script> | |
</body> | |
</html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Wire the popup's button: a click sends the "startScraping" action message
// through chrome.runtime.sendMessage.
const startScrapingButton = document.getElementById("start-scraping");

startScrapingButton.addEventListener("click", function handleStartClick() {
  const request = { action: "startScraping" };
  chrome.runtime.sendMessage(request);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment