Last active
August 26, 2024 04:26
-
-
Save devAgam/c650ac67336c659fbe9cdf17a3ca216c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const express = require("express");
const mongoose = require("mongoose");
const cors = require("cors");
const htmlToText = require("html-to-text");

const app = express();

// Enable CORS with the middleware's default settings.
app.use(cors());
// Parse JSON request bodies; the limit is raised to 50mb for large payloads.
app.use(express.json({ limit: "50mb" }));

// Connect to the local "web-scraper" database. The returned promise is
// handled explicitly so a failed connection produces a clear log line and a
// non-zero exit instead of an anonymous unhandled rejection.
mongoose
  .connect("mongodb://localhost:27017/web-scraper", {
    useNewUrlParser: true,
    useUnifiedTopology: true,
  })
  .catch((error) => {
    console.error("Failed to connect to MongoDB:", error.message);
    process.exit(1);
  });
// Shape of a stored story document.
const storySchema = new mongoose.Schema({
  title: String,
  // Declared unique; the save endpoint treats duplicate-key errors
  // (code 11000) as "story already stored".
  link: { type: String, unique: true },
  category: String,
  tags: [String],
  date: Date,
  excerpt: String,
  // Story body text, written by the update-story-content endpoint.
  sourceBody: String,
  // Scrape status flags, read by the get-where-no-story-content query.
  gotStoryContent: Boolean,
  errorScraping: Boolean,
});

// Model bound to the "stories" collection name.
const Story = mongoose.model("stories", storySchema);
/**
 * POST /api/stories
 *
 * Receives `{ stories: [...] }` and saves each entry as a Story document.
 * Entries are saved one at a time so a single failure (for example a
 * duplicate `link`) does not abort the rest of the batch.
 *
 * Responses:
 *   400 - body has no `stories` array
 *   201 - batch processed; `saved`, `duplicates` and `failed` report the
 *         per-entry outcome alongside the original `message` field
 *   500 - unexpected error while processing the batch
 */
app.post("/api/stories", async (req, res) => {
  const { stories } = req.body || {};

  // Reject malformed payloads up front instead of letting `for...of` throw
  // and surface as a misleading 500.
  if (!Array.isArray(stories)) {
    res
      .status(400)
      .json({ error: "Request body must contain a `stories` array" });
    return;
  }

  try {
    let saved = 0;
    let duplicates = 0;
    let failed = 0;

    for (const story of stories) {
      try {
        const newStory = new Story(story);
        await newStory.save();
        saved += 1;
        console.log("Story saved:", newStory.title);
      } catch (error) {
        // Read the title defensively: the entry itself may be null.
        const title = story ? story.title : undefined;
        // 11000 is MongoDB's duplicate-key error (unique index on `link`).
        if (error.code === 11000) {
          duplicates += 1;
          console.error("Duplicate story found:", title);
          continue;
        }
        failed += 1;
        console.error("Failed to save story:", error.message);
      }
    }

    res.status(201).json({
      message: "Stories saved successfully",
      saved,
      duplicates,
      failed,
    });
  } catch (error) {
    console.error("Failed to save stories:", error.message);
    res.status(500).json({ error: "Failed to save stories" });
  }
});
/**
 * GET /get-where-no-story-content
 *
 * Sends up to 5 stories to the extension, on demand, for scraping. A story
 * is selected when either:
 *   - it has no `gotStoryContent` field at all, or
 *   - `gotStoryContent` is false and `errorScraping` is false.
 *
 * Responds with a JSON array of story documents, or 500 with an `error`
 * message if the query fails.
 */
app.get("/get-where-no-story-content", async (req, res) => {
  try {
    const stories = await Story.find({
      $or: [
        { gotStoryContent: { $exists: false } },
        {
          gotStoryContent: false,
          errorScraping: false,
        },
      ],
    }).limit(5);
    res.json(stories);
  } catch (error) {
    // Without this catch a rejected query would leave the request hanging,
    // because Express 4 does not forward async handler rejections.
    console.error("Failed to fetch stories to scrape:", error.message);
    res.status(500).json({ error: "Failed to fetch stories" });
  }
});
/**
 * POST /update-story-content/:id
 *
 * Receives the story body from the extension. The HTML in `sourceBody` is
 * converted to plain text and stored on the story together with the
 * `gotStoryContent` and `errorScraping` flags from the request body.
 *
 * Responses:
 *   200 - the updated story document
 *   400 - `id` is not a valid document id
 *   404 - no story exists with that id
 *   500 - conversion or database failure
 */
app.post("/update-story-content/:id", async (req, res) => {
  const { id } = req.params;
  const { sourceBody, gotStoryContent, errorScraping } = req.body || {};

  try {
    // Only strings are handed to the converter; anything else (for example a
    // missing body when scraping failed) is treated as empty HTML.
    const html = typeof sourceBody === "string" ? sourceBody : "";

    // `convert` is synchronous, so no `await` is needed here.
    const htmlConverted = htmlToText.convert(html, {
      wordwrap: 130,
      selectors: [
        { selector: "a", format: "inline" },
        { selector: "img", format: "skip" },
      ],
    });

    const story = await Story.findByIdAndUpdate(
      id,
      {
        sourceBody: htmlConverted,
        gotStoryContent: gotStoryContent,
        errorScraping: errorScraping,
      },
      {
        // Return the document as it looks after the update.
        new: true,
      }
    );

    if (!story) {
      res.status(404).json({ error: "Story not found" });
      return;
    }

    res.json(story);
  } catch (error) {
    // Mongoose raises a CastError when `id` cannot be cast to an ObjectId.
    if (error.name === "CastError") {
      res.status(400).json({ error: "Invalid story id" });
      return;
    }
    console.error("Failed to update story content:", error.message);
    res.status(500).json({ error: "Failed to update story content" });
  }
});
// Bind the HTTP server to port 8000 and log the address once it is listening.
function announceServerReady() {
  console.log("Server is running on http://localhost:8000");
}

app.listen(8000, announceServerReady);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment