Skip to content

Instantly share code, notes, and snippets.

@devAgam
Last active August 26, 2024 04:26
Show Gist options
  • Save devAgam/c650ac67336c659fbe9cdf17a3ca216c to your computer and use it in GitHub Desktop.
Save devAgam/c650ac67336c659fbe9cdf17a3ca216c to your computer and use it in GitHub Desktop.
const express = require("express");
const mongoose = require("mongoose");
const cors = require("cors");
const app = express();
const htmlToText = require("html-to-text");
// Allow cross-origin requests to every route of this API.
app.use(cors());
// Parse JSON request bodies. The limit is raised to 50mb, presumably because
// whole scraped page bodies are posted to /update-story-content.
app.use(express.json({ limit: "50mb" }));

// Connect to the local MongoDB database used by the scraper.
// connect() returns a promise; handle its rejection explicitly so that a
// failed initial connection is reported clearly instead of surfacing as an
// unhandled promise rejection. The server cannot do anything useful without
// its database, so the process exits with a non-zero status.
mongoose
  .connect("mongodb://localhost:27017/web-scraper", {
    useNewUrlParser: true,
    useUnifiedTopology: true,
  })
  .catch((error) => {
    console.error("Failed to connect to MongoDB:", error.message);
    process.exit(1);
  });
// Field definitions for a scraped story document.
const storyFields = {
  title: { type: String },
  // The link identifies a story; the unique index makes MongoDB reject a
  // second document with the same link (duplicate-key error code 11000).
  link: { type: String, unique: true },
  category: { type: String },
  tags: { type: [String] },
  date: { type: Date },
  excerpt: { type: String },
  // Story body text, filled in by the /update-story-content route.
  sourceBody: { type: String },
  // Scraping status flags reported back by the scraper.
  gotStoryContent: { type: Boolean },
  errorScraping: { type: Boolean },
};

const storySchema = new mongoose.Schema(storyFields);

// Model registered under the name "stories".
const Story = mongoose.model("stories", storySchema);
// API endpoint to receive data.
//
// POST /api/stories
// Body:    { stories: [ { title, link, ... }, ... ] }
// Returns: 201 with { message, saved, duplicates, failed } once every story
//          has been processed;
//          400 when the body does not contain a "stories" array;
//          500 when an unexpected error aborts processing.
//
// Stories are saved one at a time so that a single bad or duplicate entry
// does not prevent the remaining ones from being stored.
app.post("/api/stories", async (req, res) => {
  const stories = req.body ? req.body.stories : undefined;

  // Reject malformed input up front instead of letting the loop below throw
  // and report it as a server error.
  if (!Array.isArray(stories)) {
    return res
      .status(400)
      .json({ error: 'Request body must contain a "stories" array' });
  }

  let saved = 0;
  let duplicates = 0;
  let failed = 0;

  try {
    for (const story of stories) {
      try {
        const newStory = new Story(story);
        await newStory.save();
        saved += 1;
        console.log("Story saved:", newStory.title);
      } catch (error) {
        // 11000 is MongoDB's duplicate-key error, raised here by the unique
        // index on `link`; a duplicate is expected and simply skipped.
        if (error.code === 11000) {
          duplicates += 1;
          console.error("Duplicate story found:", story.title);
          continue;
        }
        failed += 1;
        console.error("Failed to save story:", error.message);
      }
    }

    // Status and message are kept for existing clients; the counts let
    // callers see how many stories were actually stored.
    res.status(201).json({
      message: "Stories saved successfully",
      saved,
      duplicates,
      failed,
    });
  } catch (error) {
    console.error("Unexpected error while saving stories:", error.message);
    res.status(500).json({ error: "Failed to save stories" });
  }
});
// Sends up to 5 stories to the extension on demand to scrape.
//
// GET /get-where-no-story-content
// Returns: a JSON array of at most 5 stories that still need their content:
//          either `gotStoryContent` was never set, or it is false and no
//          scraping error has been recorded for the story;
//          500 with { error } when the database query fails.
app.get("/get-where-no-story-content", async (req, res) => {
  try {
    const pendingStories = await Story.find({
      $or: [
        { gotStoryContent: { $exists: false } },
        {
          gotStoryContent: false,
          errorScraping: false,
        },
      ],
    }).limit(5);
    res.json(pendingStories);
  } catch (error) {
    // Without this handler a rejected query would leave the request hanging
    // and surface as an unhandled promise rejection.
    console.error("Failed to fetch stories without content:", error.message);
    res.status(500).json({ error: "Failed to fetch stories" });
  }
});
// Receives the story body from the extension.
//
// POST /update-story-content/:id
// Body:    { sourceBody, gotStoryContent, errorScraping }
//          `sourceBody` is HTML; it is converted to plain text before being
//          stored. When it is not a string the stored body is left untouched
//          and only the status flags are updated.
// Returns: the updated story document;
//          400 when the id is not a valid document id;
//          404 when no story has that id;
//          500 when the update fails for any other reason.
app.post("/update-story-content/:id", async (req, res) => {
  const { id } = req.params;
  const { sourceBody, gotStoryContent, errorScraping } = req.body || {};

  try {
    const update = { gotStoryContent, errorScraping };

    // Only run the converter when HTML was actually supplied.
    if (typeof sourceBody === "string") {
      update.sourceBody = htmlToText.convert(sourceBody, {
        wordwrap: 130,
        selectors: [
          { selector: "a", format: "inline" },
          { selector: "img", format: "skip" },
        ],
      });
    }

    // `new: true` makes Mongoose return the document as it is after the update.
    const story = await Story.findByIdAndUpdate(id, update, { new: true });

    if (!story) {
      return res.status(404).json({ error: "Story not found" });
    }
    res.json(story);
  } catch (error) {
    // Mongoose raises a CastError when `id` cannot be cast to an ObjectId.
    if (error.name === "CastError") {
      return res.status(400).json({ error: "Invalid story id" });
    }
    console.error("Failed to update story content:", error.message);
    res.status(500).json({ error: "Failed to update story content" });
  }
});
// Start the server
const PORT = 8000;

// Logs the address once the server is accepting connections.
const announceReady = () => {
  console.log("Server is running on http://localhost:8000");
};

app.listen(PORT, announceReady);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment