-
-
Save amirhp-com/18c12f7f49be829bd51e2c4156903573 to your computer and use it in GitHub Desktop.
Web Scraper to Telegram Bot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Setup: create a KV namespace and bind it to this Worker under the
 * binding name `kv_link`.
 */
// Page that is scraped. The percent-encoded Persian path segment reads
// "scheduled outages".
const pageUrl = "https://ledc.ir/%D8%AE%D8%A7%D9%85%D9%88%D8%B4%DB%8C%D9%87%D8%A7%DB%8C-%D8%A8%D8%B1%D9%86%D8%A7%D9%85%D9%87-%D8%B1%DB%8C%D8%B2%DB%8C-%D8%B4%D8%AF%D9%87";

// SECURITY: this Telegram bot token is committed in plain text. Anyone who can
// read this file can act as the bot. Rotate it and load it from a Worker
// secret instead of keeping it in source.
const botToken = "8478782432:AAG7afaplinQtnGgLbjgEgsAhbUQHRRYXFo";

// Telegram chat (here a public channel username) that receives the messages.
const chatId = "@barghmire";
/**
 * Worker entry points.
 *
 * - `fetch`: runs the scrape for an incoming HTTP request and returns its
 *   Response (the request is forwarded so `handleRequest` can read its URL).
 * - `scheduled`: runs the same scrape without a request and keeps the
 *   invocation alive until it settles via `ctx.waitUntil`.
 */
const worker = {
  async fetch(request, env, ctx) {
    const response = handleRequest(env, request);
    return response;
  },

  async scheduled(event, env, ctx) {
    const pending = handleRequest(env);
    ctx.waitUntil(pending);
  },
};

export default worker;
/**
 * Scrapes the outage page and forwards the watched rows to a Telegram chat.
 *
 * Flow: load `pageUrl` through `gproxy`, keep the table rows that contain
 * every keyword of one of the watched keyword sets, extract the page date and,
 * unless that date was already announced, send the summary to Telegram and
 * store the date in KV under `last_date`.
 *
 * The date is stored only after Telegram accepted the message, so a failed
 * send is retried on the next run instead of being silently dropped.
 *
 * @param {object} env - Worker bindings; `env.kv_link` must provide `get`/`put`.
 * @param {Request|null} [request=null] - Incoming request, or null when there
 *   is none. With `?dev=1` the "no date" / "same date" guards are skipped and
 *   the fetched HTML is appended to the response body.
 * @returns {Promise<Response>} Plain-text result; status 500 with the error
 *   message when anything inside the scrape/send sequence throws.
 */
async function handleRequest(env, request = null) {
  const kv = env.kv_link;
  const dev = request
    ? new URL(request.url).searchParams.get("dev") === "1"
    : false;

  // Load the page through Google Translate.
  const url = gproxy(pageUrl);

  try {
    if (!url) {
      throw new Error("could not build the proxy URL for " + pageUrl);
    }

    const res = await fetch(url, {
      headers: {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "fa-IR,fa;q=0.9,en;q=0.8",
      },
    });
    const html = await res.text();

    // Start of page processing: a row is reported when it contains every
    // word of at least one of these keyword sets.
    const conditions = [
      ["بروجرد", "جعفری"],
      ["بروجرد", "بهار"],
    ];
    const rows = collectMatchingRows(html, conditions);

    const date = extractPersianDate(html) || "none";
    const lastDate = await kv.get("last_date");

    if (!dev) {
      if (date === "none") {
        return new Response("try again ...", { status: 200 });
      }
      if (lastDate === date) {
        return new Response("تاریخ مشابه قبلی است، ارسال انجام نشد.", { status: 200 });
      }
    }

    const lines = rows.map(
      (row) => `<b>${toTelegramHtml(row.label)}: ${toTelegramHtml(row.value)}</b>\n`
    );
    const message = `${toTelegramHtml(date)}\n\n${lines.join("")}`;
    // End of page processing.

    // Send to Telegram; throws when Telegram rejects the message, so the
    // date below is not stored for a message that never went out.
    await sendTelegramMessage(message);
    await kv.put("last_date", date);

    const body = dev ? `${message}\n${html}` : message;
    return new Response(body, {
      headers: { "Content-Type": "text/plain; charset=utf-8" },
    });
  } catch (err) {
    return new Response("Error: " + err.message, { status: 500 });
  }
}

/** Returns `{ label, value }` for each <tr> containing all words of one condition; value is the 3rd cell's text or null. */
function collectMatchingRows(html, conditions) {
  const rows = [];
  for (const [trHtml] of html.matchAll(/<tr[\s\S]*?<\/tr>/gi)) {
    const text = trHtml.replace(/<[^>]+>/g, " "); // plain text of the row
    const matched = conditions.find((words) =>
      words.every((word) => text.includes(word))
    );
    if (!matched) {
      continue;
    }
    const cells = [...trHtml.matchAll(/<td[^>]*>([\s\S]*?)<\/td>/gi)];
    const third = cells[2] ? cells[2][1].replace(/<[^>]+>/g, "").trim() : null;
    rows.push({ label: matched.join(","), value: third });
  }
  return rows;
}

/** Converts scraped text to text that is safe inside a Telegram HTML message (decodes common entities, then escapes & < >). */
function toTelegramHtml(value) {
  const plain = String(value)
    .replace(/&nbsp;/gi, " ")
    .replace(/&lt;/gi, "<")
    .replace(/&gt;/gi, ">")
    .replace(/&quot;/gi, '"')
    .replace(/&#0*39;/g, "'")
    .replace(/&amp;/gi, "&"); // last, so "&amp;lt;" is not decoded twice
  return plain
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/** Posts `text` to `chatId` through the Bot API `sendMessage` method; throws on a non-2xx response. */
async function sendTelegramMessage(text) {
  const res = await fetch(`https://api.telegram.org/bot${botToken}/sendMessage`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ chat_id: chatId, text, parse_mode: "HTML" }),
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`Telegram sendMessage failed (${res.status}): ${detail}`);
  }
}
/**
 * Rewrites a URL so the page is requested through the Google Translate web
 * proxy (`<encoded-host>.translate.goog`) with the `_x_tr_*` query parameters
 * this script uses.
 *
 * Host encoding: every "." becomes "-". A literal "-" is doubled first so it
 * cannot be confused with an encoded dot.
 * NOTE(review): the double-hyphen rule is taken from observed translate.goog
 * URLs, not from official documentation - confirm against a hyphenated host.
 *
 * @param {string} url - Absolute URL of the page to proxy.
 * @returns {string|null} The proxied URL, or null when `url` cannot be parsed.
 */
function gproxy(url) {
  let proxied;
  try {
    proxied = new URL(url);
  } catch {
    // Unparseable input: callers treat null as "no proxy URL available".
    return null;
  }

  const encodedHost = proxied.hostname.replace(/-/g, "--").replace(/\./g, "-");
  proxied.hostname = `${encodedHost}.translate.goog`;

  const translateParams = {
    _x_tr_sl: "en",
    _x_tr_tl: "fa",
    _x_tr_hl: "en",
    _x_tr_pto: "wapp",
  };
  for (const [key, value] of Object.entries(translateParams)) {
    proxied.searchParams.set(key, value);
  }

  return proxied.toString();
}
/**
 * Finds the first "<Persian weekday> YYYY/MM/DD" occurrence in `text`.
 *
 * Compound weekday names (yek/do/se/chahar/panj + "shanbe") are accepted
 * written together, with a space, or with a zero-width non-joiner (U+200C)
 * between the two parts. Without that, such spellings would only match from
 * the bare "shanbe" onward and report the wrong weekday. Digits may be ASCII
 * or Persian (U+06F0-U+06F9). Invisible and look-alike characters are written
 * as \u escapes so the pattern stays readable in any editor.
 *
 * @param {string} text - Text (e.g. page HTML) to search.
 * @returns {string|null} The matched substring exactly as it appears in
 *   `text`, or null when no date is found.
 */
function extractPersianDate(text) {
  // [\u06CC\u064A][\u06A9\u0643]: "yek" with either the Persian or the Arabic
  // code points for yeh and kaf.
  const datePattern =
    /(?:(?:(?:[\u06CC\u064A][\u06A9\u0643]|دو|سه|چهار|پنج)[\s\u200C]?)?شنبه|جمعه)\s+[\d\u06F0-\u06F9]{4}\/[\d\u06F0-\u06F9]{2}\/[\d\u06F0-\u06F9]{2}/;
  const match = text.match(datePattern);
  return match ? match[0] : null;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment