Created
February 23, 2021 16:32
-
-
Save saasindustries/ac2ddf9f26911f300808042a6971cf95 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using HtmlAgilityPack; | |
using ScrapySharp.Extensions; | |
using ScrapySharp.Network; | |
using System.IO; | |
using System.Globalization; | |
using CsvHelper; | |
namespace ScrapySharp_scraper | |
{ | |
/// <summary>
/// Console scraper: reads a keyword from standard input, collects the ad links found on a
/// Craigslist search page, downloads every linked ad, keeps the ads whose title or
/// description contains the keyword (case-insensitively) and writes them to a CSV file.
/// </summary>
class Program
{
    // A single browser instance is reused for every page request made during the run.
    private static readonly ScrapingBrowser _scrapingbrowser = new ScrapingBrowser();

    /// <summary>
    /// Entry point: prompts for the keyword, scrapes the search page and exports the matches.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Please enter the Keyword :");

        // Console.ReadLine returns null once standard input is closed; fall back to an
        // empty keyword (which matches every ad) instead of crashing later on.
        var keyword = Console.ReadLine() ?? string.Empty;

        var adLinks = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");
        var lstAdDetails = GetAdDetails(adLinks, keyword);
        ExportAdsToCsv(lstAdDetails, keyword);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns the distinct absolute http(s) URLs of
    /// every anchor whose <c>href</c> contains ".html".
    /// </summary>
    /// <param name="url">Absolute URL of the page to scan for links.</param>
    /// <returns>The matching links in document order, without duplicates.</returns>
    private static List<string> GetAdLinks(string url)
    {
        var mainPageAdLinks = new List<string>();
        var seenLinks = new HashSet<string>(StringComparer.Ordinal);
        var pageUri = new Uri(url);
        var html = GetHtml(url);

        foreach (var link in html.CssSelect("a"))
        {
            // Anchors without an href attribute yield the default value instead of a
            // null attribute that would throw on dereference.
            var href = link.GetAttributeValue("href", string.Empty);
            if (!href.Contains(".html"))
            {
                continue;
            }

            // Resolve relative hrefs against the page URL so that GetHtml always
            // receives an absolute URI; skip anything that is not a web address.
            Uri adUri;
            if (!Uri.TryCreate(pageUri, href, out adUri))
            {
                continue;
            }

            if (adUri.Scheme != Uri.UriSchemeHttp && adUri.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            // The same target can be referenced by more than one anchor; download it once.
            if (seenLinks.Add(adUri.AbsoluteUri))
            {
                mainPageAdLinks.Add(adUri.AbsoluteUri);
            }
        }

        return mainPageAdLinks;
    }

    /// <summary>
    /// Downloads each URL and returns the ads whose title or description contains
    /// <paramref name="keyword"/>, ignoring case.
    /// </summary>
    /// <param name="urls">Absolute URLs of the ad pages to download.</param>
    /// <param name="keyword">Text to look for; null is treated as empty and matches every ad.</param>
    /// <returns>The matching ads, in the order of <paramref name="urls"/>.</returns>
    private static List<AdDetails> GetAdDetails(List<string> urls, string keyword)
    {
        var lstAdDetails = new List<AdDetails>();
        var searchTerm = keyword ?? string.Empty;

        foreach (var url in urls)
        {
            var document = GetHtml(url).OwnerDocument.DocumentNode;

            // SelectSingleNode returns null when the XPath matches nothing (for example
            // on a page with a different layout); use empty text in that case.
            var titleNode = document.SelectSingleNode("//html/head/title");
            var descriptionNode = document.SelectSingleNode("//html/body/section/section/section/section");
            var title = titleNode == null ? string.Empty : titleNode.InnerText;
            var description = descriptionNode == null ? string.Empty : descriptionNode.InnerText;

            var adDetails = new AdDetails
            {
                AdTitle = title,
                // Strips the "QR Code Link to This Post" boilerplate from the description.
                // NOTE(review): the whitespace inside this literal has to match the page
                // text exactly - confirm against a live page.
                AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""),
                AdUrl = url
            };

            if (ContainsIgnoreCase(adDetails.AdTitle, searchTerm)
                || ContainsIgnoreCase(adDetails.AdDescription, searchTerm))
            {
                lstAdDetails.Add(adDetails);
            }
        }

        return lstAdDetails;
    }

    // Culture-independent, case-insensitive substring test (replaces ToLower().Contains()).
    private static bool ContainsIgnoreCase(string text, string value)
    {
        return text.IndexOf(value, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Writes the ads to a CSV file named "{keyword}_{file-time stamp}.csv" in the
    /// output directory, creating that directory when it does not exist.
    /// </summary>
    /// <param name="lstAdDetails">Ads to write, one CSV record each.</param>
    /// <param name="keyword">Keyword used as the file-name prefix.</param>
    private static void ExportAdsToCsv(List<AdDetails> lstAdDetails, string keyword)
    {
        const string outputDirectory = @"/Users/guest/Desktop/ScrapySharp_scraper/CSVs";

        // StreamWriter does not create missing directories; CreateDirectory is a no-op
        // when the directory already exists.
        Directory.CreateDirectory(outputDirectory);

        // Characters that are not allowed in file names (such as '/') would otherwise
        // change the target path or make the file impossible to create.
        var safeKeyword = string.Join("_", (keyword ?? string.Empty).Split(Path.GetInvalidFileNameChars()));
        var filePath = $"{outputDirectory}/{safeKeyword}_{DateTime.Now.ToFileTime()}.csv";

        using (var writer = new StreamWriter(filePath))
        using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
        {
            csv.WriteRecords(lstAdDetails);
        }
    }

    /// <summary>
    /// Navigates the shared browser to <paramref name="url"/> and returns the page's HTML node.
    /// </summary>
    /// <param name="url">Absolute URL of the page to download.</param>
    /// <returns>The HTML node exposed by the downloaded page.</returns>
    private static HtmlNode GetHtml(string url)
    {
        WebPage webPage = _scrapingbrowser.NavigateToPage(new Uri(url));
        return webPage.Html;
    }
}
/// <summary>
/// Data scraped from a single ad page.
/// </summary>
public class AdDetails
{
    /// <summary>Title text of the ad.</summary>
    public string AdTitle { get; set; }

    /// <summary>Description text of the ad.</summary>
    public string AdDescription { get; set; }

    /// <summary>URL of the ad.</summary>
    public string AdUrl { get; set; }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment