This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System; | |
| using System.Collections.Generic; | |
| using HtmlAgilityPack; | |
| using ScrapySharp.Extensions; | |
| using ScrapySharp.Network; | |
| using System.IO; | |
| using System.Globalization; | |
| using CsvHelper; | |
| namespace ScrapySharp_scraper |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: prompts for a keyword on the console, fetches the ad links from the
/// Los Angeles Craigslist search page, hands the links and the keyword to
/// <c>GetAdDetails</c>, and exports the resulting records with <c>ExportAdsToCsv</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter the Keyword :");
    string searchKeyword = Console.ReadLine();

    List<string> listingUrls = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");
    List<AdDetails> collectedAds = GetAdDetails(listingUrls, searchKeyword);

    // The keyword is passed again so the export can use it in the output file name.
    ExportAdsToCsv(collectedAds, searchKeyword);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Writes the given ad records to a CSV file whose name is built from the search
/// keyword and the current time (<see cref="DateTime.ToFileTime"/>).
/// </summary>
/// <param name="lstAdDetails">Ad records handed to CsvHelper's <c>WriteRecords</c>.</param>
/// <param name="Keyword">
/// Search keyword used as the file-name prefix. Characters that are not valid in a file
/// name (including directory separators) are replaced with '_' so the keyword cannot
/// redirect the write to another directory; <c>null</c> is treated as an empty prefix.
/// </param>
/// <param name="outputDirectory">
/// Directory that receives the CSV file; it is created when missing. Defaults to the
/// location this method previously hard-coded, so existing callers are unaffected.
/// </param>
static void ExportAdsToCsv(
    List<AdDetails> lstAdDetails,
    string Keyword,
    string outputDirectory = "/Users/guest/Desktop/ScrapySharp_scraper/CSVs")
{
    // The keyword originates from console input, so it must not be used verbatim
    // as a path fragment.
    var safeKeyword = string.Join("_", (Keyword ?? string.Empty).Split(Path.GetInvalidFileNameChars()));
    var fileName = $"{safeKeyword}_{DateTime.Now.ToFileTime()}.csv";

    // StreamWriter does not create missing folders; CreateDirectory is a no-op when
    // the directory already exists.
    Directory.CreateDirectory(outputDirectory);

    using (var writer = new StreamWriter(Path.Combine(outputDirectory, fileName)))
    using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
    {
        csv.WriteRecords(lstAdDetails);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: prompts for a keyword on the console, fetches the ad links from the
/// Los Angeles Craigslist search page, and passes the links together with the keyword
/// to <c>GetAdDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter the Keyword :");
    string searchKeyword = Console.ReadLine();

    List<string> listingUrls = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");

    // The result is collected but not consumed yet in this version of the program.
    List<AdDetails> collectedAds = GetAdDetails(listingUrls, searchKeyword);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| static List<AdDetails> GetAdDetails(List<string> urls, string Keyword){ | |
| var lstAdDetails = new List<AdDetails>(); | |
| foreach (var url in urls){ | |
| var htmlNode = GetHtml(url); | |
| var AdDetails = new AdDetails(); | |
| AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
| var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
| AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: fetches the ad links from the Los Angeles Craigslist search page and
/// passes them to <c>GetAdDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    List<string> listingUrls = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");

    // The result is collected but not consumed yet in this version of the program.
    List<AdDetails> collectedAds = GetAdDetails(listingUrls);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| static List<AdDetails> GetAdDetails(List<string> urls){ | |
| var lstAdDetails = new List<AdDetails>(); | |
| foreach (var url in urls){ | |
| var htmlNode = GetHtml(url); | |
| var AdDetails = new AdDetails(); | |
| AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
| var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
| AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Plain data holder for one scraped ad. The property order is kept as declared
/// because the records are written out as CSV rows.
/// </summary>
public class AdDetails
{
    /// <summary>Text of the ad page's title element, as assigned in <c>GetAdDetails</c>.</summary>
    public string AdTitle { get; set; }

    /// <summary>Body text of the ad with the "QR Code Link to This Post" boilerplate removed.</summary>
    public string AdDescription { get; set; }

    /// <summary>Presumably the address of the ad page; the assignment is not visible here.</summary>
    public string AdUrl { get; set; }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| static List<string> GetAdLinks(string url){ | |
| var mainPageAdLinks = new List<string>(); | |
| var html = GetHtml(url); | |
| var links = html.CssSelect("a"); | |
| foreach (var link in links){ | |
| if(link.Attributes["href"].Value.Contains(".html")){ | |
| mainPageAdLinks.Add(link.Attributes["href"].Value); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(rvest) | |
| library(dplyr) | |
# Collect the link texts matched by ".primary_photo+ td a" on a movie page.
#
# movie_link: location of the movie page, passed straight to read_html().
# Returns one character string: the text of every matched node joined with ",".
get_cast <- function(movie_link) {
  page <- read_html(movie_link)
  cast_nodes <- html_nodes(page, ".primary_photo+ td a")
  cast_names <- html_text(cast_nodes)
  paste(cast_names, collapse = ",")
}
| movie_list = data.frame() |
NewerOlder