This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using HtmlAgilityPack; | |
using ScrapySharp.Extensions; | |
using ScrapySharp.Network; | |
using System.IO; | |
using System.Globalization; | |
using CsvHelper; | |
namespace ScrapySharp_scraper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: prompts for a keyword, gathers ad links with <c>GetAdLinks</c>,
/// turns them into ad records with <c>GetAdDetails</c> and hands the result to
/// <c>ExportAdsToCsv</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter the Keyword :");

    // Console.ReadLine() returns null when standard input has no more lines
    // (closed or exhausted redirect). Fall back to an empty keyword so the
    // calls below never receive a null reference.
    var keyword = Console.ReadLine() ?? string.Empty;

    var adLinks = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");
    var lstAdDetails = GetAdDetails(adLinks, keyword);
    ExportAdsToCsv(lstAdDetails, keyword);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Writes the given ad records to a new CSV file named
/// <c>{Keyword}_{file-time stamp}.csv</c> inside the fixed CSVs output folder.
/// </summary>
/// <param name="lstAdDetails">Records handed to <c>CsvWriter.WriteRecords</c>.</param>
/// <param name="Keyword">
/// Keyword used as the file-name prefix. Null is treated as empty, and characters
/// that are not legal in a file name are replaced with <c>_</c>.
/// </param>
static void ExportAdsToCsv(List<AdDetails> lstAdDetails, string Keyword)
{
    const string outputDirectory = @"/Users/guest/Desktop/ScrapySharp_scraper/CSVs";

    // The keyword is typed by the user; a path separator or other illegal
    // character would otherwise point the writer at a different (usually
    // non-existent) location or make the path invalid.
    var safeKeyword = string.Join("_", (Keyword ?? string.Empty).Split(Path.GetInvalidFileNameChars()));

    // StreamWriter does not create missing folders. CreateDirectory does nothing
    // when the folder already exists.
    Directory.CreateDirectory(outputDirectory);

    var filePath = $"{outputDirectory}/{safeKeyword}_{DateTime.Now.ToFileTime()}.csv";

    using (var writer = new StreamWriter(filePath))
    using (var csv = new CsvWriter(writer, CultureInfo.InvariantCulture))
    {
        csv.WriteRecords(lstAdDetails);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: prompts for a keyword, gathers ad links with <c>GetAdLinks</c>
/// and builds the ad records with <c>GetAdDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter the Keyword :");

    // Console.ReadLine() returns null when standard input has no more lines;
    // use an empty keyword instead so GetAdDetails never receives null.
    var keyword = Console.ReadLine() ?? string.Empty;

    var adLinks = GetAdLinks("https://losangeles.craigslist.org/search/bbb?");

    // The records are collected but not consumed any further in this version.
    var lstAdDetails = GetAdDetails(adLinks, keyword);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Visits each ad URL, downloads the page with <c>GetHtml</c> and starts filling
/// one <c>AdDetails</c> object per page (title and cleaned-up description).
/// </summary>
/// <param name="urls">Ad page addresses to fetch, one request per entry.</param>
/// <param name="Keyword">
/// Not referenced in the lines visible here.
/// NOTE(review): confirm how the rest of the method uses it.
/// </param>
// The listing is cut off after the description clean-up; the remainder of the
// loop body and the return statement are not shown.
static List<AdDetails> GetAdDetails(List<string> urls, string Keyword){ | |
var lstAdDetails = new List<AdDetails>(); | |
foreach (var url in urls){ | |
var htmlNode = GetHtml(url); | |
var AdDetails = new AdDetails(); | |
// The title is the text of the document's <title> element.
// NOTE(review): SelectSingleNode presumably returns null when nothing matches,
// in which case .InnerText would throw - confirm and guard if so.
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
// The description is read through a fixed, position-based XPath (four nested
// <section> elements under <body>); the same null caveat applies.
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
// Remove the "QR Code Link to This Post" boilerplate string from the description.
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point: gathers ad links from the search page with <c>GetAdLinks</c>
/// and builds the ad records with <c>GetAdDetails</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    const string searchUrl = "https://losangeles.craigslist.org/search/bbb?";

    var adLinks = GetAdLinks(searchUrl);

    // The records are collected but not consumed any further in this version.
    var lstAdDetails = GetAdDetails(adLinks);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Visits each ad URL, downloads the page with <c>GetHtml</c> and starts filling
/// one <c>AdDetails</c> object per page (title and cleaned-up description).
/// </summary>
/// <param name="urls">Ad page addresses to fetch, one request per entry.</param>
// The listing is cut off after the description clean-up; the remainder of the
// loop body and the return statement are not shown.
static List<AdDetails> GetAdDetails(List<string> urls){ | |
var lstAdDetails = new List<AdDetails>(); | |
foreach (var url in urls){ | |
var htmlNode = GetHtml(url); | |
var AdDetails = new AdDetails(); | |
// The title is the text of the document's <title> element.
// NOTE(review): SelectSingleNode presumably returns null when nothing matches,
// in which case .InnerText would throw - confirm and guard if so.
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText; | |
// The description is read through a fixed, position-based XPath (four nested
// <section> elements under <body>); the same null caveat applies.
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText; | |
// Remove the "QR Code Link to This Post" boilerplate string from the description.
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", ""); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Plain data holder for one scraped ad. Lists of this type are passed to
/// <c>CsvWriter.WriteRecords</c> by <c>ExportAdsToCsv</c>, so member names and
/// order are left untouched (they presumably determine the CSV columns).
/// </summary>
public class AdDetails
{
    /// <summary>Title text of the ad page.</summary>
    public string AdTitle { get; set; }

    /// <summary>Ad body text after the boilerplate has been stripped.</summary>
    public string AdDescription { get; set; }

    /// <summary>
    /// Address associated with the ad. NOTE(review): presumably the ad page URL;
    /// the assignment is not visible in this listing.
    /// </summary>
    public string AdUrl { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Downloads the page at <paramref name="url"/> with <c>GetHtml</c> and collects
/// the href value of every anchor whose href contains ".html".
/// </summary>
/// <param name="url">Address of the page whose links are scanned.</param>
// The listing ends after the loop; the return statement and the closing brace
// of the method are not shown.
static List<string> GetAdLinks(string url){ | |
var mainPageAdLinks = new List<string>(); | |
var html = GetHtml(url); | |
// Select every <a> element in the fetched document.
var links = html.CssSelect("a"); | |
foreach (var link in links){ | |
// NOTE(review): for an anchor without an href attribute, Attributes["href"]
// is presumably null and .Value would throw - confirm and guard if so.
if(link.Attributes["href"].Value.Contains(".html")){ | |
mainPageAdLinks.Add(link.Attributes["href"].Value); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(dplyr) | |
# Read a movie page and return its cast as a single comma-separated string.
#
# movie_link: location of the movie page, passed straight to read_html().
# Returns: a length-one character vector holding the text of every node that
#   matches the CSS selector ".primary_photo+ td a", joined with ",".
get_cast <- function(movie_link) {
  movie_page <- read_html(movie_link)
  movie_cast <- movie_page %>%
    html_nodes(".primary_photo+ td a") %>%
    html_text() %>%
    paste(collapse = ",")
  return(movie_cast)
}
movie_list = data.frame() |
NewerOlder