Skip to content

Instantly share code, notes, and snippets.

View saasindustries's full-sized avatar

SaaS Industries saasindustries

View GitHub Profile
using System;
using System.Collections.Generic;
using HtmlAgilityPack;
using ScrapySharp.Extensions;
using ScrapySharp.Network;
using System.IO;
using System.Globalization;
using CsvHelper;
namespace ScrapySharp_scraper
/// <summary>
/// Program entry point: asks for a keyword on the console, gathers ad links from the
/// Los Angeles Craigslist search page, builds the ad details for those links and
/// exports them to a CSV file.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string searchPageUrl = "https://losangeles.craigslist.org/search/bbb?";

    Console.WriteLine("Please enter the Keyword :");
    var searchKeyword = Console.ReadLine();

    // Evaluation order: links are fetched first, then the details, then the CSV export.
    ExportAdsToCsv(GetAdDetails(GetAdLinks(searchPageUrl), searchKeyword), searchKeyword);
}
/// <summary>
/// Writes the given ads to a new CSV file whose name is built from the keyword and
/// the current file time.
/// </summary>
/// <param name="lstAdDetails">Records handed to <c>CsvWriter.WriteRecords</c>.</param>
/// <param name="Keyword">
/// Keyword used as the file-name prefix. Characters that are not valid in a file name
/// are replaced with '_'; <c>null</c> is treated as an empty prefix.
/// </param>
static void ExportAdsToCsv(List<AdDetails> lstAdDetails, string Keyword){
    const string outputDirectory = "/Users/guest/Desktop/ScrapySharp_scraper/CSVs";

    // The keyword is typed in by the user, so it may contain path separators or other
    // characters that are illegal in a file name; neutralise them before building the path.
    var safeKeyword = string.Join("_", (Keyword ?? string.Empty).Split(Path.GetInvalidFileNameChars()));

    // StreamWriter does not create missing directories; CreateDirectory is a no-op when it exists.
    Directory.CreateDirectory(outputDirectory);

    var csvPath = $"{outputDirectory}/{safeKeyword}_{DateTime.Now.ToFileTime()}.csv";
    using(var writer = new StreamWriter(csvPath))
    using(var csv = new CsvWriter(writer, CultureInfo.InvariantCulture)){
        csv.WriteRecords(lstAdDetails);
    }
}
/// <summary>
/// Program entry point: asks for a keyword on the console, gathers ad links from the
/// Los Angeles Craigslist search page and builds the ad details for those links.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string searchPageUrl = "https://losangeles.craigslist.org/search/bbb?";

    Console.WriteLine("Please enter the Keyword :");
    var searchKeyword = Console.ReadLine();

    // Links are fetched before the details are built; the result is not used further here.
    var scrapedAds = GetAdDetails(GetAdLinks(searchPageUrl), searchKeyword);
}
// Starts building a list of AdDetails: for each URL it loads the page through GetHtml
// and fills in the ad's title and description.
// NOTE(review): this definition is cut off in this excerpt — the loop and the method are
// never closed, and how `Keyword` is used is not visible here.
static List<AdDetails> GetAdDetails(List<string> urls, string Keyword){
var lstAdDetails = new List<AdDetails>();
foreach (var url in urls){
var htmlNode = GetHtml(url);
var AdDetails = new AdDetails();
// Title text is read from the document's <title> element.
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText;
// NOTE(review): SelectSingleNode presumably returns null when the XPath matches nothing,
// in which case .InnerText would throw — confirm every ad page has these nodes.
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText;
// Remove the "QR Code Link to This Post" boilerplate from the description text.
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", "");
/// <summary>
/// Program entry point: gathers ad links from the Los Angeles Craigslist search page
/// and builds the ad details for those links.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const string searchPageUrl = "https://losangeles.craigslist.org/search/bbb?";

    // Links are fetched before the details are built; the result is not used further here.
    var scrapedAds = GetAdDetails(GetAdLinks(searchPageUrl));
}
// Starts building a list of AdDetails: for each URL it loads the page through GetHtml
// and fills in the ad's title and description.
// NOTE(review): this definition is cut off in this excerpt — the loop and the method are
// never closed, so what happens to each AdDetails afterwards is not visible here.
static List<AdDetails> GetAdDetails(List<string> urls){
var lstAdDetails = new List<AdDetails>();
foreach (var url in urls){
var htmlNode = GetHtml(url);
var AdDetails = new AdDetails();
// Title text is read from the document's <title> element.
AdDetails.AdTitle = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/head/title").InnerText;
// NOTE(review): SelectSingleNode presumably returns null when the XPath matches nothing,
// in which case .InnerText would throw — confirm every ad page has these nodes.
var description = htmlNode.OwnerDocument.DocumentNode.SelectSingleNode("//html/body/section/section/section/section").InnerText;
// Remove the "QR Code Link to This Post" boilerplate from the description text.
AdDetails.AdDescription = description.Replace("\n \n QR Code Link to This Post\n \n \n", "");
/// <summary>
/// Plain data holder for one scraped ad: its title, description and URL.
/// </summary>
public class AdDetails
{
    /// <summary>Title of the ad.</summary>
    public string AdTitle { get; set; }

    /// <summary>Description text of the ad.</summary>
    public string AdDescription { get; set; }

    /// <summary>URL of the ad.</summary>
    public string AdUrl { get; set; }
}
// Loads the page at `url` through GetHtml and collects the href value of every <a>
// element whose href contains ".html".
// NOTE(review): this definition is cut off in this excerpt — no return statement or
// closing brace is visible.
static List<string> GetAdLinks(string url){
var mainPageAdLinks = new List<string>();
var html = GetHtml(url);
var links = html.CssSelect("a");
foreach (var link in links){
// NOTE(review): Attributes["href"] is presumably null for anchors that have no href,
// which would make .Value throw here — confirm and guard if so.
if(link.Attributes["href"].Value.Contains(".html")){
mainPageAdLinks.Add(link.Attributes["href"].Value);
}
}
library(rvest)
library(dplyr)
# Read the page at `movie_link`, select the nodes matching ".primary_photo+ td a",
# and return their text joined into a single comma-separated string.
get_cast = function(movie_link) {
  cast_nodes = html_nodes(read_html(movie_link), ".primary_photo+ td a")
  paste(html_text(cast_nodes), collapse = ",")
}
# Start with an empty data frame (presumably filled in by code after this excerpt — confirm).
movie_list = data.frame()