Last active
November 20, 2018 06:21
-
-
Save jlubawy/fcfa0adf023ed8f2017d8f475b01cdd0 to your computer and use it in GitHub Desktop.
go-boilerpipe example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"encoding/csv"
	"io"
	"net/http"
	"net/http/cookiejar"
	"os"

	"github.com/jlubawy/go-boilerpipe"
	"github.com/jlubawy/go-cli"
	"golang.org/x/net/publicsuffix"
)
func main() { | |
jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List}) | |
if err != nil { | |
cli.Fatalf("error creating cookie jar: %v\n", err) | |
} | |
client := &http.Client{ | |
Jar: jar, | |
} | |
// Open input CSV file | |
f, err := os.Open("comparision.csv") | |
if err != nil { | |
cli.Fatalf("error opening file: %v\n", err) | |
} | |
defer f.Close() | |
r := csv.NewReader(f) | |
// Open output CSV file | |
of, err := os.Create("output.csv") | |
if err != nil { | |
cli.Fatalf("error opening output file: %v\n", err) | |
} | |
defer of.Close() | |
w := csv.NewWriter(of) | |
i := -1 | |
for i < 10 { | |
rec, err := r.Read() | |
if err != nil { | |
cli.Fatalf("error reading csv: %v\n", err) | |
} | |
i += 1 | |
if i <= 0 { | |
if err := w.Write(append(rec, "go-boilerpipe")); err != nil { | |
cli.Fatalf("error writing record: %v\n", err) | |
} | |
continue | |
} | |
if len(rec) != 4 { | |
cli.Fatalf("expected 4 entries: %v\n", rec) | |
} | |
u := rec[0] | |
cli.Infof("downloading %s\n", u) | |
resp, err := client.Get(u) | |
if err != nil { | |
cli.Fatalf("error getting url: %v\n", err) | |
} | |
if resp.StatusCode >= 400 { | |
cli.Fatalf("received http error response: %d\n", resp.StatusCode) | |
} | |
doc, err := boilerpipe.ParseDocument(resp.Body) | |
if err != nil { | |
cli.Fatalf("error parsing response: %v\n", err) | |
} | |
boilerpipe.ArticlePipline.Process(doc) | |
resp.Body.Close() | |
if err := w.Write(append(rec, doc.Content())); err != nil { | |
cli.Fatalf("error writing record: %v\n", err) | |
} | |
w.Flush() | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Link | Boilerpipe | Goose | GoPipe | go-boilerpipe | |
---|---|---|---|---|---|
https://www.facebook.com/1stResponsehvac/videos/quick-overview-of-a-slim-ducted-mini-split/2173471592887121/ | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | See more of 1st Response Heating & Air Conditioning Solutions on Facebook Log In | |
https://www.facebook.com/194522123909544/photos/a.660273257334426.1073741825.194522123909544/1266494310045648/?type=3 | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/1612020065750808/posts/http:-onhax-net-edjing-pro/1724551284497685/ | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/137085536969129/photos/a.137089116968771.1073741828.137085536969129/137090533635296/ | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/1845OilFieldServices/jobs/802814073251143 | Jobs on Facebook - ★ CDL - OWNER OPERATORS for FRAC SAND Hauling - $4000 Sign On - South Texas ★Jump to Sections of this page Accessibility Help Press alt + / to open this menu RemoveTo help personalize content, tailor and measure ads, and provide a safer experience, we use cookies. By clicking or navigating the site, you agree to allow our collection of information on and off Facebook through cookies. Learn more, including about available controls: Cookies Policy . | Jump to p p Press + to open this menualt/ Sign Up | |||
https://www.facebook.com/174212169998989 | Caribbean Rocks - O2 Forum Kentish TownJump to Sections of this page Accessibility Help Press alt + / to open this menu RemoveTo help personalize content, tailor and measure ads, and provide a safer experience, we use cookies. By clicking or navigating the site, you agree to allow our collection of information on and off Facebook through cookies. Learn more, including about available controls: Cookies Policy . MAR30 | Jump to p p Press + to open this menualt/ Sign Up | Caribbean Rocks - O2 Forum Kentish Town Public | Caribbean Rocks - O2 Forum Kentish Town Public | |
https://www.facebook.com/1052SSFM/posts/565947686833402 | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/1653653378040448/videos/1694435330628919/ | FacebookEnglish (UK) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/1malaysiacitizenedit/videos/10156240117079801/ | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgotten account? Create New Account | Facebook You must log in to continue. Log in to Facebook | Facebook You must log in to continue. Log into Facebook | |
https://www.facebook.com/1615524152042637/photos/a.1615792468682472.1073741827.1615524152042637/1658078534453865 | FacebookEnglish (US) | Jump to p p Press + to open this menualt/ For a better experience on Facebook, update your browser. Sign Up Notice You must log in to continue. p Forgot account? Create New Account | Facebook You must log in to continue. Log into Facebook | Facebook You must log in to continue. Log into Facebook |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment