Last active
September 19, 2016 19:52
-
-
Save calebhearth/a37d06b091d2ae59651db4e42e57c6cb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
❯ go run main.go | |
222224 | |
# a few seconds pass | |
^Csignal: interrupt |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://sdwssrh00:8080/solr/collection/select?q=*:*&fq=CrdDepartmentId%3A1&wt=csv&rows=10000000&indent=true&fl=Id&csv.header=false | |
222224 | |
219390 | |
219391 | |
219900 | |
221437 | |
223072 | |
223071 | |
223067 | |
223068 | |
223070 | |
223069 | |
228170 | |
228168 | |
228177 | |
228172 | |
228171 | |
228179 | |
230938 | |
232209 | |
232204 | |
232208 | |
232212 | |
232211 | |
232207 | |
236111 | |
230934 | |
230932 | |
230933 | |
228931 | |
230004 | |
231173 | |
231174 | |
231172 | |
# 12k more |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"encoding/csv" | |
"encoding/json" | |
"fmt" | |
"io/ioutil" | |
"net/http" | |
"reflect" | |
"strconv" | |
) | |
// department is the integer type of CollectionObject's Department field.
type department int
type wrap struct { | |
CollectionObject CollectionObject | |
} | |
// Constituent is one element of CollectionObject.Constituents.
//
// The JSON keys differ from the field names: Name is decoded from
// "ArtistName", Nationality from "ArtistNationalityIndexed" and
// CreatorLifeEnd from "EndDate".
type Constituent struct {
	Name           string `json:"ArtistName"`
	Nationality    string `json:"ArtistNationalityIndexed"`
	CreatorLifeEnd string `json:"EndDate"`
}
// Geography is one element of CollectionObject.Geography.
//
// Every field is decoded from the JSON key matching its name, except
// GeographyType, which is decoded from "GeoCode".
type Geography struct {
	City          string
	State         string
	Country       string
	Region        string
	SubRegion     string
	GeographyType string `json:"GeoCode"`
}
type CollectionObject struct { | |
ID string `json:"Id"` | |
WorkType string `json:"ObjectName"` | |
Culture string | |
Period string | |
Dynasty string | |
Reign string | |
DisplayTitle string `json:"Title"` | |
Constituents []Constituent | |
EarliestDisplayDate string `json:"DateBegin"` | |
LatestDisplayDate string `json:"DateEnd"` | |
MaterialsTech string `json:"Medium"` | |
Credit string `json:"CreditLine"` | |
AccessionNumber string | |
YearAcquired string | |
Geography []Geography | |
Classification string | |
Department department | |
} | |
func main() { | |
// Pull csv for all ids in department 1 | |
resp, err := http.Get("http://sdwssrh00:8080/solr/collection/select?q=*:*&fq=CrdDepartmentId%3A1&wt=csv&rows=10000000&indent=true&fl=Id&csv.header=false") | |
if err != nil { | |
panic(err) | |
} | |
// Request the json for each id | |
urlProducer := make(chan string) | |
scanner := bufio.NewScanner(resp.Body) | |
for scanner.Scan() { | |
id, err := strconv.Atoi(scanner.Text()) | |
if err != nil { | |
panic(err) | |
} | |
fmt.Println(id) | |
urlProducer <- fmt.Sprintf("http://www.metmuseum.org/api/collection/collectionobject/%d", id) | |
} | |
resp.Body.Close() | |
sem := make(chan bool, 100) | |
for i := 0; i < 100; i++ { | |
sem <- true | |
} | |
ch := make(chan []byte) | |
for url := range urlProducer { | |
fmt.Println(urlProducer) | |
go func() { | |
<-sem | |
defer func() { sem <- true }() | |
resp, err := http.Get(url) | |
if err != nil { | |
panic(err) | |
} | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
panic(err) | |
} | |
resp.Body.Close() | |
ch <- body | |
}() | |
} | |
var csvBytes bytes.Buffer | |
csvWriter := csv.NewWriter(&csvBytes) | |
var rows [][]string | |
for jsonBody := range ch { | |
var w wrap | |
err = json.Unmarshal(jsonBody, &w) | |
if err != nil { | |
panic(err) | |
} | |
o := w.CollectionObject | |
rows = append(rows, []string{ | |
o.WorkType, | |
o.Culture, | |
o.Period, | |
o.Dynasty, | |
o.Reign, | |
o.DisplayTitle, | |
concatC(o.Constituents, "Name"), | |
concatC(o.Constituents, "Nationality"), | |
"", | |
concatC(o.Constituents, "CreatorLifeEnd"), | |
o.EarliestDisplayDate, | |
o.LatestDisplayDate, | |
o.MaterialsTech, | |
o.Credit, | |
o.AccessionNumber, | |
o.YearAcquired, | |
o.Classification, | |
concatG(o.Geography, "GeographyType"), | |
concatG(o.Geography, "City"), | |
concatG(o.Geography, "State"), | |
concatG(o.Geography, "Country"), | |
concatG(o.Geography, "Region"), | |
concatG(o.Geography, "Subregion"), | |
strconv.Itoa(int(o.Department)), | |
fmt.Sprintf("http://www.metmuseum.org/art/collection/search/", o.ID), | |
}) | |
} | |
close(ch) | |
csvWriter.WriteAll(rows) | |
fmt.Println(csvBytes.String()) | |
} | |
func concatC(os []Constituent, field string) string { | |
var concat string | |
for _, o := range os { | |
r := reflect.ValueOf(o) | |
f := reflect.Indirect(r).FieldByName(field) | |
concat = fmt.Sprintf("%s, %s", concat, string(f.String())) | |
} | |
if len(concat) < 1 { | |
return "" | |
} | |
return concat[1:] | |
} | |
func concatG(os []Geography, field string) string { | |
var concat string | |
for _, o := range os { | |
r := reflect.ValueOf(o) | |
f := reflect.Indirect(r).FieldByName(field) | |
concat = fmt.Sprintf("%s, %s", concat, string(f.String())) | |
} | |
if len(concat) < 1 { | |
return "" | |
} | |
return concat[1:] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment