Skip to content

Instantly share code, notes, and snippets.

@calebhearth
Last active September 19, 2016 19:52
Show Gist options
  • Save calebhearth/a37d06b091d2ae59651db4e42e57c6cb to your computer and use it in GitHub Desktop.
Save calebhearth/a37d06b091d2ae59651db4e42e57c6cb to your computer and use it in GitHub Desktop.
❯ go run main.go
222224
# a few seconds pass
^Csignal: interrupt
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
# http://sdwssrh00:8080/solr/collection/select?q=*:*&fq=CrdDepartmentId%3A1&wt=csv&rows=10000000&indent=true&fl=Id&csv.header=false
222224
219390
219391
219900
221437
223072
223071
223067
223068
223070
223069
228170
228168
228177
228172
228171
228179
230938
232209
232204
232208
232212
232211
232207
236111
230934
230932
230933
228931
230004
231173
231174
231172
# 12k more
package main
import (
"bufio"
"bytes"
"encoding/csv"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"reflect"
"strconv"
)
// department is the integer type of CollectionObject.Department; main converts
// it back to a plain int when formatting the CSV row.
// NOTE(review): presumably the museum's numeric department id — confirm against the API.
type department int
// wrap is the envelope each object's JSON response is unmarshalled into; main
// only reads its CollectionObject member.
type wrap struct {
CollectionObject CollectionObject
}
// Constituent is one entry of CollectionObject.Constituents.
//
// The struct tags name the JSON keys each field is decoded from. The Go field
// names are looked up by name through reflection in concatC, so their exact
// spelling matters to callers of that function.
type Constituent struct {
// Name is decoded from the "ArtistName" key.
Name string `json:"ArtistName"`
// Nationality is decoded from the "ArtistNationalityIndexed" key.
Nationality string `json:"ArtistNationalityIndexed"`
// CreatorLifeEnd is decoded from the "EndDate" key.
CreatorLifeEnd string `json:"EndDate"`
}
// Geography is one entry of CollectionObject.Geography.
//
// Untagged fields are decoded from JSON keys matching the field name. The Go
// field names are looked up by name through reflection in concatG, so their
// exact spelling (for example SubRegion, with a capital R) matters to callers
// of that function.
type Geography struct {
City string
State string
Country string
Region string
SubRegion string
// GeographyType is decoded from the "GeoCode" key.
GeographyType string `json:"GeoCode"`
}
// CollectionObject holds the fields of one collection object that main
// flattens into a CSV row.
//
// Tagged fields are decoded from the JSON key named in the tag; untagged
// fields are decoded from keys matching the field name.
type CollectionObject struct {
// ID is decoded from "Id"; main uses it to build the object's search URL.
ID string `json:"Id"`
WorkType string `json:"ObjectName"`
Culture string
Period string
Dynasty string
Reign string
DisplayTitle string `json:"Title"`
// Constituents is flattened field-by-field with concatC.
Constituents []Constituent
EarliestDisplayDate string `json:"DateBegin"`
LatestDisplayDate string `json:"DateEnd"`
MaterialsTech string `json:"Medium"`
Credit string `json:"CreditLine"`
AccessionNumber string
YearAcquired string
// Geography is flattened field-by-field with concatG.
Geography []Geography
Classification string
// Department is written to the CSV as a decimal integer.
Department department
}
func main() {
// Pull csv for all ids in department 1
resp, err := http.Get("http://sdwssrh00:8080/solr/collection/select?q=*:*&fq=CrdDepartmentId%3A1&wt=csv&rows=10000000&indent=true&fl=Id&csv.header=false")
if err != nil {
panic(err)
}
// Request the json for each id
urlProducer := make(chan string)
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
id, err := strconv.Atoi(scanner.Text())
if err != nil {
panic(err)
}
fmt.Println(id)
urlProducer <- fmt.Sprintf("http://www.metmuseum.org/api/collection/collectionobject/%d", id)
}
resp.Body.Close()
sem := make(chan bool, 100)
for i := 0; i < 100; i++ {
sem <- true
}
ch := make(chan []byte)
for url := range urlProducer {
fmt.Println(urlProducer)
go func() {
<-sem
defer func() { sem <- true }()
resp, err := http.Get(url)
if err != nil {
panic(err)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err)
}
resp.Body.Close()
ch <- body
}()
}
var csvBytes bytes.Buffer
csvWriter := csv.NewWriter(&csvBytes)
var rows [][]string
for jsonBody := range ch {
var w wrap
err = json.Unmarshal(jsonBody, &w)
if err != nil {
panic(err)
}
o := w.CollectionObject
rows = append(rows, []string{
o.WorkType,
o.Culture,
o.Period,
o.Dynasty,
o.Reign,
o.DisplayTitle,
concatC(o.Constituents, "Name"),
concatC(o.Constituents, "Nationality"),
"",
concatC(o.Constituents, "CreatorLifeEnd"),
o.EarliestDisplayDate,
o.LatestDisplayDate,
o.MaterialsTech,
o.Credit,
o.AccessionNumber,
o.YearAcquired,
o.Classification,
concatG(o.Geography, "GeographyType"),
concatG(o.Geography, "City"),
concatG(o.Geography, "State"),
concatG(o.Geography, "Country"),
concatG(o.Geography, "Region"),
concatG(o.Geography, "Subregion"),
strconv.Itoa(int(o.Department)),
fmt.Sprintf("http://www.metmuseum.org/art/collection/search/", o.ID),
})
}
close(ch)
csvWriter.WriteAll(rows)
fmt.Println(csvBytes.String())
}
// concatC joins the string field named field of every Constituent in os with
// ", " and returns the result.
//
// The field is looked up by its Go name via reflection, so the spelling must
// match the struct field exactly. Empty values are kept, so the n-th entry of
// the result always belongs to the n-th constituent. An empty slice, or a
// field name that does not denote a string field, yields "".
func concatC(os []Constituent, field string) string {
	var joined bytes.Buffer
	for i, o := range os {
		f := reflect.ValueOf(o).FieldByName(field)
		// FieldByName returns the zero Value for unknown names; calling
		// String on it would leak "<invalid Value>" into the output.
		if !f.IsValid() || f.Kind() != reflect.String {
			return ""
		}
		if i > 0 {
			joined.WriteString(", ")
		}
		joined.WriteString(f.String())
	}
	return joined.String()
}
// concatG joins the string field named field of every Geography in os with
// ", " and returns the result.
//
// The field is looked up by its Go name via reflection, so the spelling must
// match the struct field exactly (for example "SubRegion", not "Subregion").
// Empty values are kept, so the n-th entry of the result always belongs to the
// n-th geography. An empty slice, or a field name that does not denote a
// string field, yields "".
func concatG(os []Geography, field string) string {
	var joined bytes.Buffer
	for i, o := range os {
		f := reflect.ValueOf(o).FieldByName(field)
		// FieldByName returns the zero Value for unknown names; calling
		// String on it would leak "<invalid Value>" into the output.
		if !f.IsValid() || f.Kind() != reflect.String {
			return ""
		}
		if i > 0 {
			joined.WriteString(", ")
		}
		joined.WriteString(f.String())
	}
	return joined.String()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment