Last active
April 11, 2017 15:25
-
-
Save teknoraver/155b2f917d16d66f98abf3fdacb39c9a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2017 Matteo Croce <[email protected]> | |
* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation, either version 3 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program. If not, see <http://www.gnu.org/licenses/>. | |
* | |
*/ | |
package main | |
import ( | |
"archive/zip" | |
"database/sql" | |
"encoding/csv" | |
"fmt" | |
"io" | |
"io/ioutil" | |
"net/http" | |
"os" | |
_ "github.com/mattn/go-sqlite3" | |
) | |
const ( | |
BaseUrl = "http://geodata.mit.gov.it/datasets/parco_circolante_" | |
DBSchema = `CREATE TABLE veicoli( | |
progressivo INT PRIMARY KEY, | |
tipo_veicolo TEXT NOT NULL, | |
destinazione TEXT, | |
uso TEXT, | |
comune_residenza TEXT NOT NULL, | |
provincia_residenza TEXT NOT NULL, | |
regione_residenza TEXT NOT NULL, | |
eta_intestatario INT, | |
sesso TEXT, | |
marca TEXT, | |
cilindrata INT, | |
alimentazione TEXT, | |
data_immatricolazione TEXT, | |
classe_euro INT, | |
emissioni_co2 INT, | |
massa_complessiva INT, | |
revisone_in_regola TEXT NOT NULL, | |
assicurazione_in_regola TEXT NOT NULL)` | |
) | |
var regioni = [...]string{ | |
"Abruzzo", | |
"Basilicata", | |
"Calabria", | |
"Campania", | |
"EmiliaRomagna", | |
"FriuliVeneziaGiulia", | |
"Lazio", | |
"Liguria", | |
"Lombardia", | |
"Marche", | |
"Molise", | |
"Piemonte", | |
"Puglia", | |
"Sardegna", | |
"Sicilia", | |
"Toscana", | |
"TrentinoAltoAdige", | |
"Umbria", | |
"ValleAosta", | |
"Veneto", | |
} | |
func insertLine(columns []string, tx *sql.Tx) { | |
if len(columns[12]) > 0 { | |
// Remove time from timestamp as it's always 00:00:00 | |
columns[12] = columns[12][:10] | |
} | |
_, err := tx.Exec(`INSERT OR REPLACE INTO veicoli VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, | |
columns[0], | |
columns[1], | |
sql.NullString{String: columns[2], Valid: len(columns[2]) > 0}, | |
sql.NullString{String: columns[3], Valid: len(columns[3]) > 0}, | |
columns[4], | |
columns[5], | |
columns[6], | |
sql.NullString{String: columns[7], Valid: len(columns[7]) > 0}, | |
sql.NullString{String: columns[8], Valid: len(columns[8]) > 0}, | |
sql.NullString{String: columns[9], Valid: len(columns[9]) > 0}, | |
sql.NullString{String: columns[10], Valid: len(columns[10]) > 0}, | |
sql.NullString{String: columns[11], Valid: len(columns[11]) > 0}, | |
sql.NullString{String: columns[12], Valid: len(columns[12]) > 0}, | |
sql.NullString{String: columns[13], Valid: len(columns[13]) > 0}, | |
sql.NullString{String: columns[14], Valid: len(columns[14]) > 0}, | |
sql.NullString{String: columns[15], Valid: len(columns[15]) > 0}, | |
columns[16], | |
columns[17], | |
) | |
if err != nil { | |
fmt.Printf("error inserting vehicle %s: %v\n", columns, err) | |
} | |
} | |
func getUnzip(url string) (*csv.Reader, io.ReadCloser, error) { | |
resp, err := http.Get(url) | |
if err != nil { | |
fmt.Println(err) | |
return nil, nil, err | |
} | |
// Remove the file after open, it will be deleted when the filedescriptor will be closed | |
zipfile, err := ioutil.TempFile("", "veicoli") | |
defer os.Remove(zipfile.Name()) | |
io.Copy(zipfile, resp.Body) | |
zipreader, err := zip.OpenReader(zipfile.Name()) | |
if err != nil { | |
fmt.Println(err) | |
return nil, nil, err | |
} | |
// There should be one CSV in the ZIP file | |
if len(zipreader.File) < 1 { | |
return nil, nil, fmt.Errorf("archive is empty") | |
} | |
uncompressed, err := zipreader.File[0].Open() | |
if err != nil { | |
return nil, nil, err | |
} | |
// Return the backing raeder too just to close it | |
return csv.NewReader(uncompressed), uncompressed, nil | |
} | |
func downloadRegione(regione string, db *sql.DB) int { | |
csvReader, closer, err := getUnzip(BaseUrl + regione + ".csv.zip") | |
if err != nil { | |
fmt.Println(err) | |
return 0 | |
} | |
defer closer.Close() | |
lines := 0 | |
tx, err := db.Begin() | |
// Drop the first line with column header | |
line, _ := csvReader.Read() | |
// Scan the CSV and insert lines | |
for line, err = csvReader.Read(); err != io.EOF; line, err = csvReader.Read() { | |
insertLine(line, tx) | |
lines++ | |
} | |
if err = tx.Commit(); err != nil { | |
panic(err) | |
} | |
return lines | |
} | |
func setupDB(path string) *sql.DB { | |
os.Remove(path) | |
// Create the DB | |
db, err := sql.Open("sqlite3", path) | |
if err != nil { | |
panic(err) | |
} | |
_, err = db.Exec(DBSchema) | |
if err != nil { | |
panic(err) | |
} | |
return db | |
} | |
func main() { | |
if len(os.Args) != 2 { | |
fmt.Println("usage:", os.Args[0], "<dbpath>") | |
return | |
} | |
db := setupDB(os.Args[1]) | |
fmt.Println("Init DB") | |
for _, regione := range regioni { | |
fmt.Printf("Scarico %s... ", regione) | |
veicoli := downloadRegione(regione, db) | |
fmt.Println(veicoli, "veicoli") | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment