Created
July 18, 2014 08:34
-
-
Save mdda/a11f7d9ce1143da4146b to your computer and use it in GitHub Desktop.
CSV-to-sqlite importer with simple DSL to map ports 'AIS' data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sqlite3 | |
import re | |
import sys, csv | |
import datetime, time | |
db_file = "ports.db" | |
#vslRecID_n,vsl_m,vslTrgtPosnLat_q,vslTrgtPosnLong_q,vslCourse_q,vslTy_c,vslLen_q,vslGT_q,vslBre_q,vslSpeed_q, timeStamp_dt | |
#52589,, 1.308571696, 103.7263107, 0, TU, 54, 1373, 0, 0, Mar 31 2014 12:01:46:270AM | |
#68346,, 1.289833188, 104.0870514, 84.72840881,TU, 19, 89, 0, 2.988201141,Mar 31 2014 12:14:02:826AM | |
structure_ais = dict( | |
table="AIS", | |
skip_first=True, | |
ignore_zero_vid=True, | |
fields=[ | |
"vslRecID=vid INT", # - vessel ID. 0 means we're missing the vessel receiver ID | |
"vsl_m=0 CHAR(1)", # - vessel manager but unused | |
"vslTrgtPosnLat_q=lat REAL", # - vessel latitude | |
"vslTrgtPosnLong_q=lon REAL", # - vessel longitude | |
"vslCourse_q=course REAL", # - vessel direction | |
"vslTy_c=country CHAR(2)", # - vessel country of registration | |
"vslLen_q=l INT", # - vessel length | |
"vslGT_q=gt INT", # - gross tonnage (e.g. 6000 means volume in cubic feet) | |
"vslBre_q=beam INT", # - vessel beam | |
"vslSpeed_q=speed REAL", # - vessel speed in knots at the time of measurement | |
"timeStamp_dt=ts TS_AIS" # - timestamp when this record was made | |
], | |
structure_csv=[], | |
fields_sql=[], | |
create_sql="", | |
) | |
class CSVtoSQLite(): | |
#db_create_real = re.sub(r"\#.*?\n", "\n", """ | |
DB_HINTS_PARSE = r"^(.*?)\=(.*?)\s+(.*)" | |
def __init__(self, db_structure): | |
sql_create=[] | |
sql_fields=[] | |
sql_places=[] | |
structure=[] | |
for c in db_structure['fields']: | |
m = re.match(self.DB_HINTS_PARSE, c) | |
if m is not None: | |
(csv_field, sql_field, typ) = (m.group(1), m.group(2), m.group(3) ) | |
structure.append( dict(csv = csv_field, sql = sql_field, typ = typ) ) | |
if sql_field == "0": continue | |
if typ == "TS_AIS": typ = "INT" # Conversion will happen below | |
print "%s - %s - %s" % (csv_field, sql_field, typ) | |
sql_create.append("%s %s" % (sql_field, typ)) | |
sql_fields.append(sql_field) | |
sql_places.append('?') | |
db_structure['structure_csv'] = structure | |
db_structure['create_sql'] = ("CREATE TABLE %s (" % (db_structure['table'])) + ", \n".join(sql_create) + (");") | |
db_structure['fields_sql'] = sql_fields | |
db_structure['places_sql'] = sql_places | |
self.db_structure = db_structure | |
def create_if_not_there(self): | |
r = connection.execute("SELECT * FROM sqlite_master WHERE type = 'table' AND name = ?", (self.db_structure['table'],)) | |
if r.fetchone() is None: | |
connection.execute(self.db_structure['create_sql']) | |
def load_csv(self, filename): | |
#reader = csv.DictReader(open(filename)) | |
reader = csv.reader(open(filename)) | |
if self.db_structure['skip_first']: | |
throwaway = reader.next() | |
def return_place(field): return '?' | |
cols = ','.join(self.db_structure['fields_sql']) | |
places = ','.join(self.db_structure['places_sql']) | |
insert = "INSERT INTO %s (%s) VALUES (%s)" % (self.db_structure['table'], cols, places) | |
#print insert | |
for row in reader: | |
entry = [] | |
skip_row = False | |
for (c,v) in zip(self.db_structure['structure_csv'], row): | |
if c['sql'] == "0": continue | |
if c['sql'] == "vid" and v=="0" and self.db_structure['ignore_zero_vid']: skip_row=True | |
# https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior | |
if c['typ'] == "TS_AIS": | |
#print "DATE:" + v | |
ts = datetime.datetime.strptime(v, "%b %d %Y %I:%M:%S:%f%p") # 'Apr 30 2014 12:00:42:240AM' | |
#v = "datetime(%s, 'unixepoch')" % (time.mktime(ts.timetuple()),) | |
v = time.mktime(ts.timetuple()) | |
entry.append(v) | |
if not skip_row: | |
#print insert | |
#print entry | |
connection.execute(insert, entry) | |
if reader.line_num % 10000 == 0 : | |
print "%30s : %6dk" % (filename, reader.line_num/1000) | |
connection.commit() | |
# http://www.tutorialspoint.com/sqlite/sqlite_python.htm | |
connection = sqlite3.connect(db_file) | |
dataset = '' | |
if len(sys.argv)<2: | |
print """Usage: | |
python csv_to_sqlite.py {ais} | |
Layout: | |
./db/csv_to_sqlite.py # This script | |
./db/ports.db # The sqlite database populated | |
./data/AIS/YYYY-MM/*.csv # AIS data | |
""" | |
else: | |
dataset = sys.argv[1] | |
if dataset.lower() == 'ais': | |
ais=CSVtoSQLite(structure_ais) | |
ais.create_if_not_there() | |
#ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv") | |
#ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_02-May-2014.csv") | |
ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_03-May-2014.csv") | |
connection.close() | |
#system("""echo -e ".mode csv\n.import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 ports.db""") | |
#system("""echo -e ".import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 -csv ports.db""") | |
""" | |
## https://gist.github.com/rgrp/5199059 | |
# ID INT PRIMARY KEY NOT NULL, | |
# NAME TEXT NOT NULL, | |
# AGE INT NOT NULL, | |
# ADDRESS CHAR(50), | |
# SALARY REAL | |
## http://www.tutorialspoint.com/sqlite/sqlite_indexes.htm | |
#CREATE INDEX index_name | |
#on table_name (column1, column2); | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment