Skip to content

Instantly share code, notes, and snippets.

@thom-nic
Created March 29, 2013 13:34
Show Gist options
  • Save thom-nic/5270881 to your computer and use it in GitHub Desktop.
Save thom-nic/5270881 to your computer and use it in GitHub Desktop.
Script to load EnerNOC Open data (see http://open.enernoc.com/data) into a relational database. Note that this resulted in a ~500 MB sqlite file for all 100 sites in the 2012 dataset. Once the data is loaded, you can make standard relational queries over the data such as: select r.* from readings r join sites s on r.site_id = s.site_id where s.i…
require 'sqlite3'
require 'csv'
module ENOC
##
# Functions to load EnerNOC Open Data (http://open.enernoc.com/data)
# into a sqlite database for relational access. This sould be easy
# to migrate to Postgres or MariaDB if you want a 'real' database.
#
module Sql
DB_FILE = 'anon/sql/all_data.db'
def self.create_schema db
db.execute_batch "pragma foreign_keys=ON;
create table if not exists sites (
site_id int not null,
industry varchar(25),
sub_industry varchar(40),
lat float,
lng float,
sq_ft int,
timezone varchar(20),
tzo varchar int );
create unique index if not exists site_site_id on sites( site_id asc );
create table if not exists readings (
id integer primary key,
site_id int not null
references sites( site_id ) on delete cascade,
timestamp int not null,
value int not null,
estimated bit default 0,
anomaly_type varchar(20) );
create unique index if not exists readings_site_id_timestamp
on readings( site_id, timestamp asc );"
puts "Created tables"
end
def self.import_metadata csv_file, db
db.transaction
counter = 0
db.execute "delete from sites"
db.prepare(
"insert into sites (
site_id,
industry, sub_industry,
sq_ft,
lat, lng,
timezone, tzo
) values (?,?,?,?,?,?,?,?)") { |stmt|
CSV.open( csv_file, 'r', :headers=> true ).each { |row|
stmt.execute(
row['SITE_ID'].to_i,
row['INDUSTRY'],
row['SUB_INDUSTRY'],
row['SQ_FT'].to_i,
row['LAT'].to_f,
row['LNG'].to_f,
row['TIME_ZONE'],
row['TZ_OFFSET'].split(':')[0].to_i )
counter += 1
}
}
db.commit
puts "Inserted #{counter} meta rows"
# res = db.get_first_value "select count(*) from sites"
# puts "=----- #{res}"
rescue Exception => err
db.rollback if db.transaction_active?
raise
end
def self.import_from_csv dir, db
db.execute "delete from readings"
Dir.glob( "#{dir}/*.csv" ).each { |file|
puts "Reading #{file}..."
counter = 0
db.transaction
site_id = file.split('/')[-1].split('.')[0].to_i
CSV.open( file, 'r', :headers=> true ).each { |row|
# puts row
anomaly = row['anomaly']
anomaly = nil unless anomaly.size > 0
db.execute(
"insert into readings (
site_id,
timestamp,
value,
estimated,
anomaly_type
) values (?,?,?,?,?)",
site_id,
row['timestamp'].to_i,
row['value'].to_f,
row['estimated'].to_i,
anomaly )
counter += 1
}
db.commit
puts "wrote #{counter} rows"
}
rescue Exception => err
db.rollback if db.transaction_active?
raise
end
def self.main
db = SQLite3::Database.new DB_FILE
# puts db.get_first_value 'SELECT SQLITE_VERSION()'
create_schema db
import_metadata 'anon/meta/all_sites.csv', db
import_from_csv 'anon/csv', db
db.close
end
end
end
if __FILE__ == $0
ENOC::Sql.main
end
@loveybot
Copy link

Yay Ruby!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment