Created
October 18, 2011 01:57
-
-
Save dkubb/1294429 to your computer and use it in GitHub Desktop.
Reflect on a PostgreSQL Database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gem install backports veritas veritas-optimizer veritas-do-adapter do_postgres | |
require 'pp' | |
require 'rational' | |
require 'rubygems' | |
require 'backports' | |
require 'backports/basic_object' | |
require 'veritas' | |
require 'veritas-optimizer' | |
require 'veritas-do-adapter' | |
require 'do_postgres' | |
# Connection string for the local test database — TODO confirm credentials
# before running outside the author's machine.
URI = 'postgres://dkubb:@localhost/test' | |
# Log every statement DataObjects executes to stderr for debugging.
DataObjects.logger.set_log($stderr, :debug) | |
# Execute the bootstrap SQL embedded after __END__ against the database,
# creating and seeding the tables this script reflects on.
#
# Statements in DATA are separated by ';'; empty fragments are skipped.
# The connection is closed even when a statement raises, so a failed
# statement no longer leaks the connection.
def setup_tables
  connection = DataObjects::Connection.new(URI)
  begin
    DATA.read.split(';').each do |statement|
      statement.strip!
      next if statement.empty?
      connection.create_command(statement).execute_non_query
    end
  ensure
    connection.close
  end
end
# Build the minimum set of base relations needed to bootstrap reflection
# against the PostgreSQL system catalog.
#
# Returns the pg_class, pg_attribute, pg_type and pg_attrdef relations,
# in that order.
def initial_schema
  catalog_definitions = [
    [ 'pg_class', [
      [ :oid,     Integer ],
      [ :relname, String  ],
    ] ],
    [ 'pg_attribute', [
      [ :attname,   String  ],
      [ :attlen,    Integer ],
      [ :atttypmod, Integer ],
      [ :attnum,    Integer ],
      [ :attrelid,  Integer ],
      [ :atttypid,  Integer ],
      Veritas::Attribute::Boolean.new(:attnotnull),
      Veritas::Attribute::Boolean.new(:atthasdef),
    ] ],
    [ 'pg_type', [
      [ :oid,     Integer ],
      [ :typname, String  ],
    ] ],
    [ 'pg_attrdef', [
      [ :adrelid, Integer ],
      [ :adnum,   Integer ],
      [ :adsrc,   String  ],
    ] ],
  ]

  catalog_definitions.map do |name, header|
    Veritas::Relation::Base.new(name, header)
  end
end
# Rename the raw catalog attribute names to friendlier ones and project
# each relation down to just the renamed attributes.
#
# Returns the four relations in the same order they were received.
def rename_tables(pg_class, pg_attribute, pg_type, pg_attrdef)
  # Apply a rename map and keep only the renamed attributes.
  simplify = lambda do |relation, renames|
    relation.rename(renames).project(renames.values)
  end

  pg_class = simplify.call(pg_class,
    :oid     => :pg_class_id,
    :relname => :table_name
  )
  pg_attribute = simplify.call(pg_attribute,
    :attname    => :name,
    :attlen     => :length,
    :atttypmod  => :modifier,
    :attnum     => :position,
    :attrelid   => :pg_class_id,
    :atttypid   => :pg_type_id,
    :attnotnull => :required,
    :atthasdef  => :default
  )
  pg_type = simplify.call(pg_type,
    :oid     => :pg_type_id,
    :typname => :type
  )
  pg_attrdef = simplify.call(pg_attrdef,
    :adrelid => :pg_class_id,
    :adnum   => :position,
    :adsrc   => :default_value
  )

  return pg_class, pg_attribute, pg_type, pg_attrdef
end
# Compose the catalog relations into a single relation describing every
# user-visible column: one tuple per (table_name, position) with the
# column's name, type, length, modifier, required and default flags.
def table_reflection_relation(pg_class, pg_attribute, pg_type, pg_attrdef) | |
  # Join on the shared pg_class_id / pg_type_id attributes established by
  # rename_tables; pg_attrdef is not joined yet (see TODO in tables()).
  relation = pg_class.join(pg_attribute).join(pg_type) | |
  # System columns (ctid, oid, ...) have position <= 0 — keep user columns only.
  relation = relation.restrict { |r| r.position.gt(0) } | |
  relation = relation.project([ :position, :table_name, :name, :type, :length, :modifier, :required, :default ]) | |
  # TODO: remove types from this list once the mapping becomes more clear | |
  relation = relation.restrict { |r| r.type.exclude(%w[ sql_identifier character_data cardinal_number time_stamp yes_or_no cstring int2vector oidvector anyarray abstime ]) } | |
  # Sort for deterministic iteration, then let the optimizer simplify the tree.
  relation.sort_by { relation.header }.optimize | |
end | |
# Iterate the reflected column tuples and materialize one
# Veritas::Relation::Base per table, mapping each PostgreSQL type name to
# a Veritas attribute class plus constraint options.
#
# relation - an enumerable of tuples with :table_name, :name, :type,
#            :length, :modifier and :required keys (see
#            table_reflection_relation).
#
# Returns an Array of Veritas::Relation::Base.
#
# Raises RuntimeError when an unmapped type name is encountered.
def tables(relation)
  # TODO: reflect on indexes and primary key
  # TODO: reflect on check constraints
  # TODO: reflect on foreign keys
  # TODO: join to pg_attrdef to get the column defaults first, and put them into a
  # map so that they can be looked up below
  # NOTE: block params renamed so they no longer shadow the `tables` local.
  tables = Hash.new { |hash, table_name| hash[table_name] = [] }
  relation.each do |tuple|
    # TODO: change this to a lookup table
    klass, options = case tuple[:type]
    when 'int2'
      [ Veritas::Attribute::Integer, { :size => -2**15..2**15-1 } ]
    when 'int4', 'oid', 'regproc', 'xid', 'tid', 'cid'
      [ Veritas::Attribute::Integer, { :size => -2**31..2**31-1 } ]
    when 'int8'
      [ Veritas::Attribute::Integer, { :size => -2**63..2**63-1 } ]
    when 'float4'
      [ Veritas::Attribute::Float, { :size => 1E-37..1E+37 } ] # TODO: set 6 dec. precision
    when 'float8'
      [ Veritas::Attribute::Float, { :size => 1E-307..1E+308 } ] # TODO: set 15 dec. precision
    when 'numeric'
      [ Veritas::Attribute::Decimal, { :size => -1.0/0..1.0/0 } ]
    when 'money'
      [ Veritas::Attribute::Decimal, { :size => Rational(-2**63, 100)..Rational(2**63, 100) } ] # TODO: set 2 dec. precision
    when 'bpchar', 'char', 'name'
      [ Veritas::Attribute::String, { :max_length => tuple[:length] } ]
    when 'varchar'
      # atttypmod stores declared length + 4 bytes of header for varchar
      [ Veritas::Attribute::String, { :max_length => tuple[:modifier] - 4 } ]
    when 'text', 'bytea', 'xml'
      [ Veritas::Attribute::String, { :max_length => 1.0/0 } ]
    when 'bit'
      [ Veritas::Attribute::String, { :min_length => tuple[:modifier], :max_length => tuple[:modifier] } ]
    when 'varbit'
      [ Veritas::Attribute::String, { :min_length => 0, :max_length => tuple[:modifier] } ]
    when 'uuid'
      [ Veritas::Attribute::String, { :min_length => 36, :max_length => 36 } ]
    when 'timestamp', 'timestamptz'
      Veritas::Attribute::DateTime
    when 'date'
      Veritas::Attribute::Date
    when 'bool'
      Veritas::Attribute::Boolean
    when 'time', 'interval',
      'point', 'line', 'lseg', 'box', 'path', 'polygon', 'circle',
      'cidr', 'inet', 'macaddr',
      'tsvector', 'tsquery',
      /\A_/ # an array type # TODO: verify the types are allowed individually
      Veritas::Attribute::String # default handler
    else
      raise "unknown type: #{tuple[:type]}"
    end
    # Branches returning a bare class leave options nil; normalize here.
    options ||= {}
    options[:required] = tuple[:required]
    # TODO: set the default
    tables[tuple[:table_name]] << klass.new(tuple[:name], options)
  end
  tables.map do |name, attributes|
    Veritas::Relation::Base.new(name, attributes)
  end
end
# Reflect the database through the given adapter and return a Hash that
# maps each table name to a gateway-wrapped base relation for that table.
def load_schema(adapter)
  reflection = table_reflection_relation(*rename_tables(*initial_schema))
  gateway    = Veritas::Relation::Gateway.new(adapter, reflection)
  # TODO: return some kind of object that represents a Registry or Schema
  schema = {}
  tables(gateway).each do |base_relation|
    schema[base_relation.name] = Veritas::Relation::Gateway.new(adapter, base_relation)
  end
  schema
end
# ------------------------------------------------------------------------------ | |
# Script entry point: create/seed the tables from the __END__ SQL, reflect
# the schema back out of the catalog, and dump the 'people' rows.
setup_tables | |
adapter = Veritas::Adapter::DataObjects.new(URI) | |
schema = load_schema(adapter) | |
people = schema['people'] | |
pp people.to_a | |
__END__ | |
DROP TABLE IF EXISTS "people"; | |
CREATE TABLE "people" ("id" SERIAL NOT NULL PRIMARY KEY, "name" VARCHAR(50) NOT NULL UNIQUE CHECK (CHAR_LENGTH(name) >= 1)); | |
INSERT INTO people (name) VALUES('Dan Kubb'); | |
INSERT INTO people (name) VALUES('Barabra-Ann Kubb'); | |
INSERT INTO people (name) VALUES('John Doe'); | |
INSERT INTO people (name) VALUES('Jane Doe'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@emmanuel thanks! There's still a way to go with reflection, but I'm pretty confident this spike reflects how the database represents the tables and columns.
I need to reflect on CHECK constraint data eventually, which may get interesting if I have to parse SQL. I'm not sure yet how I'm going to handle that; I don't want to get sidetracked writing an SQL parser, even though I think it may be constrained due to limitations in what a CHECK can contain. I might just punt on this for now until we've got other things worked out.
BTW, I think reflection is going to have utility far beyond just migrations. We'll be able to use it to create mappers and object classes for AR-style interaction automatically if we want. We'll be able to run some checks when a model declares some attributes (DM1 style), to make sure the columns all exist and the constraints in the DB are not stronger than what's defined in the model (thus causing runtime errors). And probably the most pragmatic reason is that we'll just be able to use Veritas with an existing schema without having to do any setup, just knowing the table names.