Created
February 23, 2018 05:18
-
-
Save pl77/b9240d8f25cca564b180dfaf8777c9e7 to your computer and use it in GitHub Desktop.
Internet Archive metadata schema attempt #1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
from sqlalchemy import Column, ForeignKey, Integer, String, UniqueConstraint, DateTime | |
from sqlalchemy.ext.declarative import declarative_base | |
from sqlalchemy import create_engine | |
from sqlalchemy.orm import sessionmaker, relationship, scoped_session | |
from sqlalchemy import event | |
Base = declarative_base() | |
class Server(Base): | |
__tablename__ = 'server' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
# rootserve = relationship("Rootdata") | |
def __repr__(self): | |
return '<Server %r>' % self.name | |
class Rootdata(Base): | |
__tablename__ = 'rootdata' | |
# Here we define columns for the table address. | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
uniq = Column(Integer, unique=True) | |
created = Column(Integer) | |
d1_server = Column(Integer, ForeignKey('server.id')) | |
d2_server = Column(Integer, ForeignKey('server.id')) | |
dir = Column(String) | |
files_count = Column(Integer) | |
is_collection = Column(Integer) | |
item_size = Column(Integer) | |
updated = Column(String) | |
mainserver = Column(Integer, ForeignKey('server.id')) | |
d1 = relationship("Server", foreign_keys=[d1_server]) | |
d2 = relationship("Server", foreign_keys=[d2_server]) | |
server = relationship("Server", foreign_keys=[mainserver]) | |
def __repr__(self): | |
return '<Rootdata %r>' % self.uniq | |
class Taskdata(Base): | |
__tablename__ = 'taskdata' | |
# Here we define columns for the table address. | |
# Notice that each column is also a normal Python instance attribute. | |
# {'task_id': "", 'identifier': "", 'cmd': "", 'args': "", 'submittime': "", 'submitter': "", 'server': "", 'finished': ""} | |
task_id = Column(Integer, primary_key=True) | |
identifier = Column(String) | |
cmd = Column(String) | |
args = Column(String) | |
submittime = Column(String) | |
submitter = Column(String) | |
server = Column(String) | |
finished = Column(String) | |
def __repr__(self): | |
return '<Taskdata %r>' % self.identifier | |
class Format(Base): | |
__tablename__ = 'format' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
def __repr__(self): | |
return '<Format %r>' % self.name | |
class Files(Base): | |
__tablename__ = 'files' | |
# Here we define columns for the table address. | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
uniq = Column(Integer, ForeignKey('rootdata.uniq')) | |
bitrate = Column(Integer) | |
btih = Column(String) | |
format = Column(Integer, ForeignKey('format.id')) | |
height = Column(Integer) | |
width = Column(Integer) | |
length = Column(Integer) | |
size = Column(Integer) | |
mtime = Column(Integer) | |
dir = Column(String) | |
name = Column(String) | |
crc32 = Column(String) | |
md5 = Column(String) | |
sha1 = Column(String) | |
title = Column(String) | |
__table_args__ = (UniqueConstraint('uniq', 'sha1', name='_uniq_sha1_uc'),) | |
def __repr__(self): | |
return '<Files %r>' % self.sha1 | |
class Mediatype(Base): | |
__tablename__ = 'mediatype' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
def __repr__(self): | |
return '<Mediatype %r>' % self.name | |
class Collection(Base): | |
__tablename__ = 'collection' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
def __repr__(self): | |
return '<Collection %r>' % self.name | |
class User(Base): | |
__tablename__ = 'user' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
def __repr__(self): | |
return '<User %r>' % self.name | |
class Language(Base): | |
__tablename__ = 'language' | |
# Here we define columns for the table person | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
name = Column(String, unique=True) | |
def __repr__(self): | |
return '<Language %r>' % self.name | |
class Metadata(Base): | |
__tablename__ = 'metadata' | |
# Here we define columns for the table address. | |
# Notice that each column is also a normal Python instance attribute. | |
id = Column(Integer, primary_key=True) | |
identifier = Column(String, unique=True) | |
mediatype = Column(Integer, ForeignKey('mediatype.id')) | |
collection = Column(Integer, ForeignKey('collection.id')) | |
creator = Column(Integer, ForeignKey('user.id')) | |
subject = Column(String) | |
date = Column(String) | |
description = Column(String) | |
language = Column(Integer, ForeignKey('language.id')) | |
noindex = Column(Integer) | |
publicdate = Column(Integer) | |
title = Column(String) | |
uniq = Column(Integer, ForeignKey('rootdata.uniq')) | |
uploader = Column(Integer, ForeignKey('user.id')) | |
def __repr__(self): | |
return '<Metadata %r>' % self.identifier | |
def _fk_pragma_on_connect(dbapi_con, con_record): | |
dbapi_con.execute("PRAGMA synchronous = OFF") | |
dbapi_con.execute("PRAGMA journal_mode = MEMORY") | |
# Create an engine that stores data in the local directory's | |
# sqlalchemy_example.db file. | |
engine = create_engine('sqlite:///D:\\archive\\internetarchive.db3', connect_args={'timeout': 30}) | |
# Create all tables in the engine. This is equivalent to "Create Table" | |
# statements in raw SQL. | |
Base.metadata.create_all(engine) | |
Base.metadata.bind = engine | |
DBSession = sessionmaker(bind=engine) | |
# A DBSession() instance establishes all conversations with the database | |
# and represents a "staging zone" for all the objects loaded into the | |
# database session object. Any change made against the objects in the | |
# session won't be persisted into the database until you call | |
# session.commit(). If you're not happy about the changes, you can | |
# revert all of them back to the last commit by calling | |
# session.rollback() | |
Session = scoped_session(DBSession) | |
filesession = Session() | |
event.listen(engine, 'connect', _fk_pragma_on_connect) | |
mediatypes = ['account', 'audio', 'collection', 'data', 'etree', 'image', 'movies', 'software', 'texts', 'web'] | |
for mtype in mediatypes: | |
if filesession.query(Mediatype).filter_by(name=mtype).count() < 1: | |
mediatype = Mediatype(name=mtype) | |
filesession.add(mediatype) | |
filesession.commit() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment