Skip to content

Instantly share code, notes, and snippets.

@BtbN
Created January 6, 2015 17:27
Show Gist options
  • Select an option

  • Save BtbN/ed9dc9ab254d8f64fe79 to your computer and use it in GitHub Desktop.

Select an option

Save BtbN/ed9dc9ab254d8f64fe79 to your computer and use it in GitHub Desktop.
combine.py
#!/usr/bin/env python3
# Number of input files bundled into each generated part.
# 5 minute parts:
# files_per_part = 410
# 6 hour parts:
files_per_part = 5400

# Directory whose immediate subdirectories hold the input files.
input_dir = "data/union"

# Directory that receives the generated concat list files.
output_dir = "output"
import os
import sys
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import exists
# Run from the script's own directory so that the relative paths used by
# this script (input_dir, output_dir and the SQLite file below) resolve
# next to it.  abspath() is required because __file__ can be a bare
# relative name (for example when started as "python3 combine.py" on
# Python older than 3.9); dirname() would then return '' and
# os.chdir('') raises FileNotFoundError.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# SQLite database (relative to the working directory set above) that
# backs the ORM models of this script.
engine = create_engine('sqlite:///used_files.sqlite')

# Declarative base class shared by the ORM models.
Base = declarative_base()
class PartFile(Base):
    """ORM row recording one input file that has been assigned to a part."""

    __tablename__ = 'partfiles'

    # Identifier of the file; primary key, so each name is stored once.
    name = Column(String, primary_key=True)

    def __init__(self, fname):
        """Create a record whose ``name`` is *fname*."""
        self.name = fname
class ConfVal(Base):
    """ORM row holding a named value with a string and an integer slot."""

    __tablename__ = 'confvals'

    # Key of the value; primary key of the table.
    name = Column(String, primary_key=True)
    # String payload; not set by the constructor.
    strval = Column(String)
    # Integer payload; not set by the constructor.
    intval = Column(Integer)

    def __init__(self, name):
        """Create a value named *name*; both payload columns stay unset."""
        self.name = name
# Create the tables of every model registered on Base in the database.
Base.metadata.create_all(bind=engine)

# Session factory bound to the engine, and the one session used by the
# rest of the script.
Session = sessionmaker(engine)
session = Session()
# Every immediate subdirectory of input_dir, in sorted order.
dirs = sorted(
    os.path.join(input_dir, d)
    for d in os.listdir(input_dir)
    if os.path.isdir(os.path.join(input_dir, d))
)

# Collect up to files_per_part input files that are not yet recorded in
# the partfiles table, walking directories and files in sorted order.
files = []
for subdir in dirs:
    # Once the part is full there is no point in listing and sorting the
    # remaining directories.
    if len(files) >= files_per_part:
        break
    subfiles = sorted(
        d for d in os.listdir(subdir)
        if os.path.isfile(os.path.join(subdir, d))
    )
    for filen in subfiles:
        if len(files) >= files_per_part:
            break
        # Database key: "<subdir name>/<file name after its first '-'>".
        # A file name without '-' is used unchanged.
        fileo = '%s/%s' % (os.path.basename(subdir), filen.split('-', 1)[-1])
        if session.query(exists().where(PartFile.name == fileo)).scalar():
            # Already recorded as used; skip it.
            continue
        session.add(PartFile(fileo))
        files.append(os.path.abspath(os.path.join(subdir, filen)))

# Not enough unused files for a complete part: report progress, discard
# the pending PartFile rows and exit with a failure status.
if len(files) < files_per_part:
    print('At %s/%s input parts' % (len(files), files_per_part))
    session.rollback()
    sys.exit(-1)

# Fetch the persistent part counter, creating it at 0 on first use, and
# advance it for the part being written now.
partcnt = session.query(ConfVal).filter_by(name='partcount').first()
if partcnt is None:
    partcnt = ConfVal('partcount')
    partcnt.intval = 0
    session.add(partcnt)
partcnt.intval += 1

# Write one "file '<absolute path>'" line per collected input file.
# NOTE(review): this looks like an ffmpeg concat-demuxer list -- confirm
# the consumer.  A single quote inside a path is written as '\'' so it
# cannot terminate the quoted string early.
oname = 'part%05d.concat.txt' % partcnt.intval
with open(os.path.join(output_dir, oname), 'w') as f:
    f.writelines(
        "file '%s'\n" % filen.replace("'", "'\\''") for filen in files
    )
print('Wrote concat list %s' % oname)

# Persist the new PartFile rows and the counter only after the list file
# has been written successfully.
session.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment