Skip to content

Instantly share code, notes, and snippets.

@jfrobbins
Last active August 29, 2015 13:57
Show Gist options
  • Select an option

  • Save jfrobbins/9754201 to your computer and use it in GitHub Desktop.

Select an option

Save jfrobbins/9754201 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
###
# filesplitter.py
#syntax is:
# ./filesplitter.py filename linesPerFile
###
import sys,os
class myfile(object):
    """Hold the lines of a text file and split them into smaller files.

    Attributes:
        data:   list of line strings from the last successful readFile()
                ('' until then).
        fname:  absolute path of the file that was read, or the ``name`` of
                the file-like object that was read ('' if it has none).
        inFile: name used as the prefix for the files written by split().
        isFile: set to True when the input was a file-like object rather
                than a path (kept from the original code).
    """

    def __init__(self):
        self.data = ''
        self.fname = ''
        self.inFile = ''
        self.isFile = False

    def readFile(self, fname, commenter=None):
        """Read *fname* into ``self.data`` as a list of lines.

        Args:
            fname: path of an existing file, or an object providing
                ``readlines()`` and ``close()`` (it is closed after reading).
            commenter: optional comment marker. When given, each line is cut
                at the first occurrence of the marker, line endings are
                removed, and lines that end up empty are dropped. When
                omitted, lines are stored exactly as read (line endings kept).

        Returns:
            True when at least one line was stored, False otherwise.
        """
        self.inFile = fname
        lines = []

        if os.path.isfile(str(fname)):
            # Remember the name as given: split() derives output names from it.
            self.inFile = str(fname)
            fname = os.path.abspath(str(fname))
            print('trying to read file: ' + fname)
            with open(fname, 'r') as f:
                print("file is opened")
                lines = f.readlines()
            self.fname = fname
        else:
            # Not a path on disk: try to use fname like an io object.
            try:
                lines = fname.readlines()
                fname.close()
            except AttributeError:
                print('file does not exist: ' + str(fname))
                return False
            # In-memory streams (e.g. io.StringIO) have no ``name``.
            self.fname = getattr(fname, 'name', '')
            self.inFile = self.fname
            self.isFile = True

        if commenter:
            # Strip the line ending first so that a line without a comment
            # (or without a trailing newline) is never truncated.
            uncommented = (
                line.rstrip('\r\n').partition(commenter)[0] for line in lines
            )
            lines = [text for text in uncommented if text]

        if lines and lines[0].startswith('\ufeff'):
            # Strip a leading unicode BOM; drop the line if nothing is left.
            lines[0] = lines[0][1:]
            if not lines[0]:
                lines = lines[1:]

        if not lines:
            print('file could not be read')
            return False

        print("file was read")
        self.data = lines
        return True

    def returnData(self):
        """Return the lines stored by the last successful readFile()."""
        return self.data

    def getColumnNames(self, delimiter=','):
        """Return the fields of the first stored line, split on *delimiter*.

        Plain ``str.split`` is used, so quoted fields containing the
        delimiter are not handled. Returns [] when no data is loaded.
        """
        if not self.data:
            return []
        return self.data[0].rstrip('\r\n').split(delimiter)

    def split(self, linesPerFile):
        """Write the stored rows into numbered files, repeating the header.

        The first stored line is treated as a header and written at the top
        of every output file, followed by at most *linesPerFile* data rows.
        Output files are named ``<input name><n>`` with n counting from 0.

        Args:
            linesPerFile: maximum data rows per file; an int or a numeric
                string. Values <= 0 fall back to 50.

        Returns:
            List of the file names written, in order ([] when there is
            nothing to write).

        Raises:
            ValueError: if *linesPerFile* is not numeric, or no input name is
                available to derive output names from.
        """
        if not self.data:
            print('no data to split')
            return []

        linesPerFile = int(linesPerFile)
        if linesPerFile <= 0:
            linesPerFile = 50  # reasonable default

        prefix = str(self.inFile) if self.inFile else str(self.fname)
        if not prefix:
            raise ValueError('no input file name to derive output names from')

        # Stored lines may or may not carry a line ending depending on how
        # they were read; normalise so each output line ends with one '\n'.
        hdr = self.data[0].rstrip('\r\n')
        rows = self.data[1:]

        written = []
        for nFile, start in enumerate(range(0, len(rows), linesPerFile)):
            oFileName = prefix + str(nFile)
            print("writing output to file: " + oFileName)
            with open(oFileName, 'w') as of:
                of.write(hdr + '\n')
                of.writelines(
                    row.rstrip('\r\n') + '\n'
                    for row in rows[start:start + linesPerFile]
                )
            written.append(oFileName)

        print("done writing file")
        return written
if __name__ == '__main__':
    # Usage: ./filesplitter.py filename [linesPerFile]
    mydata = myfile()

    if len(sys.argv) > 1:
        fname = sys.argv[1]
    else:
        print("no args, assigning test case:")
        fname = 'fakedata.csv'
        print(fname)

    # The line count is optional; 0 makes split() fall back to its default.
    try:
        linesPerFile = int(sys.argv[2]) if len(sys.argv) > 2 else 0
    except ValueError:
        print('linesPerFile must be an integer: ' + sys.argv[2])
        sys.exit(1)

    # '!' is the comment marker: text after it and blank lines are dropped.
    if mydata.readFile(fname, '!'):
        mydata.split(linesPerFile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment