Skip to content

Instantly share code, notes, and snippets.

@RonenNess
Created August 25, 2019 15:37
Show Gist options
  • Save RonenNess/87973fc8715a2b4db2b4850b89b9902b to your computer and use it in GitHub Desktop.
Save RonenNess/87973fc8715a2b4db2b4850b89b9902b to your computer and use it in GitHub Desktop.
Tiny script to break large log files into multiple log files. Can work on any type of file, not just logs.
import sys
import os
# --- command-line parsing and output-folder setup ---

# Print usage and bail out when no input file was given.
if len(sys.argv) < 2:
    print("Usage: split_logs.py filename [lines_per_file]")
    # BUG FIX: use sys.exit() — the bare exit() builtin is injected by the
    # `site` module for interactive use and is not guaranteed in scripts.
    sys.exit(1)

# Input file to split.
filename = sys.argv[1]

# Number of lines written to each output part (optional second argument).
lines_count_per_file = 100000
if len(sys.argv) == 3:
    lines_count_per_file = int(sys.argv[2])
print("Lines per file:", lines_count_per_file)

# Create the directory that will hold the parts, named after the input file
# with dots replaced (e.g. "app.log" -> "app_log_parts").  os.mkdir raises
# FileExistsError if the folder already exists, which avoids silently mixing
# new parts with output from a previous run.
outfolder = os.path.basename(filename).replace('.', '_') + '_parts'
print("Output folder:", outfolder)
os.mkdir(outfolder)
# Counter for the sequentially numbered output parts.
file_index = 0


def open_next_output_file():
    """Open the next "part_<N>.txt" file (for writing) inside *outfolder*.

    Advances the module-level ``file_index`` counter and returns the freshly
    opened file object; closing it is the caller's responsibility.
    """
    global file_index
    part_path = os.path.join(outfolder, "part_%d.txt" % file_index)
    fout = open(part_path, "w")
    file_index += 1
    return fout
# --- split the input file into numbered parts ---
with open(filename, 'r') as fin:
    # Open the first output part.
    fout = open_next_output_file()
    try:
        # Copy lines one at a time, rolling over to a fresh part every
        # lines_count_per_file lines.
        for i, line in enumerate(fin):
            fout.write(line)
            # Time to start a new part?
            if (i + 1) % lines_count_per_file == 0:
                fout.close()
                # NOTE: if the input length is an exact multiple of
                # lines_count_per_file this opens one trailing empty part,
                # matching the original behavior.
                fout = open_next_output_file()
                # BUG FIX: report the number of lines processed so far.
                # The original printed i * lines_count_per_file, which
                # over-counts by a factor of ~lines_count_per_file.
                print("Processed: ", i + 1, " lines.")
    finally:
        # BUG FIX: close the current part even if an exception interrupts
        # the loop (the original leaked the open handle on error).
        fout.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment