Created
August 25, 2019 15:37
-
-
Save RonenNess/87973fc8715a2b4db2b4850b89b9902b to your computer and use it in GitHub Desktop.
Tiny script to break large log files into multiple log files. Can work on any type of file, not just logs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
# print usage
if len(sys.argv) < 2:
    print("Usage: split_logs.py filename [lines_per_file]")
    # sys.exit rather than the site-provided exit(), which is not always available
    sys.exit(1)

# get filename
filename = sys.argv[1]

# get number of lines per file (optional second argument, defaults to 100000)
lines_count_per_file = 100000
if len(sys.argv) >= 3:
    try:
        lines_count_per_file = int(sys.argv[2])
    except ValueError:
        print("lines_per_file must be an integer, got:", sys.argv[2])
        sys.exit(1)
# reject zero/negative counts up front: a zero count would make the split
# loop's modulo check raise ZeroDivisionError after output was already created
if lines_count_per_file < 1:
    print("lines_per_file must be at least 1, got:", lines_count_per_file)
    sys.exit(1)
print("Lines per file:", lines_count_per_file)

# create directory for output; it is named after the input file (dots replaced
# by underscores, '_parts' appended) and created relative to the current
# working directory
outfolder = os.path.basename(filename).replace('.', '_') + '_parts'
print("Output folder:", outfolder)
# os.mkdir raises if the folder already exists, so parts from an earlier run
# are never silently overwritten
os.mkdir(outfolder)
# index of the next part file to create; advanced by open_next_output_file()
file_index = 0


def open_next_output_file():
    """Open the next numbered part file inside the output folder.

    The file is named ``part_<file_index>.txt`` and opened for writing in
    text mode. The module-level ``file_index`` counter is advanced only
    after the file was opened successfully.

    Returns:
        The open file object; the caller is responsible for closing it.
    """
    global file_index
    part_path = os.path.join(outfolder, "part_%d.txt" % file_index)
    handle = open(part_path, "w")
    file_index += 1
    return handle
# split file
with open(filename, 'r') as fin:

    # create first output file (so even an empty input yields part_0.txt)
    fout = open_next_output_file()
    try:
        # iterate lines
        for i, line in enumerate(fin):

            # open the next part lazily, only once there is a line to put in
            # it; this avoids leaving an empty trailing part when the input
            # length is an exact multiple of lines_count_per_file
            if fout is None:
                fout = open_next_output_file()

            # write line to output
            fout.write(line)

            # check if time to break to another file
            if (i + 1) % lines_count_per_file == 0:
                fout.close()
                fout = None
                # i is zero-based, so i + 1 lines have been processed so far
                print("Processed: ", i + 1, " lines.")
    finally:
        # close the current part even if reading or writing failed part-way
        if fout is not None:
            fout.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment