nathanshipley · June 19, 2025 02:32
diff --git a/splitByLines_DateRange.py b/splitByLines_DateRange.py
 #!/usr/bin/env python3
 import sys
 import os
 import re
 from datetime import datetime

 def parse_date_from_line(line):
    """
    Pull the calendar date out of a leading bracketed timestamp.

    The line must begin with a stamp shaped like "[2/22/2025 12:12 PM]"
    (anything may follow the closing bracket). Only the "M/D/YYYY" part is
    captured; it is validated with datetime.strptime and re-emitted as a
    "YYYY-MM-DD" string.

    Returns None when the line does not start with such a stamp, or when
    the captured digits do not form a real calendar date.
    """
    stamp = re.match(r'\[(\d{1,2}/\d{1,2}/\d{4})\s+\d{1,2}:\d{2}\s+[AP]M\]', line)
    if stamp is None:
        return None
    raw_date = stamp.group(1)  # just the "M/D/YYYY" portion, e.g. "2/22/2025"
    try:
        # strptime rejects impossible dates such as month 13 or day 40.
        return datetime.strptime(raw_date, "%m/%d/%Y").strftime("%Y-%m-%d")
    except ValueError:
        return None

 def split_by_line_count_with_date(input_path, lines_per_file=20000):
    """
    Split a text file into consecutive chunks of at most `lines_per_file` lines.

    Each chunk is first written next to the input file as
    "<base>_temp_part<N><ext>" and, once complete, renamed to
    "<base>_part<N>_<first>_to_<last><ext>", where <first>/<last> are the
    first and last dates that parse_date_from_line found inside that chunk.
    A chunk in which no line yields a date is named
    "<base>_part<N>_nodateinfo<ext>". Existing files with the target name
    are overwritten (os.replace).

    An empty input still produces a single empty "nodateinfo" part.

    Args:
        input_path: Path of the UTF-8 text file to split.
        lines_per_file: Maximum number of lines per chunk; must be >= 1.

    Returns:
        List of the final chunk paths, in the order they were written.

    Raises:
        ValueError: If lines_per_file is less than 1.
        OSError: If the input cannot be read or an output cannot be written.
    """
    if lines_per_file < 1:
        # A zero size would divide by zero mid-run; negatives never make sense.
        raise ValueError(f"lines_per_file must be at least 1, got {lines_per_file!r}")

    base_name, ext = os.path.splitext(os.path.basename(input_path))
    output_dir = os.path.dirname(os.path.abspath(input_path))

    def _temp_path(part):
        # Working name used while a chunk is still being written.
        return os.path.join(output_dir, f"{base_name}_temp_part{part}{ext}")

    def _publish(part, first_date, last_date):
        # Rename a finished chunk to its final, date-stamped name.
        if first_date and last_date:
            new_name = f"{base_name}_part{part}_{first_date}_to_{last_date}{ext}"
        else:
            new_name = f"{base_name}_part{part}_nodateinfo{ext}"
        final_path = os.path.join(output_dir, new_name)
        os.replace(_temp_path(part), final_path)
        return final_path

    created = []
    file_counter = 1
    lines_in_chunk = 0
    date_from = None
    date_to = None

    # Open the input first so a bad input path does not leave a temp file behind.
    with open(input_path, 'r', encoding='utf-8') as infile:
        current_out = open(_temp_path(file_counter), 'w', encoding='utf-8')
        try:
            for line in infile:
                # The current chunk is full: publish it and start the next one.
                if lines_in_chunk == lines_per_file:
                    current_out.close()
                    created.append(_publish(file_counter, date_from, date_to))

                    file_counter += 1
                    lines_in_chunk = 0
                    date_from = None
                    date_to = None
                    current_out = open(_temp_path(file_counter), 'w', encoding='utf-8')

                current_out.write(line)
                lines_in_chunk += 1

                # Track the first and last date seen within this chunk.
                parsed_date = parse_date_from_line(line)
                if parsed_date:
                    if date_from is None:
                        date_from = parsed_date
                    date_to = parsed_date
        except BaseException:
            # Do not leak the handle or leave a half-written temp chunk around.
            current_out.close()
            try:
                os.remove(_temp_path(file_counter))
            except OSError:
                pass  # best-effort cleanup; the original error matters more
            raise

        # Publish the final (possibly short or empty) chunk.
        current_out.close()
        created.append(_publish(file_counter, date_from, date_to))

    print(f"Split complete: {file_counter} files created.")
    return created

 def main(argv=None):
    """
    Command-line entry point: split the single file named on the command line.

    Args:
        argv: Full argument vector including the program name; defaults to
            sys.argv when omitted.

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when the
        input path is not an existing file.
    """
    args = sys.argv if argv is None else argv
    if len(args) != 2:
        # Report the script's real name rather than a hard-coded (stale) one.
        prog = os.path.basename(args[0]) if args and args[0] else "splitByLines_DateRange.py"
        print(f"Usage: python {prog} input.txt", file=sys.stderr)
        return 1

    input_file = args[1]
    if not os.path.isfile(input_file):
        print(f"Error: File not found: {input_file}", file=sys.stderr)
        return 1

    split_by_line_count_with_date(input_file)
    return 0


 if __name__ == "__main__":
    sys.exit(main())
	#!/usr/bin/env python3
	import sys
	import os
	import re
	from datetime import datetime

	def parse_date_from_line(line):
	    """
	    Given a line like "[2/22/2025 12:12 PM] s2k:", extract "2/22/2025"
	    and return it as a "YYYY-MM-DD" string.

	    The bracketed stamp must be at the very start of the line. Returns
	    None if no such stamp is found or if the captured digits are not a
	    real calendar date.
	    """
	    m = re.match(r'\[(\d{1,2}/\d{1,2}/\d{4})\s+\d{1,2}:\d{2}\s+[AP]M\]', line)
	    if not m:
	        return None
	    mdyy = m.group(1)  # e.g. "2/22/2025"
	    try:
	        # strptime rejects impossible dates such as month 13 or day 40.
	        dt = datetime.strptime(mdyy, "%m/%d/%Y")
	        return dt.strftime("%Y-%m-%d")
	    except ValueError:
	        return None

	def split_by_line_count_with_date(input_path, lines_per_file=20000):
	    """
	    Split a text file into consecutive chunks of at most `lines_per_file` lines.

	    Each chunk is first written next to the input file as
	    "<base>_temp_part<N><ext>" and, once complete, renamed to
	    "<base>_part<N>_<first>_to_<last><ext>", where <first>/<last> are the
	    first and last dates that parse_date_from_line found inside that chunk.
	    A chunk in which no line yields a date is named
	    "<base>_part<N>_nodateinfo<ext>". Existing files with the target name
	    are overwritten (os.replace).

	    An empty input still produces a single empty "nodateinfo" part.

	    Args:
	        input_path: Path of the UTF-8 text file to split.
	        lines_per_file: Maximum number of lines per chunk; must be >= 1.

	    Returns:
	        List of the final chunk paths, in the order they were written.

	    Raises:
	        ValueError: If lines_per_file is less than 1.
	        OSError: If the input cannot be read or an output cannot be written.
	    """
	    if lines_per_file < 1:
	        # A zero size would divide by zero mid-run; negatives never make sense.
	        raise ValueError(f"lines_per_file must be at least 1, got {lines_per_file!r}")

	    base_name, ext = os.path.splitext(os.path.basename(input_path))
	    output_dir = os.path.dirname(os.path.abspath(input_path))

	    def _temp_path(part):
	        # Working name used while a chunk is still being written.
	        return os.path.join(output_dir, f"{base_name}_temp_part{part}{ext}")

	    def _publish(part, first_date, last_date):
	        # Rename a finished chunk to its final, date-stamped name.
	        if first_date and last_date:
	            new_name = f"{base_name}_part{part}_{first_date}_to_{last_date}{ext}"
	        else:
	            new_name = f"{base_name}_part{part}_nodateinfo{ext}"
	        final_path = os.path.join(output_dir, new_name)
	        os.replace(_temp_path(part), final_path)
	        return final_path

	    created = []
	    file_counter = 1
	    lines_in_chunk = 0
	    date_from = None
	    date_to = None

	    # Open the input first so a bad input path does not leave a temp file behind.
	    with open(input_path, 'r', encoding='utf-8') as infile:
	        current_out = open(_temp_path(file_counter), 'w', encoding='utf-8')
	        try:
	            for line in infile:
	                # The current chunk is full: publish it and start the next one.
	                if lines_in_chunk == lines_per_file:
	                    current_out.close()
	                    created.append(_publish(file_counter, date_from, date_to))

	                    file_counter += 1
	                    lines_in_chunk = 0
	                    date_from = None
	                    date_to = None
	                    current_out = open(_temp_path(file_counter), 'w', encoding='utf-8')

	                current_out.write(line)
	                lines_in_chunk += 1

	                # Track the first and last date seen within this chunk.
	                parsed_date = parse_date_from_line(line)
	                if parsed_date:
	                    if date_from is None:
	                        date_from = parsed_date
	                    date_to = parsed_date
	        except BaseException:
	            # Do not leak the handle or leave a half-written temp chunk around.
	            current_out.close()
	            try:
	                os.remove(_temp_path(file_counter))
	            except OSError:
	                pass  # best-effort cleanup; the original error matters more
	            raise

	        # Publish the final (possibly short or empty) chunk.
	        current_out.close()
	        created.append(_publish(file_counter, date_from, date_to))

	    print(f"Split complete: {file_counter} files created.")
	    return created

	def main(argv=None):
	    """
	    Command-line entry point: split the single file named on the command line.

	    Args:
	        argv: Full argument vector including the program name; defaults to
	            sys.argv when omitted.

	    Returns:
	        Process exit status: 0 on success, 1 on a usage error or when the
	        input path is not an existing file.
	    """
	    args = sys.argv if argv is None else argv
	    if len(args) != 2:
	        # Report the script's real name rather than a hard-coded (stale) one.
	        prog = os.path.basename(args[0]) if args and args[0] else "splitByLines_DateRange.py"
	        print(f"Usage: python {prog} input.txt", file=sys.stderr)
	        return 1

	    input_file = args[1]
	    if not os.path.isfile(input_file):
	        print(f"Error: File not found: {input_file}", file=sys.stderr)
	        return 1

	    split_by_line_count_with_date(input_file)
	    return 0


	if __name__ == "__main__":
	    sys.exit(main())