sevab · January 8, 2025 08:51
diff --git a/filter_csv_by_substring.py b/filter_csv_by_substring.py
 """
 This script filters rows from a CSV file based on a case-insensitive substring match.

 Usage:
    python script.py <csv_filename> <substring> [--preserve-header] [--columns=column1,column2,...]

 Arguments:
    <csv_filename>         The name of the input CSV file to process.
    <substring>            The substring to search for (case-insensitive).
    [--preserve-header]    (Optional) Include this flag to preserve the header row in the output file.
    [--columns=...]        (Optional) Specify a comma-separated list of column names to search against.
                           If not provided, all columns will be searched.

 Output:
    A new CSV file named "filtered_<csv_filename>" is created, containing only the rows
    that match the specified substring. If the --preserve-header flag is used, the header
    row will also be included in the output file.

 Example:
    python script.py data.csv apple --preserve-header --columns=name,address
    - This will create a file "filtered_data.csv" containing rows where the substring "apple"
      (case-insensitive) appears in the "name" or "address" columns, including the header row.

 Error Handling:
    - If the input file does not exist, an error message is displayed.
    - Other exceptions are caught and printed for debugging purposes.

 Author:
    Seva Baskin
 """

 import csv
 import sys

 if len(sys.argv) < 3:
    print("Usage: python script.py <csv_filename> <substring> [--preserve-header] [--columns=column1,column2,...]")
    sys.exit(1)

 input_filename = sys.argv[1]
 substring = sys.argv[2].lower()
 preserve_header = "--preserve-header" in sys.argv
 columns_arg = next((arg for arg in sys.argv if arg.startswith("--columns=")), None)
 columns = columns_arg.split("=", 1)[1].split(",") if columns_arg else None

 try:
    output_filename = f"filtered_{input_filename}"

    with open(input_filename, mode="r", encoding="utf-8") as infile, \
         open(output_filename, mode="w", encoding="utf-8", newline="") as outfile:

        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        header = next(reader, None)

        if preserve_header and header:
            writer.writerow(header)

        # Map column names to indices if columns are specified
        column_indices = None
        if columns and header:
            column_indices = [header.index(col) for col in columns if col in header]

        for row in reader:
            if column_indices:
                # Search only specified columns
                if any(substring in row[idx].lower() for idx in column_indices):
                    writer.writerow(row)
            else:
                # Search all columns
                if any(substring in cell.lower() for cell in row):
                    writer.writerow(row)

    print(f"Filtered rows written to {output_filename}")

 except FileNotFoundError:
    print(f"Error: File {input_filename} not found.")
 except ValueError as e:
    print(f"Error: {e}. Check if column names are correct.")
 except Exception as e:
    print(f"An error occurred: {e}")
	"""
	This script filters rows from a CSV file based on a case-insensitive substring match.

	Usage:
	python script.py <csv_filename> <substring> [--preserve-header] [--columns=column1,column2,...]

	Arguments:
	<csv_filename> The name of the input CSV file to process.
	<substring> The substring to search for (case-insensitive).
	[--preserve-header] (Optional) Include this flag to preserve the header row in the output file.
	[--columns=...] (Optional) Specify a comma-separated list of column names to search against.
	If not provided, all columns will be searched.

	Output:
	A new CSV file named "filtered_<csv_filename>" is created, containing only the rows
	that match the specified substring. If the --preserve-header flag is used, the header
	row will also be included in the output file.

	Example:
	python script.py data.csv apple --preserve-header --columns=name,address
	- This will create a file "filtered_data.csv" containing rows where the substring "apple"
	(case-insensitive) appears in the "name" or "address" columns, including the header row.

	Error Handling:
	- If the input file does not exist, an error message is displayed.
	- Other exceptions are caught and printed for debugging purposes.

	Author:
	Seva Baskin
	"""

	import csv
	import sys

	if len(sys.argv) < 3:
	print("Usage: python script.py <csv_filename> <substring> [--preserve-header] [--columns=column1,column2,...]")
	sys.exit(1)

	input_filename = sys.argv[1]
	substring = sys.argv[2].lower()
	preserve_header = "--preserve-header" in sys.argv
	columns_arg = next((arg for arg in sys.argv if arg.startswith("--columns=")), None)
	columns = columns_arg.split("=", 1)[1].split(",") if columns_arg else None

	try:
	output_filename = f"filtered_{input_filename}"

	with open(input_filename, mode="r", encoding="utf-8") as infile, \
	open(output_filename, mode="w", encoding="utf-8", newline="") as outfile:

	reader = csv.reader(infile)
	writer = csv.writer(outfile)

	header = next(reader, None)

	if preserve_header and header:
	writer.writerow(header)

	# Map column names to indices if columns are specified
	column_indices = None
	if columns and header:
	column_indices = [header.index(col) for col in columns if col in header]

	for row in reader:
	if column_indices:
	# Search only specified columns
	if any(substring in row[idx].lower() for idx in column_indices):
	writer.writerow(row)
	else:
	# Search all columns
	if any(substring in cell.lower() for cell in row):
	writer.writerow(row)

	print(f"Filtered rows written to {output_filename}")

	except FileNotFoundError:
	print(f"Error: File {input_filename} not found.")
	except ValueError as e:
	print(f"Error: {e}. Check if column names are correct.")
	except Exception as e:
	print(f"An error occurred: {e}")