xyb · April 4, 2023 08:39
diff --git a/excel_tools.py b/excel_tools.py
 import csv
 import io
 import logging

 import openpyxl

 logger = logging.getLogger("excel")


 def excel_to_csv(path):
    """
    Converts an Excel file to a list of CSV strings,
    one for each sheet in the Excel file.

    Args:
    - path (str): The path to the Excel file to be converted.

    Returns:
    - result (list): A list of dictionaries, where each dictionary
      contains the following keys:
    - sheet_no (int): The number of the sheet in the Excel file.
    - sheet_name (str): The name of the sheet in the Excel file.
    - csv_content (str): The CSV content of the sheet.

    Raises:
    - FileNotFoundError: If the Excel file does not exist.
    - openpyxl.utils.exceptions.InvalidFileException: If the Excel
      file is not a valid Excel file.
    """
    wb = openpyxl.load_workbook(path)

    result = []
    for id, sheet_name in enumerate(wb.sheetnames):
        sheet = wb[sheet_name]
        csv_content = io.StringIO()
        csv_writer = csv.writer(csv_content)
        for row in sheet.iter_rows(values_only=True):
            csv_writer.writerow(row)
        result.append(
            dict(
                sheet_no=id + 1,
                sheet_name=sheet_name,
                csv_content=csv_content.getvalue(),
            ),
        )

    return result


 def sheets_to_excel(path, sheets):
    """
    Write all sheets into a excel file.

    It takes in two parameters: path and sheets. The path parameter is the path
    to the excel file. The sheets parameter is a list of tuples, where each tuple
    contains the sheet name and the csv content. The csv content is in csv format,
    where the first line is the field names and the other lines are the data itself.

    Example Usage:
        content = 'a,b,c\\n1,2,3\\n4,5,6\\n'
        excel_path = '/tmp/test.xlsx'
        sheets_to_excel(excel_path, [('test sheet', content)])

    Parameters:
    - path (str): The path to the excel file.
    - sheets (list): A list of tuples, where each tuple contains the sheet name
      and the csv content.

    Returns:
    - None

    >>> content = 'a,b,c\\n1,2,3\\n'
    >>> excel_path = '/tmp/test.xlsx'
    >>> sheets_to_excel(excel_path, [('test', content)])
    >>> excel_to_csv(excel_path)
    [{'sheet_no': 1, 'sheet_name': 'test', 'csv_content': 'a,b,c\\r\\n1,2,3\\r\\n'}]
    >>> sheets_to_excel(excel_path, [])
    >>> excel_to_csv(excel_path)
    [{'sheet_no': 1, 'sheet_name': 'Sheet', 'csv_content': ''}]
    """
    workbook = openpyxl.Workbook()
    default_sheet = workbook["Sheet"]
    if sheets:
        workbook.remove(default_sheet)
    for sheet in sheets:
        sheet_name, csv_content = sheet
        worksheet = workbook.create_sheet(title=sheet_name)
        reader = csv.reader(csv_content.split("\n"))
        for row_index, row in enumerate(reader):
            for col_index, cell_value in enumerate(row):
                worksheet.cell(
                    row=row_index + 1,
                    column=col_index + 1,
                    value=cell_value,
                )
    workbook.save(path)


 def merge_same_cell(xlsx_filename):
    """
    >>> content = 'a,b,c\\na,b,e\\na,d,e'
    >>> excel_path = '/tmp/test.xlsx'
    >>> sheets_to_excel(excel_path, [('test', content)])
    >>> excel_to_csv(excel_path)
    [... 'csv_content': 'a,b,c\\r\\na,b,e\\r\\na,d,e\\r\\n'}]
    >>> merge_same_cell(excel_path)
    >>> excel_to_csv(excel_path)
    [... 'csv_content': 'a,b,c\\r\\n,,e\\r\\n,d,\\r\\n'}]
    """
    wb = openpyxl.load_workbook(xlsx_filename)
    for sheet in wb:
        merge_list = []
        for col in sheet.columns:
            start_row = 1
            for i in range(1, len(col)):
                if col[i].value != col[i - 1].value:
                    if start_row != i:
                        cell_range = (
                            sheet.cell(row=start_row, column=col[0].column).coordinate
                            + ":"
                            + sheet.cell(row=i, column=col[0].column).coordinate
                        )
                        merge_list.append(cell_range)
                    start_row = i + 1
            if start_row != len(col) + 1:
                cell_range = (
                    sheet.cell(row=start_row, column=col[0].column).coordinate
                    + ":"
                    + sheet.cell(row=len(col), column=col[0].column).coordinate
                )
                merge_list.append(cell_range)
        for cell_range in merge_list:
            if cell_range in sheet.merged_cells:
                sheet.unmerge_cells(cell_range)
            sheet.merge_cells(cell_range)
    wb.save(xlsx_filename)


 def adjust_col_width_to_fit_content(xlsx_filename):
    """
    Adjusts the column width of an Excel file to fit its content.

    Args:
    - xlsx_filename (str): The path to the Excel file.

    Returns:
    - None
    """
    wb = openpyxl.load_workbook(xlsx_filename)
    for sheet in wb:
        for col in sheet.columns:
            max_length = 0
            column = col[0].column_letter  # Get the column name
            for cell in col:
                try:  # Necessary to avoid error on empty cells
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except Exception:
                    pass
            adjusted_width = (max_length + 2) * 1.2  # Adjust the width
            sheet.column_dimensions[column].width = adjusted_width
    wb.save(xlsx_filename)


 def auto_remove_unused_cols_and_rows(xlsx_filename):
    wb = openpyxl.load_workbook(xlsx_filename)

    for sheet in wb:
        # Get the range of cells that contain data
        used_range = sheet.calculate_dimension()

        # Get the maximum row and column indices in the used range
        max_row, min_col, max_col, min_row = openpyxl.utils.cell.range_boundaries(
            used_range,
        )

        # Identify the header row
        header_row = None
        for row in range(min_row, max_row + 1):
            if all(cell.value is None for cell in sheet[row]):
                header_row = row
            else:
                break

        # Delete any unused columns
        for col in range(max_col, sheet.max_column + 1):
            if all(
                sheet[row][col].value is None for row in range(min_row, max_row + 1)
            ):
                sheet.delete_cols(col)

        # Delete any unused rows
        if header_row is not None:
            for row in range(max_row, header_row - 1, -1):
                if all(
                    sheet[row][col].value is None for col in range(min_col, max_col + 1)
                ):
                    sheet.delete_rows(row)
        else:
            for row in range(max_row, 0, -1):
                if all(
                    sheet[row][col].value is None for col in range(min_col, max_col + 1)
                ):
                    sheet.delete_rows(row)

        # Unmerge any merged cells before deleting them
        for merged_cell_range in sheet.merged_cells.ranges:
            merged_cell_range.unmerge()

    wb.save(xlsx_filename)


 def auto_height_row(xlsx_filename):
    wb = openpyxl.load_workbook(xlsx_filename)

    for sheet in wb:
        for row in sheet.rows:
            max_height = 0
            for cell in row:
                try:
                    if cell.value:
                        cell_height = len(str(cell.value)) * 1.2
                        if cell_height > max_height:
                            max_height = cell_height
                except Exception:
                    pass
            if max_height > 0:
                sheet.row_dimensions[row[0].row].height = max_height

    wb.save(xlsx_filename)
	import csv
	import io
	import logging

	import openpyxl

	logger = logging.getLogger("excel")


	def excel_to_csv(path):
	"""
	Converts an Excel file to a list of CSV strings,
	one for each sheet in the Excel file.

	Args:
	- path (str): The path to the Excel file to be converted.

	Returns:
	- result (list): A list of dictionaries, where each dictionary
	contains the following keys:
	- sheet_no (int): The number of the sheet in the Excel file.
	- sheet_name (str): The name of the sheet in the Excel file.
	- csv_content (str): The CSV content of the sheet.

	Raises:
	- FileNotFoundError: If the Excel file does not exist.
	- openpyxl.utils.exceptions.InvalidFileException: If the Excel
	file is not a valid Excel file.
	"""
	wb = openpyxl.load_workbook(path)

	result = []
	for id, sheet_name in enumerate(wb.sheetnames):
	sheet = wb[sheet_name]
	csv_content = io.StringIO()
	csv_writer = csv.writer(csv_content)
	for row in sheet.iter_rows(values_only=True):
	csv_writer.writerow(row)
	result.append(
	dict(
	sheet_no=id + 1,
	sheet_name=sheet_name,
	csv_content=csv_content.getvalue(),
	),
	)

	return result


	def sheets_to_excel(path, sheets):
	"""
	Write all sheets into a excel file.

	It takes in two parameters: path and sheets. The path parameter is the path
	to the excel file. The sheets parameter is a list of tuples, where each tuple
	contains the sheet name and the csv content. The csv content is in csv format,
	where the first line is the field names and the other lines are the data itself.

	Example Usage:
	content = 'a,b,c\\n1,2,3\\n4,5,6\\n'
	excel_path = '/tmp/test.xlsx'
	sheets_to_excel(excel_path, [('test sheet', content)])

	Parameters:
	- path (str): The path to the excel file.
	- sheets (list): A list of tuples, where each tuple contains the sheet name
	and the csv content.

	Returns:
	- None

	>>> content = 'a,b,c\\n1,2,3\\n'
	>>> excel_path = '/tmp/test.xlsx'
	>>> sheets_to_excel(excel_path, [('test', content)])
	>>> excel_to_csv(excel_path)
	[{'sheet_no': 1, 'sheet_name': 'test', 'csv_content': 'a,b,c\\r\\n1,2,3\\r\\n'}]
	>>> sheets_to_excel(excel_path, [])
	>>> excel_to_csv(excel_path)
	[{'sheet_no': 1, 'sheet_name': 'Sheet', 'csv_content': ''}]
	"""
	workbook = openpyxl.Workbook()
	default_sheet = workbook["Sheet"]
	if sheets:
	workbook.remove(default_sheet)
	for sheet in sheets:
	sheet_name, csv_content = sheet
	worksheet = workbook.create_sheet(title=sheet_name)
	reader = csv.reader(csv_content.split("\n"))
	for row_index, row in enumerate(reader):
	for col_index, cell_value in enumerate(row):
	worksheet.cell(
	row=row_index + 1,
	column=col_index + 1,
	value=cell_value,
	)
	workbook.save(path)


	def merge_same_cell(xlsx_filename):
	"""
	>>> content = 'a,b,c\\na,b,e\\na,d,e'
	>>> excel_path = '/tmp/test.xlsx'
	>>> sheets_to_excel(excel_path, [('test', content)])
	>>> excel_to_csv(excel_path)
	[... 'csv_content': 'a,b,c\\r\\na,b,e\\r\\na,d,e\\r\\n'}]
	>>> merge_same_cell(excel_path)
	>>> excel_to_csv(excel_path)
	[... 'csv_content': 'a,b,c\\r\\n,,e\\r\\n,d,\\r\\n'}]
	"""
	wb = openpyxl.load_workbook(xlsx_filename)
	for sheet in wb:
	merge_list = []
	for col in sheet.columns:
	start_row = 1
	for i in range(1, len(col)):
	if col[i].value != col[i - 1].value:
	if start_row != i:
	cell_range = (
	sheet.cell(row=start_row, column=col[0].column).coordinate
	+ ":"
	+ sheet.cell(row=i, column=col[0].column).coordinate
	)
	merge_list.append(cell_range)
	start_row = i + 1
	if start_row != len(col) + 1:
	cell_range = (
	sheet.cell(row=start_row, column=col[0].column).coordinate
	+ ":"
	+ sheet.cell(row=len(col), column=col[0].column).coordinate
	)
	merge_list.append(cell_range)
	for cell_range in merge_list:
	if cell_range in sheet.merged_cells:
	sheet.unmerge_cells(cell_range)
	sheet.merge_cells(cell_range)
	wb.save(xlsx_filename)


	def adjust_col_width_to_fit_content(xlsx_filename):
	"""
	Adjusts the column width of an Excel file to fit its content.

	Args:
	- xlsx_filename (str): The path to the Excel file.

	Returns:
	- None
	"""
	wb = openpyxl.load_workbook(xlsx_filename)
	for sheet in wb:
	for col in sheet.columns:
	max_length = 0
	column = col[0].column_letter # Get the column name
	for cell in col:
	try: # Necessary to avoid error on empty cells
	if len(str(cell.value)) > max_length:
	max_length = len(cell.value)
	except Exception:
	pass
	adjusted_width = (max_length + 2) * 1.2 # Adjust the width
	sheet.column_dimensions[column].width = adjusted_width
	wb.save(xlsx_filename)


	def auto_remove_unused_cols_and_rows(xlsx_filename):
	wb = openpyxl.load_workbook(xlsx_filename)

	for sheet in wb:
	# Get the range of cells that contain data
	used_range = sheet.calculate_dimension()

	# Get the maximum row and column indices in the used range
	max_row, min_col, max_col, min_row = openpyxl.utils.cell.range_boundaries(
	used_range,
	)

	# Identify the header row
	header_row = None
	for row in range(min_row, max_row + 1):
	if all(cell.value is None for cell in sheet[row]):
	header_row = row
	else:
	break

	# Delete any unused columns
	for col in range(max_col, sheet.max_column + 1):
	if all(
	sheet[row][col].value is None for row in range(min_row, max_row + 1)
	):
	sheet.delete_cols(col)

	# Delete any unused rows
	if header_row is not None:
	for row in range(max_row, header_row - 1, -1):
	if all(
	sheet[row][col].value is None for col in range(min_col, max_col + 1)
	):
	sheet.delete_rows(row)
	else:
	for row in range(max_row, 0, -1):
	if all(
	sheet[row][col].value is None for col in range(min_col, max_col + 1)
	):
	sheet.delete_rows(row)

	# Unmerge any merged cells before deleting them
	for merged_cell_range in sheet.merged_cells.ranges:
	merged_cell_range.unmerge()

	wb.save(xlsx_filename)


	def auto_height_row(xlsx_filename):
	wb = openpyxl.load_workbook(xlsx_filename)

	for sheet in wb:
	for row in sheet.rows:
	max_height = 0
	for cell in row:
	try:
	if cell.value:
	cell_height = len(str(cell.value)) * 1.2
	if cell_height > max_height:
	max_height = cell_height
	except Exception:
	pass
	if max_height > 0:
	sheet.row_dimensions[row[0].row].height = max_height

	wb.save(xlsx_filename)
No results found