Last active
March 16, 2021 19:58
-
-
Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.
Flatten additional_attributes from Magento 2 export csv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script formats a magento product export file, and sets each | |
additional_attribute as a new column in a target file | |
""" | |
import csv | |
# real files | |
source_file = './catalog_product_20210316_044037.csv' | |
target_file = './catalog_product_20210316_044037_formatted.csv' | |
def get_modified_headers(): | |
reader = csv.reader(open(source_file, 'r')) | |
headers = next(reader) | |
# get all headers for additional_attributes | |
additional_attributes_headers = [] | |
i = 0 | |
for row in reader: | |
target_index = headers.index("additional_attributes") | |
attributes = row[target_index].split(",") | |
for attribute in attributes: | |
data = attribute.split("=") | |
if len(data) != 1: | |
if data[0] != 'publish_date': | |
header = data[0] | |
# add attribute as header | |
if header not in additional_attributes_headers: | |
additional_attributes_headers.append(header) | |
# iterator | |
i += 1 | |
headers_modified = headers + additional_attributes_headers | |
return [headers_modified, additional_attributes_headers] | |
def run_with_writer(): | |
""" Main run function """ | |
reader = csv.reader(open(source_file, 'r')) | |
writer = csv.writer(open(target_file, 'w')) | |
# get original headers | |
headers = next(reader) | |
# add additional attributes to headers | |
modified_headers = get_modified_headers()[0] | |
additional_attributes_headers = get_modified_headers()[1] | |
# write headers to file | |
writer.writerow(modified_headers) | |
# extract data and populate rows | |
i = 0 | |
for row in reader: | |
print(f'processing row: {i}') | |
result = [None] * len(additional_attributes_headers) | |
# pre-populate result (to avoid shifting items as we add insert them at the index) | |
target_index = headers.index("additional_attributes") | |
attributes = row[target_index].split(",") | |
for attribute in attributes: | |
data = attribute.split("=") | |
if len(data) != 1: | |
if data[0] != 'publish_date': | |
header = data[0] | |
value = data[1] | |
target_index = additional_attributes_headers.index(header) | |
result[target_index] = value | |
modified_row = row + result | |
# write modified row | |
writer.writerow(modified_row) | |
i += 1 | |
# Main entry point | |
if __name__ == '__main__': | |
run_with_writer() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment