Last active
July 13, 2019 18:46
-
-
Save johanlantz/943a15e9c19d700055dbdb7fa70db060 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SELECT p.RTG_ID, p.RVW_PROD_ID, p.RVW_LANG_ID, p.RVW_DATE_ADD, p.RVW_STATUS, p.RVW_CUST_ID, psr.RTG_NOTE, cust.email, cust.firstname, cust.lastname, p.RVW_DATA FROM ps_gsr_review p LEFT JOIN ps_gsr_rating psr ON p.RTG_ID = psr.RTG_ID LEFT JOIN ps_customer cust ON cust.id_customer = p.RVW_CUST_ID | |
import csv | |
IDX_RAT_ID = 0 | |
IDX_REV_PROD_ID = 1 | |
IDX_REV_LANG_ID = 2 | |
IDX_REV_DATE = 3 | |
IDX_REV_STATUS = 4 | |
IDX_REV_CUST_ID = 5 | |
IDX_RTG_NOTE = 6 | |
IDX_CUST_EMAIL = 7 | |
IDX_CUST_FIRSTNAME = 8 | |
IDX_CUST_LASTNAME = 9 | |
yotpo_csv_headers = "published,review_title,review_content,review_score,date,product_id,display_name,email,comment_content,lang_id,cust_id" | |
def get_next_line_end(data, start): | |
end = data.find("}", start) | |
if end != -1: | |
return end + 4 #need to scan past the terminating }";\n to get to the next line | |
else: | |
return -1 | |
def get_params_without_json_field(line): | |
end = line.find("a:4:{") | |
print ("Params line without json is", line[:end]) | |
return line[:end] | |
def get_json_data_field(line): | |
start = line.find("a:4:{") | |
if start != -1: | |
end = data.find("}", start) +1 | |
return line[start:end] | |
else: | |
return None | |
def get_field_value(field_name, json_data): | |
start = json_data.find(field_name) + len(field_name) + 5 # move to the s: part | |
end = json_data.find(":", start) | |
field_value_start = end +1 | |
field_value_end = json_data.find("\";", start) +1 | |
print ("God field value for " + field_name + " as " + json_data[field_value_start:field_value_end]) | |
return json_data[field_value_start:field_value_end].replace('\n', ' ').replace('\r', ''); | |
def remove_outer_quotes(line): | |
return line[1:len(line)-1] | |
start = 0 | |
csvfile = open('formatted.csv', 'wb') | |
with open('x5.csv', 'rb') as f: | |
data = f.read() | |
outfile = open('formatted.csv', 'wb') | |
outfile.write(yotpo_csv_headers) | |
outfile.write("\n") | |
line_start = 0 | |
# We must skip the first line with the headers | |
line_end = data.find("\n", 0) | |
line_start = line_end | |
line_end = get_next_line_end(data, line_start) | |
while line_end is not -1: | |
line = data[line_start:line_end] | |
print ("got line " + line , "\n") | |
params_list = get_params_without_json_field(line).split(";") | |
print ("got params ", params_list) | |
if params_list[IDX_REV_STATUS] == "0": | |
print ("Skipping review since status=0") | |
continue | |
json_data = get_json_data_field(line) | |
print ("Got json field as " + json_data) | |
outfile.write("true," + #published | |
remove_outer_quotes(get_field_value("sTitle", json_data)) + "," + | |
remove_outer_quotes(get_field_value("sComment", json_data)) + "," + | |
remove_outer_quotes(params_list[IDX_RTG_NOTE]) + "," + | |
remove_outer_quotes(params_list[IDX_REV_DATE]) + "," + | |
remove_outer_quotes(params_list[IDX_REV_PROD_ID]) + "," + | |
remove_outer_quotes(params_list[IDX_CUST_FIRSTNAME]) + " " + remove_outer_quotes(params_list[IDX_CUST_LASTNAME])[0] + "," + | |
remove_outer_quotes(params_list[IDX_CUST_EMAIL]) + "," + | |
"," + #comment content | |
remove_outer_quotes(params_list[IDX_REV_LANG_ID]) + "," + | |
remove_outer_quotes(params_list[IDX_REV_CUST_ID]) + "," + "\n") | |
# get the next line | |
line_start = line_end | |
line_end = get_next_line_end(data, line_start) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment