Skip to content

Instantly share code, notes, and snippets.

@mmuman
Last active November 25, 2024 01:05
Show Gist options
  • Save mmuman/88a8c965f2332352336527cb92ebfb7a to your computer and use it in GitHub Desktop.
Save mmuman/88a8c965f2332352336527cb92ebfb7a to your computer and use it in GitHub Desktop.
Converts the aliexpress order list or order detail DOM HTML to a printable item list with QR codes, and inventory CSV
#!/usr/bin/env python3
import sys
from parsel import Selector
import qrcode
import qrcode.image.svg
from io import BytesIO
from base64 import b64encode
# Rendered height of each QR code image in the HTML output (a CSS length).
qrheight = '2cm'
# Field separator placed between columns when writing the CSV output.
csv_sep = ';'
def _csv_field(value):
    """Quote one CSV field, swapping embedded double quotes for single quotes."""
    return '"%s"' % str(value).replace('"', "'")


def _qr_data_uri(url):
    """Return *url* rendered as a QR code image, encoded as a PNG data URI."""
    buffer = BytesIO()
    qrcode.make(url).save(buffer)
    return "data:image/png;base64," + b64encode(buffer.getvalue()).decode()


def main_wrapper(argv):
    """Convert a saved AliExpress order DOM into an item list.

    Args:
        argv: Command-line style argument list: ``argv[0]`` is the program
            name, ``argv[1]`` the input HTML file and ``argv[2]`` the output
            file. The output format is chosen from the output file's
            extension: ``.csv`` writes an inventory CSV, ``.html`` writes a
            printable list with one QR code per item.

    Returns:
        None. Usage help, skipped orders and an unknown output format are
        reported on standard output.

    Raises:
        OSError: If the input file cannot be read or the output file cannot
            be written.
    """
    # Local import so the block stays self-contained; aliased because "html"
    # is a natural name for the generated markup.
    from html import escape

    if len(argv) < 3:
        print("Converts the aliexpress order list or order detail DOM HTML to a printable item list with QR codes, or a CSV for inventory")
        print(f"{argv[0]} input.html output.[csv|html]")
        return

    out_path = argv[2]
    want_csv = out_path.endswith(".csv")
    want_html = out_path.endswith(".html")
    # Validate the format before touching any file, so that a typo in the
    # output name does not create or truncate a file.
    if not (want_csv or want_html):
        print("Ugh, unknown output format!")
        return

    with open(argv[1], encoding="utf-8") as source:
        selector = Selector(text=source.read())

    csv_lines = [csv_sep.join(['Item', 'Description', 'SKU', 'Quantity', 'Order', 'ItemURL', 'OrderURL']) + '\n']
    # The charset declaration matches the UTF-8 encoding used for the output.
    html_parts = ['<html><head><meta charset="utf-8"/></head><body>\n']

    for o in selector.css('div.order-item, div.order-detail-item-content'):
        order_url = o.css('div.order-item-header-right').xpath('.//@href').get() or ""
        order_parts = order_url.split('orderId=')
        order = "" if len(order_parts) < 2 else order_parts[1].split("&")[0]

        item_url = o.css('div.order-item-content-body > a, a.order-detail-item-content-img').xpath('.//@href').get()
        if not item_url:
            print("Skipping multiple items order:")
            print(order_url)
            continue

        # Remove the query part; the item id is the last path component
        # without its extension.
        item_url = item_url.split('?')[0]
        item = item_url.split('/')[-1].split('.')[0]
        desc = o.css('div.order-item-content-info-name span::text, div.item-title a::text').get() or ""
        sku = o.css('div.order-item-content-info-sku::text, div.item-sku-attr::text').get() or "?"
        # The quantity node may be absent; fall back to 0 instead of crashing.
        qty_text = o.css('span.order-item-content-info-number-quantity::text, span.item-price-quantity::text').get()
        qty = (qty_text or "").replace("x", "") or 0
        # URLs starting with "/" get an explicit scheme prepended.
        if item_url.startswith('/'):
            item_url = 'https:' + item_url

        if want_html:
            # QR codes are only needed (and only generated) for HTML output.
            img = f'<img src="{_qr_data_uri(item_url)}" style="float:right;height:{qrheight}"/>'
            html_parts.append(
                f'<div style="break-inside: avoid;">{img}\n<b>{escape(desc)}</b><br/>'
                f'<span>{escape(sku)}</span><span style="float:right">{escape(item)}</span></div>'
                '<br clear="both"/>'
            )
            html_parts.append('<hr style="break-after: auto;"/>\n')
        else:
            row = [item, desc, sku, qty, order, item_url, order_url]
            csv_lines.append(csv_sep.join(_csv_field(i) for i in row) + "\n")

    html_parts.append('</body></html>')

    # The output is opened only after the input parsed successfully.
    with open(out_path, "w", encoding="utf-8") as output:
        output.write("".join(csv_lines if want_csv else html_parts))
# Script entry point: run the converter only when executed directly.
if __name__ == "__main__":
    main_wrapper(sys.argv)
@mmuman
Copy link
Author

mmuman commented Nov 22, 2024

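You need to save the actual DOM, not the page source: open the developer tools (F12), right-click the top-level element in the inspector, choose Copy -> Copy outer HTML, then paste the result into a file and save it.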

@mmuman
Copy link
Author

mmuman commented Nov 25, 2024

It can now output a CSV instead, for inventory purposes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment