Last active
February 26, 2020 14:16
-
-
Save mikkame/d229361205bbdf10b95c45758bb387fd to your computer and use it in GitHub Desktop.
amazon.co.jpの注文履歴からカード支払い情報を抜き出す
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Browser-console snippet: walks the order-history page currently loaded in
 * `document`, follows the "next page" link until there is none, and for each
 * order ID found downloads the printable invoice page to read the credit-card
 * charge cells.
 *
 * Output: once every request has settled, logs 'complete.' followed by an
 * array of `{ orderID, cardInfo?, payInfo? }` objects. `cardInfo` / `payInfo`
 * are only present when the invoice page contained the expected cells.
 *
 * NOTE(review): the selectors and the invoice URL are tied to the page markup
 * the original author observed; confirm they still match the live site.
 */
(() => {
  'use strict';

  const ORIGIN = 'https://www.amazon.co.jp';
  // Only the 3-7-7 digit pattern has been observed; other formats may exist.
  const ORDER_ID_PATTERN = /\d{3}-\d{7}-\d{7}/;
  // Label text of the cell that precedes the card/payment cells on the invoice.
  const CARD_CHARGE_LABEL = /クレジットカードへの請求/;

  const result = [];

  // Number of units of work (requests, plus the initial crawl) still running.
  // Completion is reported when this drops back to zero, so it cannot fire
  // while a pagination request that may add more orders is still in flight.
  let pendingCount = 0;

  const beginTask = () => {
    pendingCount += 1;
  };

  const endTask = () => {
    pendingCount -= 1;
    if (pendingCount === 0) {
      console.log('complete.');
      console.log(result);
    }
  };

  /**
   * GETs `url`, parses the response body into a detached HTML document and
   * hands it to `onDocument`. Any failure logs `errorMessage`. The task is
   * always marked finished, even when `onDocument` throws.
   *
   * @param {string} url - Address to request.
   * @param {string} errorMessage - Text logged on HTTP or network failure.
   * @param {(doc: Document) => void} onDocument - Receives the parsed page.
   */
  const fetchDocument = (url, errorMessage, onDocument) => {
    beginTask();
    const request = new XMLHttpRequest();
    request.open('GET', url, true);
    request.onload = () => {
      try {
        if (request.status >= 200 && request.status < 400) {
          // A document created this way is detached from the visible page.
          const parsed = document.implementation.createHTMLDocument();
          parsed.body.innerHTML = request.response;
          onDocument(parsed);
        } else {
          console.log(errorMessage);
        }
      } finally {
        // Runs after onDocument, so requests it started are already counted.
        endTask();
      }
    };
    // Without this a network-level failure would leave the task pending forever.
    request.onerror = () => {
      console.log(errorMessage);
      endTask();
    };
    request.send();
  };

  /**
   * Copies the card and payment cell text from an invoice page onto `order`.
   * When several cells match the label, the last one in document order wins.
   *
   * @param {{orderID: string, cardInfo?: string, payInfo?: string}} order
   * @param {Document} invoiceDocument
   */
  const readCardInfo = (order, invoiceDocument) => {
    Array.from(invoiceDocument.querySelectorAll('td')).forEach((cell) => {
      const valueCell = cell.nextElementSibling;
      if (!valueCell || !CARD_CHARGE_LABEL.test(cell.textContent)) {
        return;
      }
      const cardCell = valueCell.querySelector('td:first-child');
      const payCell = valueCell.querySelector('td:last-child');
      // Skip unexpected layouts instead of throwing on a null cell.
      if (cardCell && payCell) {
        order.cardInfo = cardCell.textContent;
        order.payInfo = payCell.textContent;
      }
    });
  };

  /**
   * Collects the orders listed on one order-history page and, if the page has
   * a "next" link, requests that page and crawls it the same way.
   *
   * @param {Document} page_document - Order-history page to scan.
   */
  const crawlPage = (page_document) => {
    Array.from(
      page_document.querySelectorAll('.a-color-secondary.value')
    ).forEach((el) => {
      const matched = el.textContent.match(ORDER_ID_PATTERN);
      if (!matched) {
        return;
      }
      // Keep only the matched ID, not surrounding whitespace or other text.
      const order = { orderID: matched[0] };
      result.push(order);
      const invoiceUrl =
        `${ORIGIN}/gp/css/summary/print.html/ref=oh_aui_ajax_invoice` +
        `?ie=UTF8&orderID=${encodeURIComponent(order.orderID)}`;
      fetchDocument(invoiceUrl, 'ajax error. you can fix code!', (invoiceDocument) => {
        readCardInfo(order, invoiceDocument);
      });
    });

    const nextLink = page_document.querySelector('.a-last a');
    const nextHref = nextLink ? nextLink.getAttribute('href') : null;
    if (nextHref) {
      // Resolve against the origin so root-relative hrefs do not yield "//".
      const nextUrl = new URL(nextHref, ORIGIN).href;
      fetchDocument(nextUrl, 'pagination error', crawlPage);
    }
  };

  // Treat the initial crawl as a task so that a page with no orders and no
  // next link still reports completion (with an empty result).
  beginTask();
  try {
    crawlPage(document);
  } finally {
    endTask();
  }
})();
使い方
amazon.co.jpの注文履歴ページを開き、ブラウザのデベロッパーコンソールにこのスクリプトを貼り付けて実行する
MITで!
ページネーションつらみ
ページネーション実装
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
TODO ページネーション
TODO CSV化