Created
March 1, 2022 16:26
-
-
Save spikeein/00d996c03f6e972cc3101f2e3d9c1dd1 to your computer and use it in GitHub Desktop.
招行信用卡邮件抓取
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import datetime | |
from imap_tools import MailBox, AND | |
def parse_html(html):
    """Extract transaction rows from the HTML body of a statement email.

    Four regular-expression scans are run over ``html`` (all with ``re.S`` so
    ``.`` also matches newlines):

    * the text between ``2022/`` and `` 您的消费明细如下:`` (the statement day),
    * the contents of every 12px "Awesome Font" ``<font>`` cell, of which only
      those containing ``:`` are kept and stripped (the transaction times),
    * the text following ``CNY `` up to ``</font>`` (the amounts),
    * the text following ``尾号1167 消费 `` up to ``</font>`` (the descriptions).

    Args:
        html: HTML source of the email as a string.

    Returns:
        A list of ``(day, time, amount, description)`` string tuples. The
        lists are combined with ``zip``, so the result is as long as the
        shortest of them; an email with no matches yields ``[]``.
    """
    flags = re.S

    days = re.findall('120%;">2022/(.*?) 您的消费明细如下:', html, flags)

    font_cells = re.findall(
        '<font face="Awesome Font" style="font-size:12px;line-height:120%;">(.*?)</font>',
        html,
        flags,
    )
    # The same <font> markup wraps other cells too; only time cells contain ':'.
    times = [cell.strip() for cell in font_cells if ':' in cell]

    amounts = re.findall('CNY (.*?)</font>', html, flags)
    descriptions = re.findall('尾号1167 消费 (.*?)</font>', html, flags)

    # The day list is repeated once per time so that each row gets a day
    # value alongside its time, amount and description.
    return list(zip(days * len(times), times, amounts, descriptions))
def write_bean(x):
    """Append one Beancount entry per transaction row to a monthly file.

    The target file is ``cmb_2022_<month>.bean`` in the current working
    directory, where ``<month>`` is the part of the first row's day before
    the ``/``. The file is opened in append mode, so repeated calls add to it.

    Args:
        x: Sequence of rows indexable as ``(day, time, amount, description)``,
            with ``day`` containing a ``/`` separator (e.g. ``"03/01"``).
            An empty or falsy value is a no-op.

    Returns:
        None.
    """
    if not x:
        return

    month = x[0][0].split('/')[0]
    # Descriptions contain Chinese text, so the encoding is pinned instead of
    # relying on the platform default.
    with open(f'cmb_2022_{month}.bean', 'a', encoding='utf-8') as f:
        for i in x:
            print(i)
            date = f"2022-{i[0].replace('/', '-')}"
            # Beancount needs the directive line at column 0 and the postings
            # (and their metadata) indented beneath it.
            r = (
                '\n'
                f'{date} * "{i[3]}"\n'
                f'  Liabilities:CreditCard:AMEX -{i[2]} CNY\n'
                '  Expenses:Food:Dinner:Supper\n'
                f'    payTime: "{date} {i[1]}"\n'
            )
            print(r)
            f.write(r)
def main():
    """Fetch today's matching emails and append their transactions to disk.

    Logs in to ``imap.gmail.com`` with the ``bill`` folder selected, fetches
    messages dated today whose sender matches ``cmbchina.com``, skips any
    whose subject does not contain ``每日信用管家``, and passes each remaining
    message's HTML through ``parse_html`` and ``write_bean``.
    """
    # NOTE(review): the account name and password are hard-coded literals;
    # consider loading them from the environment or a secrets store.
    # Using the mailbox as a context manager ensures the IMAP session is
    # logged out even if fetching or parsing raises.
    with MailBox('imap.gmail.com').login('[email protected]', 'password', initial_folder='bill') as mailbox:
        today = datetime.datetime.now().date()
        criteria = AND(date=today, from_="cmbchina.com")
        for msg in mailbox.fetch(criteria=criteria, charset='utf8'):
            if '每日信用管家' not in msg.subject:
                continue
            print("Message id:", msg.uid)
            print("Message Subject:", msg.subject)
            print("Message Date:", msg.date)
            write_bean(parse_html(msg.html))
# Run the mail fetch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment