Skip to content

Instantly share code, notes, and snippets.

@spikeein
Created March 1, 2022 16:26
Show Gist options
  • Save spikeein/00d996c03f6e972cc3101f2e3d9c1dd1 to your computer and use it in GitHub Desktop.
Save spikeein/00d996c03f6e972cc3101f2e3d9c1dd1 to your computer and use it in GitHub Desktop.
招行信用卡邮件抓取
import re
import datetime
from imap_tools import MailBox, AND
def parse_html(html, *, year='2022', card_tail='1167'):
    """Extract card transactions from the HTML body of a CMB digest e-mail.

    The message is scanned with four independent regular expressions and the
    resulting columns are zipped together positionally.

    Args:
        html: HTML text of the message. An empty or ``None`` body yields ``[]``.
        year: Year that prefixes the date in the "您的消费明细如下:" heading.
            Defaults to ``'2022'``, the value that used to be hard-coded.
        card_tail: Card suffix that follows ``尾号`` in each description cell.
            Defaults to ``'1167'``, the value that used to be hard-coded.

    Returns:
        A list of ``(day, time, amount, description)`` tuples of strings.
        ``day`` is the text captured between ``"<year>/"`` and the heading
        (presumably ``MM/DD`` -- confirm against a real message).
    """
    if not html:
        return []

    days = re.findall(
        '120%;">' + re.escape(str(year)) + '/(.*?) 您的消费明细如下:', html, re.S
    )

    # This pattern matches every cell rendered with the 12px style; only the
    # cells containing ':' are kept (presumably the HH:MM:SS clock times).
    cells = re.findall(
        '<font face="Awesome Font" style="font-size:12px;line-height:120%;">(.*?)</font>',
        html,
        re.S,
    )
    times = [cell.strip() for cell in cells if ':' in cell]

    amounts = re.findall('CNY&nbsp;(.*?)</font>', html, re.S)
    descriptions = re.findall(
        '尾号' + re.escape(str(card_tail)) + '&nbsp;消费&nbsp;(.*?)</font>', html, re.S
    )

    # The day list is repeated so that a single heading pairs with every row.
    # NOTE(review): zip() stops at the shortest column, so a row missing its
    # amount or description silently shifts/drops later rows; likewise more
    # than one heading per mail would interleave days -- verify with real data.
    return list(zip(days * len(times), times, amounts, descriptions))
def write_bean(x, *, year='2022'):
    """Append one Beancount transaction per parsed row to a monthly ledger file.

    Args:
        x: Sequence of ``(day, time, amount, description)`` string tuples where
            ``day`` looks like ``MM/DD``. Nothing is written when it is empty.
        year: Year used for the entry dates and the file name. Defaults to
            ``'2022'``, the value that used to be hard-coded.

    Side effects:
        Appends to ``cmb_<year>_<month>.bean`` in the current directory, where
        ``<month>`` comes from the first row only, and echoes every row and
        generated entry to stdout.
    """
    if not x:
        return

    month = x[0][0].split('/')[0]
    # Explicit UTF-8: merchant names are Chinese and the platform default
    # encoding is not guaranteed to be able to represent them.
    with open(f'cmb_{year}_{month}.bean', 'a', encoding='utf-8') as f:
        for i in x:
            print(i)
            day, clock, amount, payee = i[:4]
            date = f"{year}-{day.replace('/', '-')}"
            # Postings and metadata are indented: Beancount only accepts
            # them as part of the transaction when they do not start at
            # column 0.
            r = (
                f'\n{date} * "{payee}"\n'
                f'  Liabilities:CreditCard:AMEX -{amount} CNY\n'
                f'  Expenses:Food:Dinner:Supper\n'
                f'  payTime: "{date} {clock}"\n'
            )
            print(r)
            f.write(r)
def main():
    """Fetch today's CMB digest mails and append their transactions to the ledger.

    Logs in to the ``bill`` folder on ``imap.gmail.com``, fetches messages
    dated today whose sender matches ``cmbchina.com``, skips those whose
    subject lacks ``每日信用管家``, and passes each remaining HTML body through
    ``parse_html`` and ``write_bean``.
    """
    # NOTE(review): the account and password are inline literals; they look
    # like placeholders -- supply real credentials from outside the source.
    # The context manager logs the IMAP session out even if processing fails.
    with MailBox('imap.gmail.com').login('[email protected]', 'password', initial_folder='bill') as mailbox:
        criteria = AND(date=datetime.datetime.now().date(), from_="cmbchina.com")
        for msg in mailbox.fetch(criteria=criteria, charset='utf8'):
            if '每日信用管家' not in msg.subject:
                continue
            print("Message id:", msg.uid)
            print("Message Subject:", msg.subject)
            print("Message Date:", msg.date)
            write_bean(parse_html(msg.html))
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment