Skip to content

Instantly share code, notes, and snippets.

@shiracamus
Created June 14, 2020 03:54
Show Gist options
  • Save shiracamus/1e26be1c4642d1ca9289b36d9e9a3a10 to your computer and use it in GitHub Desktop.
Save shiracamus/1e26be1c4642d1ca9289b36d9e9a3a10 to your computer and use it in GitHub Desktop.
import sys
import csv
from time import sleep
import requests
from bs4 import BeautifulSoup
def main():
    """Scrape the Tabelog Hyakumeiten shop list for one genre into a CSV.

    The genre is taken from the first command-line argument, defaulting
    to ``'tonkatsu'`` when none is given; output goes to ``<genre>.csv``.
    """
    if len(sys.argv) < 2:
        genre = 'tonkatsu'
    else:
        genre = sys.argv[1]
    columns = ["shopname", "address", "point", "regular holiday", "url"]
    save_csv(f'{genre}.csv', shops(genre), header=columns)
def shops(genre, interval=5, verbose=True):
    """Yield one info tuple per shop in *genre*, politely rate-limited.

    Args:
        genre: Tabelog Hyakumeiten genre slug (e.g. ``'tonkatsu'``).
        interval: Seconds to sleep between page fetches, to avoid
            hammering the server.
        verbose: When true, print each scraped tuple as it is fetched.

    Yields:
        ``(name, address, point, holiday, url)`` tuples from shop_info().
    """
    for url in shop_urls(genre):
        info = shop_info(url)
        # Fixed idiom: was `verbose and print(...)` — an expression used
        # purely for its side effect; a plain `if` states the intent.
        if verbose:
            print(*map(repr, info))
        yield info
        sleep(interval)
def shop_urls(genre):
    """Return the detail-page URLs of every shop listed for *genre*.

    Fetches the Hyakumeiten award page for the genre and collects the
    ``href`` of each shop link on it.
    """
    url = f'https://award.tabelog.com/hyakumeiten/{genre}'
    # timeout added: requests.get with no timeout can block forever.
    html = requests.get(url, timeout=30).text
    soup = BeautifulSoup(html, 'html.parser')
    # find_all replaces the deprecated BS3-era alias findAll.
    links = soup.find_all('a', class_='list-shop__link-page')
    return [link.get('href') for link in links]
def shop_info(url):
    """Scrape one shop page and return ``(name, address, point, holiday, url)``.

    All fields are plain strings; a field whose element is missing from
    the page comes back as ``''``. ``holiday`` is truncated to its first
    10 characters, matching the original scraper's behavior.
    """
    def text_of(tag):
        # Was `tag and ... or ''` — the broken cond-and-or idiom; an
        # explicit None check says what is meant.
        if tag is None:
            return ''
        # \u3000 is the ideographic (full-width) space common on
        # Japanese pages; normalize it to a plain space.
        return tag.text.strip().replace('\u3000', ' ')

    # timeout added: requests.get with no timeout can block forever.
    html = requests.get(url, timeout=30).text
    find = BeautifulSoup(html, 'html.parser').find
    # Guarded: the original did `find(...).h2`, which raised
    # AttributeError whenever the name block was absent.
    name_div = find('div', class_='rdheader-rstname')
    name = text_of(name_div.h2 if name_div is not None else None)
    address = text_of(find('p', class_='rstinfo-table__address'))
    point = text_of(find('span', class_='rdheader-rating__score-val-dtl'))
    holiday = text_of(find('dd', class_='rdheader-subinfo__closed-text'))[:10]
    return name, address, point, holiday, url
def save_csv(filename, rows, header=None):
    """Write *rows* (an iterable of row tuples) to *filename* as CSV.

    Args:
        filename: Path of the CSV file to create/overwrite.
        rows: Iterable of row sequences; consumed lazily, so a generator
            (e.g. shops()) works.
        header: Optional column-name row written first; a falsy header
            is skipped.
    """
    # newline='' is required by the csv docs: without it each row ends
    # in \r\r\n on Windows (csv writes its own line terminator).
    with open(filename, 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        # Was `header and writer.writerow(header)` — side effect hidden
        # in an expression; `if header:` keeps the same falsy-skip logic.
        if header:
            writer.writerow(header)
        writer.writerows(rows)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment