froop · January 20, 2022 21:49
diff --git a/scraping_sample.py b/scraping_sample.py
 import requests
 import os
 from bs4 import BeautifulSoup

 RES_PATH = 'result'  # Directory the extracted README text files are written to
 REQUEST_TIMEOUT = 10  # Seconds to wait for each HTTP response before giving up

 os.makedirs(RES_PATH, exist_ok=True)

 # Download the repository listing page. A timeout is passed because requests
 # waits indefinitely by default, and the status is checked so that an HTTP
 # error page is not parsed as if it were the listing.
 root_page = requests.get('https://github.com/froop?tab=repositories',
                          timeout=REQUEST_TIMEOUT)
 root_page.raise_for_status()
 root_soup = BeautifulSoup(root_page.text, 'html.parser')
 repo_list = root_soup.find(id='user-repositories-list')
 if repo_list is None:
    # find() returns None when nothing matches; stop with a readable message
    # instead of an AttributeError on the find_all() call below.
    raise SystemExit('repository list (id="user-repositories-list") not found')

 # Visit every repository link in the listing and save its README text.
 for item in repo_list.find_all('a', attrs={'itemprop': 'name codeRepository'}):
    item_name = item.text.strip()
    item_href = item.get('href')
    if not item_href:
        # A link without a target cannot be followed.
        continue
    print(item_name + ': ' + item_href)
    item_page = requests.get('https://github.com' + item_href,
                             timeout=REQUEST_TIMEOUT)
    if not item_page.ok:
        # The repository page could not be fetched; skip it and keep going.
        continue
    item_soup = BeautifulSoup(item_page.text, 'html.parser')
    readme = item_soup.find(id='readme')
    if readme is None:
        # The page has no element with id="readme", so there is nothing to save.
        continue
    # One UTF-8 text file per repository, named after the link text.
    with open(os.path.join(RES_PATH, item_name + '.txt'), mode='w', encoding='utf-8') as f:
        f.write(readme.text)
	import requests
	import os
	from bs4 import BeautifulSoup

	RES_PATH = 'result'  # Directory the extracted README text files are written to
	REQUEST_TIMEOUT = 10  # Seconds to wait for each HTTP response before giving up

	os.makedirs(RES_PATH, exist_ok=True)

	# Download the repository listing page. A timeout is passed because requests
	# waits indefinitely by default, and the status is checked so that an HTTP
	# error page is not parsed as if it were the listing.
	root_page = requests.get('https://github.com/froop?tab=repositories',
	                         timeout=REQUEST_TIMEOUT)
	root_page.raise_for_status()
	root_soup = BeautifulSoup(root_page.text, 'html.parser')
	repo_list = root_soup.find(id='user-repositories-list')
	if repo_list is None:
	    # find() returns None when nothing matches; stop with a readable message
	    # instead of an AttributeError on the find_all() call below.
	    raise SystemExit('repository list (id="user-repositories-list") not found')

	# Visit every repository link in the listing and save its README text.
	# The loop, guard and file-writing bodies are nested explicitly here; the
	# flattened layout left them at the same level as their headers.
	for item in repo_list.find_all('a', attrs={'itemprop': 'name codeRepository'}):
	    item_name = item.text.strip()
	    item_href = item.get('href')
	    if not item_href:
	        # A link without a target cannot be followed.
	        continue
	    print(item_name + ': ' + item_href)
	    item_page = requests.get('https://github.com' + item_href,
	                             timeout=REQUEST_TIMEOUT)
	    if not item_page.ok:
	        # The repository page could not be fetched; skip it and keep going.
	        continue
	    item_soup = BeautifulSoup(item_page.text, 'html.parser')
	    readme = item_soup.find(id='readme')
	    if readme is None:
	        # The page has no element with id="readme", so there is nothing to save.
	        continue
	    # One UTF-8 text file per repository, named after the link text.
	    with open(os.path.join(RES_PATH, item_name + '.txt'), mode='w', encoding='utf-8') as f:
	        f.write(readme.text)