$ python3 -m venv venv
$ . venv/bin/activate
(venv) $ pip install scrapy
(venv) $ scrapy runspider tablespider.py
...
{'会社名': '○○株式会社', '住所': '東京都新宿区'}
...
import scrapy
class TableSpider(scrapy.Spider):
    """Spider that reads a header/value HTML table into a dict and prints it.

    Each ``<tr>`` is expected to hold a label in ``<th>`` and the matching
    value in ``<td>``.  Only the first text node directly inside each cell is
    used, so text nested in child elements (for example a link inside the
    ``<td>``) is not part of the extracted value.
    """

    name = 'table'
    start_urls = [
        'https://gist.github.com/orangain/55f67efccdbb394f714471b08d4c33eb/raw/29eb49720d46e327a9d7e4dd6fac6aee1ae4b3da/test.html',
    ]

    def parse(self, response):
        """Collect ``{header text: cell text}`` from every table row and print it.

        Args:
            response: The downloaded page; must support ``.css()`` selection.

        Rows whose ``<th>`` has no direct text node are skipped, and a row
        without direct ``<td>`` text is stored with an empty-string value.
        """
        table_contents = {}
        for tr in response.css('table tr'):
            key = tr.css('th::text').extract_first()
            if key is None:
                # extract_first() returns None when nothing matched; calling
                # .strip() on it would raise AttributeError and abort the
                # whole page, so rows without a header label are skipped.
                continue
            # Default to '' so a missing or empty <td> does not crash either.
            value = tr.css('td::text').extract_first(default='')
            table_contents[key.strip()] = value.strip()
        print(table_contents)
<table>
<tr>
<th>会社名</th>
<td>○○株式会社</td>
</tr>
<tr>
<th>住所</th>
<td>東京都新宿区 <a href="#">Google マップで見る</a>
</tr>
</table>