Skip to content

Instantly share code, notes, and snippets.

@ijharulislam
Created September 20, 2017 07:03
Show Gist options
  • Save ijharulislam/66d7922d42c23fce9b3bac3b518fbf13 to your computer and use it in GitHub Desktop.
Save ijharulislam/66d7922d42c23fce9b3bac3b518fbf13 to your computer and use it in GitHub Desktop.
# Scraper configuration for Takashimaya store pages.
#
# Layout (as visible in this literal):
#   'format'       - one entry per field to extract: an XPath, plus a regex
#                    ('regex_match') and a replacement template ('regex_item').
#   'target'       - the pages to process, each with an id and a URL.
#   'coordination' - how to locate a Google Maps link and pull the two numeric
#                    groups after "ll=" out of it as lat/lon.
#
# NOTE(review): the string 'Null' looks like a "no regex" sentinel and
# 'remove_tag' looks like a flag encoded as ''/'1' - confirm both against the
# code that consumes this dict.
# NOTE: a later assignment in this file rebinds `input_data`, so only one of
# the two configurations is live at a time.
#
# All regex patterns are raw strings so that backslash sequences such as \d,
# \s and \/ reach the regex engine verbatim instead of relying on Python's
# deprecated handling of invalid string escapes.
input_data = {
    'format': [
        {
            'item_name': 'name',
            'xpath': '//*[@id="topicPath"]/li[2]/text()',
            'regex_match': 'Null',
            'regex_item': 'Null',
            'remove_tag': '',
        },
        {
            'item_name': 'zip',
            'xpath': '//*[@id="header"]/p',
            # Capture the digits/hyphens that follow the postal mark.
            'regex_match': r'〒([\d\-]+)\s*',
            'regex_item': '$1 ',
            'remove_tag': '',
        },
        {
            'item_name': 'address',
            'xpath': '//*[@id="header"]/p',
            # Capture the text between the postal code and the "TEL" label.
            'regex_match': r'〒[\d\-]+\s*(.+)\s*TEL',
            'regex_item': '$1 ',
            'remove_tag': '',
        },
        {
            'item_name': 'tel',
            'xpath': '//*[@id="header"]/p',
            # Capture digits, hyphens, parentheses and spaces after "TEL".
            'regex_match': r'〒[\d\-]+\s*.+\s*TEL\s*\:*\s*([\d\-\(\)\s]+)',
            'regex_item': '$1',
            'remove_tag': '',
        },
    ],
    'target': [
        {
            'id': '1',
            'url': 'https://www.takashimaya.co.jp/rakusai/index.html',
        },
        {
            'id': '2',
            'url': 'https://www.takashimaya.co.jp/okayama/index.html',
        },
        {
            'id': '3',
            'url': 'https://www.takashimaya.co.jp/tachikawa/index.html',
        },
        {
            'id': '4',
            'url': 'https://www.takashimaya.co.jp/yokohama/index.html',
        },
    ],
    'coordination': [
        {
            'item_name': 'coordination',
            'mapurl_xpath': '//*[@id="storeInfo"]/div/p/a',
            'xpath': '//*[@id="mapDiv"]/div/div/div[10]/div/div/div/div[7]/div/a',
            # Group 1 / group 2 are the two numbers in the "ll=<a>,<b>" parameter.
            'regex_match': r'https:\/\/maps\.google\.com\/maps\?.*ll=([\d\.]+),([\d\.]+)',
            'lat': '$1',
            'lon': '$2',
            'mapurl_regex': r'\/[^\/]+\/access\/index.html',
        },
    ],
}
# Scraper configuration for Burger King Japan store-detail pages.
#
# Layout (as visible in this literal):
#   'format'       - one entry per field to extract: an XPath, plus a regex
#                    ('regex_match') and a replacement template ('regex_item').
#   'target'       - the pages to process, each with an id and a URL.
#   'coordination' - how to locate a Google Maps link and pull the two numeric
#                    groups after "ll=" out of it as lat/lon.
#
# NOTE: this assignment rebinds `input_data`, replacing any configuration
# assigned to the same name earlier in the file.
# NOTE(review): the string 'Null' looks like a "no regex" sentinel and
# 'remove_tag' looks like a flag encoded as ''/'1' - confirm both against the
# code that consumes this dict.
#
# All regex patterns are raw strings so backslash sequences reach the regex
# engine verbatim.
input_data = {
    'format': [
        {
            'item_name': 'name',
            'xpath': '//*[@id="storeMap"]/header[3]/div/h2/text()',
            'regex_match': 'Null',
            'regex_item': 'Null',
            'remove_tag': '',
        },
        {
            'item_name': 'zip',
            'xpath': '//*[@id="storeMap"]/section/div/div[1]/dl/dd[1]',
            # Capture the digits/hyphens that follow the postal mark.
            'regex_match': r'〒([\d\-]+)',
            'regex_item': '$1 ',
            'remove_tag': '',
        },
        {
            'item_name': 'address',
            'xpath': '//*[@id="storeMap"]/section/div/div[1]/dl/dd[1]',
            # Capture the text after the <br> that follows the postal code.
            'regex_match': r'〒[\d\-]+<br>([^<]+)',
            'regex_item': '$1 ',
            'remove_tag': '',
        },
        {
            'item_name': 'tel',
            'xpath': '//*[@id="storeMap"]/section/div/div[1]/dl/dd[2]/text()',
            'regex_match': 'Null',
            'regex_item': 'Null',
            'remove_tag': '',
        },
        {
            'item_name': 'hour',
            'xpath': '//*[@id="storeMap"]/section/div/div[1]/dl/dd[3]',
            'regex_match': 'Null',
            'regex_item': 'Null',
            'remove_tag': '1',
        },
    ],
    'target': [
        {
            'id': '1',
            'url': 'https://www.burgerkingjapan.co.jp/stores/detail.html?sn=89',
        },
        {
            'id': '2',
            'url': 'https://www.burgerkingjapan.co.jp/stores/detail.html?sn=124',
        },
        {
            'id': '3',
            'url': 'https://www.burgerkingjapan.co.jp/stores/detail.html?sn=38',
        },
        {
            'id': '4',
            'url': 'https://www.burgerkingjapan.co.jp/stores/detail.html?sn=12',
        },
        {
            'id': '5',
            'url': 'https://www.burgerkingjapan.co.jp/stores/detail.html?sn=91',
        },
    ],
    'coordination': [
        {
            # NOTE(review): the Takashimaya configuration in this file uses
            # 'coordination' as the item_name - confirm which spelling the
            # consumer expects.
            'item_name': 'coordinates',
            'xpath': '//*[@id="gmap"]/div/div/div[2]/a',
            # Group 1 / group 2 are the two numbers in the "ll=<a>,<b>"
            # parameter; both groups accept only digits and dots.
            'regex_match': r'https:\/\/maps\.google\.com\/maps\?.*ll=([\d\.]+),([\d\.]+)',
            'lat': '$1',
            'lon': '$2',
            'mapurl_regex': '',
        },
    ],
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment