Last active
June 11, 2022 04:57
-
-
Save typebrook/0c053a243a9f2ea75e8c56eb08f5b629 to your computer and use it in GitHub Desktop.
Add OSM ID into Wikidata #wikidata #osm #script #wip
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Remove every generated artifact (lists, OSM extracts, osmChange files). | |
| # Declared phony so a stray file named "clean" cannot mask the target; | |
| # -f keeps the recipe from failing when there is nothing to delete. | |
| .PHONY: clean | |
| clean: | |
| rm -f *.list *.osm *.osc | |
| # SPARQL query exported to recipes through the environment variable "quest". | |
| # P31 = instance of, Q7930614 = village of the Republic of China (Taiwan), | |
| # P5020 = household-registration code, P402 = OSM relation ID. | |
| # Selects villages that have a household-registration code and no OSM | |
| # relation ID yet. The code is the second result column because | |
| # matched.list joins on column 2 against the OSM nat_ref values. | |
| # Fixes: the property was written "wdt:402" (missing the P prefix), which | |
| # names no property, so the MINUS clause filtered nothing; and the second | |
| # column used to be the label instead of the code the join expects. | |
| define quest | |
| SELECT DISTINCT ?village ?code | |
| WHERE { | |
| ?village wdt:P31 wd:Q7930614. | |
| ?village wdt:P5020 ?code. | |
| MINUS { ?village wdt:P402 [] } | |
| } | |
| endef | |
| export quest | |
| # Fetch the Wikidata village list by running the query held in $quest. | |
| # The CSV answer is reduced to one record per line: the header row is | |
| # dropped, everything up to the last "/" is removed (leaving the Q | |
| # identifier), the first comma becomes a space and a trailing CR is stripped. | |
| # Column 1 is the Q identifier; column 2 is meant to be the | |
| # household-registration code. | |
| wd_villages.list: | |
| curl --get 'https://query.wikidata.org/sparql' \ | |
| -H "Accept: text/csv" \ | |
| --data-urlencode query="$$quest" | \ | |
| sed -E -e '1d' -e 's#.+/##' -e 's/,/ /' -e 's/\r$$//' >$@ | |
| # Overpass endpoint (NCHC server) and the bounding box used for Taiwan. | |
| OVERPASS_API := https://overpass.nchc.org.tw/api/interpreter | |
| TAIWAN_BBOX := 20.72799,118.1036,26.60305,122.9312 | |
| # Download, in OSM XML with metadata, the relations inside the bounding box | |
| # that have admin_level=9 and a nat_ref tag but no wikidata tag. | |
| # The query text is piped to curl, which posts it as the request body. | |
| villages.osm: | |
| echo "[out:xml]; relation[admin_level=9][nat_ref][!wikidata]($(TAIWAN_BBOX));out meta;" | \ | |
| curl --data @- --request POST $(OVERPASS_API) >$@ | |
| # Flatten villages.osm into one <relation> element per line. | |
| # xq re-emits each relation as XML keeping only its id, version, tags and | |
| # members; tr removes every newline; sed then puts a newline back after | |
| # each closing </relation>. | |
| villages_oneline.osm: villages.osm | |
| xq -x --xml-root=relation '.osm.relation[] | {"@id": ."@id", "@version": ."@version", tag, member}' $< | \ | |
| tr -d '\n' | \ | |
| sed -e 's#</relation>#&\n#g' >$@ | |
| # Extract the nat_ref tag value of every relation in villages.osm, | |
| # one value per line, in document order. | |
| osm_nat_ref.list: villages.osm | |
| xq -r '.osm.relation[].tag[] | select(."@k" == "nat_ref") | ."@v"' $< >$@ | |
| # For every line of osm_nat_ref.list print the matching Q identifier. | |
| # The first prerequisite fills a lookup table keyed by its column 2 with | |
| # column 1 as the value; each line of the second prerequisite is then looked | |
| # up by its column 1. A missing key prints an empty line, so the output | |
| # keeps one line per input line. | |
| matched.list: wd_villages.list osm_nat_ref.list | |
| awk 'FNR == NR { qid[$$2] = $$1; next } { print qid[$$1] }' $^ >$@ | |
| # Add the matched Q identifier to each relation of the one-line OSM list and | |
| # wrap the result as an osmChange (.osc) file. | |
| # paste joins each relation line with the same-numbered line of matched.list, | |
| # separated by a tab. The first sed deletes relations whose matched field is | |
| # empty; otherwise it moves the field into a wikidata tag placed just before | |
| # the closing </relation>. The second sed inserts the <osmChange> and | |
| # <modify> opening tags before line 1 and appends the closing tags after the | |
| # last line. | |
| # .ONESHELL is required so the multi-line quoted sed script below is passed | |
| # to a single shell invocation. NOTE: in GNU make this setting applies to | |
| # every recipe in the makefile, not only to this rule. | |
| .ONESHELL: | |
| final.osc: villages_oneline.osm matched.list | |
| paste $^ | \ | |
| sed -Ee '/<\/relation>\t$$/ d; s/(<\/relation>)\t(.+)$$/<tag k="wikidata" v="\2"><\/tag>\1/' | \ | |
| sed -e '1 i <osmChange version="0.6" generator="bash script"> | |
| 1 i <modify> | |
| $$ a </modify> | |
| $$ a </osmChange>' >$@ | |
| # Open a new changeset, upload the osmChange file, then close the changeset. | |
| # The upload is done by a helper script downloaded at run time and fed to | |
| # bash on stdin, with final.osc as its argument. | |
| # Declared phony: the target never creates a file named "changeset", and a | |
| # stray file with that name must not suppress the upload. | |
| # NOTE(review): this executes a remote script from a mutable branch (dev); | |
| # consider pinning the URL to a commit hash before relying on it. | |
| .PHONY: changeset | |
| changeset: final.osc | |
| curl -fsS https://raw.githubusercontent.com/typebrook/helper/dev/tools/osm/osm.api.changeset.commit | \ | |
| bash /dev/stdin $< |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Set a P402 string claim on Wikidata item Q17065066 through the | |
| # wbsetclaim API action (request captured from a browser session). | |
| # Session cookies and the CSRF token are credentials, so they are read from | |
| # the environment instead of being stored in this file: | |
| #   WIKIDATA_COOKIE      value of the Cookie request header | |
| #   WIKIDATA_CSRF_TOKEN  raw (not URL-encoded) edit token | |
| # The command aborts with a message if either variable is unset or empty. | |
| # Each form field is passed as its own data option; curl joins them with "&", | |
| # so no newline ends up inside the request body. | |
| curl 'https://www.wikidata.org/w/api.php' \ | |
| -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0' \ | |
| -H 'Accept: application/json, text/javascript, */*; q=0.01' \ | |
| -H 'Accept-Language: zh-TW,en-US;q=0.7,en;q=0.3' --compressed \ | |
| -H 'Referer: https://www.wikidata.org/wiki/Q17065066' \ | |
| -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \ | |
| -H 'X-Requested-With: XMLHttpRequest' \ | |
| -H 'Origin: https://www.wikidata.org' \ | |
| -H 'DNT: 1' \ | |
| -H 'Connection: keep-alive' \ | |
| -H "Cookie: ${WIKIDATA_COOKIE:?export WIKIDATA_COOKIE with your session cookies}" \ | |
| -H 'Pragma: no-cache' \ | |
| -H 'Cache-Control: no-cache' \ | |
| -H 'TE: Trailers' \ | |
| --data-raw 'action=wbsetclaim' \ | |
| --data-raw 'format=json' \ | |
| --data-raw 'claim=%7B%22type%22%3A%22statement%22%2C%22mainsnak%22%3A%7B%22snaktype%22%3A%22value%22%2C%22property%22%3A%22P402%22%2C%22datavalue%22%3A%7B%22type%22%3A%22string%22%2C%22value%22%3A%229183820%22%7D%7D%2C%22id%22%3A%22Q17065066%245f364135-41d5-630c-c8b9-029d4fc6869d%22%2C%22rank%22%3A%22normal%22%7D' \ | |
| --data-raw 'baserevid=1226479126' \ | |
| --data-raw 'bot=1' \ | |
| --data-raw 'assertuser=Typebrook' \ | |
| --data-raw 'errorformat=plaintext' \ | |
| --data-raw 'uselang=zh-tw' \ | |
| --data-urlencode "token=${WIKIDATA_CSRF_TOKEN:?export WIKIDATA_CSRF_TOKEN with your edit token}" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment