Skip to content

Instantly share code, notes, and snippets.

@typebrook
Last active June 11, 2022 04:57
Show Gist options
  • Select an option

  • Save typebrook/0c053a243a9f2ea75e8c56eb08f5b629 to your computer and use it in GitHub Desktop.

Select an option

Save typebrook/0c053a243a9f2ea75e8c56eb08f5b629 to your computer and use it in GitHub Desktop.
Add OSM ID into Wikidata #wikidata #osm #script #wip
# Remove every generated list, OSM extract and osmChange file.
# Declared phony so a stray file called `clean` cannot mask the target;
# $(RM) expands to `rm -f`, so cleaning an already-clean tree succeeds
# instead of failing on an unmatched glob.
# NOTE(review): this is the first rule visible in the file, so a bare `make`
# may run it as the default goal -- confirm that is intended.
.PHONY: clean
clean:
	$(RM) *.list *.osm *.osc
# SPARQL query sent to the Wikidata Query Service (exported to recipes as
# the environment variable `quest`).
# Legend:
#   P31      = instance of
#   Q7930614 = village of the Republic of China (Taiwan)
#   P5020    = ROC household registration data code
#   P402     = OpenStreetMap relation ID
# Returns every village item together with its household registration code,
# skipping items that already have an OSM relation ID.
# Fixes over the previous revision:
#   * `wdt:402` was missing the `P`, so the MINUS clause never excluded anything.
#   * the second selected column is now the P5020 code (not the label), which
#     is what the downstream join on OSM `nat_ref` keys on.
define quest
SELECT DISTINCT ?village ?code
WHERE {
  ?village wdt:P31 wd:Q7930614 ;
           wdt:P5020 ?code .
  MINUS { ?village wdt:P402 [] } .
}
endef
export quest
# Download the result of the `quest` query as CSV and normalise it to two
# space-separated columns per line. The sed script does, in order:
#   1d         drop the CSV header row
#   s#.+/##    strip everything up to the last '/' (the entity URI prefix),
#              leaving the bare Q identifier in column 1
#   s/,/ /     turn the first comma into a space
#   s/\r$//    drop the trailing carriage return
# Per the original author's note, column 2 is meant to be the household
# registration code; it is whatever `quest` selects as its second variable.
wd_villages.list:
	curl --get \
	     -H 'Accept: text/csv' \
	     --data-urlencode "query=$$quest" \
	     'https://query.wikidata.org/sparql' | \
	sed -E -e '1d; s#.+/##; s/,/ /; s/\r$$//' >$@
# Overpass endpoint (NCHC-hosted server) and the bounding box used for Taiwan.
TAIWAN_BBOX  := 20.72799,118.1036,26.60305,122.9312
OVERPASS_API := https://overpass.nchc.org.tw/api/interpreter

# Fetch, as OSM XML with metadata (`out meta`), every relation inside
# TAIWAN_BBOX that has admin_level=9 and a nat_ref tag but no wikidata tag.
# Per the original author's note these are the village relations that carry
# a household registration code but are not yet linked to Wikidata.
# The query text is piped to curl, which POSTs it as the request body.
villages.osm:
	printf '%s\n' "[out:xml]; relation[admin_level=9][nat_ref][!wikidata]($(TAIWAN_BBOX));out meta;" | \
	curl --request POST --data @- $(OVERPASS_API) >$@
# Reduce the downloaded village data to one <relation> element per line,
# dropping the outer <osm> wrapper and keeping only each relation's id,
# version, tags and members.
#   xq  : re-emits every relation as a stand-alone <relation> XML fragment
#   tr  : deletes all newlines from that output
#   sed : re-inserts a newline after every closing </relation>
# xq parses a lone <relation> child as an object rather than a one-element
# list, so the filter accepts both shapes; the old `.osm.relation[]` broke
# when exactly one relation was returned. A missing relation is still an
# error, as before.
villages_oneline.osm: villages.osm
	xq -x --xml-root=relation '.osm.relation | if type == "array" then .[] elif type == "object" then . else error("no relation element found") end | {"@id": .["@id"], "@version": .["@version"], tag: .tag, member: .member}' $< | \
	tr -d '\n' | \
	sed -Ee 's/(<\/relation>)/\1\n/g' >$@
# Extract the household registration code (the value of the `nat_ref` tag)
# of every relation in villages.osm, one code per line, in document order.
# xq parses a lone <relation> child as an object rather than a one-element
# list, so the filter accepts both shapes; the old `.osm.relation[]` broke
# when exactly one relation was returned. A missing relation is still an
# error, as before.
# `.tag[]` is left as is: the Overpass query in this file only returns
# relations that have both admin_level and nat_ref, i.e. at least two tags.
osm_nat_ref.list: villages.osm
	xq -r '.osm.relation | if type == "array" then .[] elif type == "object" then . else error("no relation element found") end | .tag[] | select(.["@k"]=="nat_ref") | .["@v"]' $< >$@
# Look up the Q identifier for every household registration code.
# awk reads wd_villages.list first and remembers column 1 keyed by column 2;
# for each line of osm_nat_ref.list it then prints the remembered value, or
# an empty line when the code is unknown, so the output keeps exactly one
# line per input code.
matched.list: wd_villages.list osm_nat_ref.list
	awk 'FNR != NR { print qid_by_code[$$1]; next } { qid_by_code[$$2] = $$1 }' $^ >$@
# Build the osmChange (.osc) file:
#   paste     : joins each one-line relation with its matched.list line
#               (tab-separated)
#   1st sed   : deletes relations whose matched value is empty, and for the
#               rest inserts <tag k="wikidata" v="..."> right before the
#               closing </relation>
#   2nd sed   : wraps the result in <osmChange><modify> ... </modify></osmChange>
# The two sed stages are kept separate on purpose: the header is inserted
# before line 1 of the *filtered* stream.
# .ONESHELL makes every recipe in this makefile run in a single shell.
.ONESHELL:
final.osc: villages_oneline.osm matched.list
	paste $^ | \
	sed -E -e '/<\/relation>\t$$/d' \
	       -e 's/(<\/relation>)\t(.+)$$/<tag k="wikidata" v="\2"><\/tag>\1/' | \
	sed -e '1 i <osmChange version="0.6" generator="bash script">' \
	    -e '1 i <modify>' \
	    -e '$$ a </modify>' \
	    -e '$$ a </osmChange>' >$@
# Hand final.osc to the author's osm.api.changeset.commit helper script.
# Per the original author's note the script creates a new changeset, uploads
# the osmChange file and closes the changeset (the script itself is not part
# of this file).
# Declared phony: the recipe never creates a file called `changeset`, and a
# stray file of that name must not suppress the upload.
# NOTE(review): this downloads a script from a mutable branch (`dev`) and
# pipes it straight into bash -- consider pinning to a commit hash or
# vendoring the script.
.PHONY: changeset
changeset: final.osc
	curl -fsS https://raw.githubusercontent.com/typebrook/helper/dev/tools/osm/osm.api.changeset.commit | \
	bash /dev/stdin $<
# Browser-exported request against the Wikidata API (action=wbsetclaim).
# The URL-encoded `claim` parameter decodes to a statement on Q17065066 whose
# main snak sets property P402 to the string value "9183820".
# NOTE(review): as shown here the lines carry no trailing `\` continuations,
# so a shell would run each line as a separate command -- presumably this is
# a work-in-progress note rather than a runnable script; confirm.
# NOTE(review): the Cookie header and the `token` parameter are hard-coded
# session credentials of a real account. They should be invalidated and
# supplied from outside the file instead of being committed.
curl 'https://www.wikidata.org/w/api.php'
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0'
-H 'Accept: application/json, text/javascript, */*; q=0.01'
-H 'Accept-Language: zh-TW,en-US;q=0.7,en;q=0.3' --compressed
-H 'Referer: https://www.wikidata.org/wiki/Q17065066'
-H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8'
-H 'X-Requested-With: XMLHttpRequest'
-H 'Origin: https://www.wikidata.org'
-H 'DNT: 1'
-H 'Connection: keep-alive'
-H 'Cookie: WMF-Last-Access=10-Aug-2020; WMF-Last-Access-Global=10-Aug-2020; wikidatawikiss0-UserID=3020042; wikidatawikiUserID=3020042; wikidatawikiss0-UserName=Typebrook; wikidatawikiUserName=Typebrook; centralauth_ss0-User=Typebrook; centralauth_User=Typebrook; centralauth_ss0-Token=c2c03ec1152bac77c2573288aea228c9; centralauth_Token=c2c03ec1152bac77c2573288aea228c9; loginnotify_prevlogins=2020-hdjhjk-bnk368kxuwnwgl50h78xjae91b6ku2w; GeoIP=TW:HSQ:Hsinchu:24.81:120.97:v4; wikidatawikimwuser-sessionId=5156c2a93dbc3c6db408; ss0-wikidatawikiSession=tjojdej2cqivfvb5d0o5t8l7rhpbtuvu; wikidatawikiSession=tjojdej2cqivfvb5d0o5t8l7rhpbtuvu; ss0-centralauth_Session=7d7e595a6941a39862780fbcac8575b1; centralauth_Session=7d7e595a6941a39862780fbcac8575b1'
-H 'Pragma: no-cache'
-H 'Cache-Control: no-cache'
-H 'TE: Trailers'
--data-raw '
action=wbsetclaim&
format=json&
claim=%7B%22type%22%3A%22statement%22%2C%22mainsnak%22%3A%7B%22snaktype%22%3A%22value%22%2C%22property%22%3A%22P402%22%2C%22datavalue%22%3A%7B%22type%22%3A%22string%22%2C%22value%22%3A%229183820%22%7D%7D%2C%22id%22%3A%22Q17065066%245f364135-41d5-630c-c8b9-029d4fc6869d%22%2C%22rank%22%3A%22normal%22%7D&
baserevid=1226479126&
bot=1&
assertuser=Typebrook&
errorformat=plaintext&
uselang=zh-tw&
token=3d8f184c18c0d5460d00700da1b35c295f313253%2B%5C'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment