Skip to content

Instantly share code, notes, and snippets.

@kzinmr
Created September 18, 2020 08:18
Show Gist options
  • Save kzinmr/0a4c974a469ad3393c9115494c26c341 to your computer and use it in GitHub Desktop.
Save kzinmr/0a4c974a469ad3393c9115494c26c341 to your computer and use it in GitHub Desktop.
%%time
import xmltodict
import unicodedata
import requests
import datetime
def download_leidata():
    """Download today's JPX LEI full file and save it in the working directory.

    The file name is built from the local date as
    ``JPX-LEI-JPXFullFile_v2_YYYYMMDD.xml``.

    Returns:
        The path of the saved XML file, or ``None`` when the server replied
        with an error status (``response.ok`` is false).

    Raises:
        requests.RequestException: if the connection fails or times out.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    dt = datetime.datetime.today().strftime('%Y%m%d')
    filestem = f'JPX-LEI-JPXFullFile_v2_{dt}'
    filepath = filestem + '.xml'
    url = (
        'https://www.lei.jpx.co.jp/lei/en/full_file.html'
        f'?FileIDLatest2={filestem}&RRFullHistory=&RRExceptionHistory='
    )
    # requests never times out by default; without this a stalled connection
    # would block the caller indefinitely.
    response = requests.get(url, timeout=60)
    if not response.ok:
        # Callers test the result against None to detect a failed download.
        return None
    # NOTE(review): the text is written with the platform default encoding,
    # which is not necessarily UTF-8; whatever reads the file back has to use
    # the same default.
    with open(filepath, "w") as fp:
        fp.write(response.content.decode('utf8'))
    return filepath
def extract_lei_companies(filepath):
    """Return the sorted, de-duplicated company names found in an LEI XML file.

    For every ``lei:LEIRecord`` the legal name is cut at the first ``/``,
    NFKC-normalized, and kept only if it

    * does not look like a fund (contains none of 'ファンド', 'fund',
      '為替ヘッジ'), and
    * carries a known corporate-form marker within its first or last five
      characters.

    A summary line ``#COMPANIES: <kept> / <RecordCount from the header>`` is
    printed.

    Args:
        filepath: Path of the XML file to parse.

    Returns:
        A sorted list of the unique names that passed the filter; empty when
        nothing matched.
    """
    # NOTE(review): read with the platform default encoding, i.e. the file is
    # expected to have been written the same way.
    with open(filepath) as fp:  # close the handle instead of leaking it
        od = xmltodict.parse(fp.read())
    totcount = int(od['lei:LEIData']['lei:LEIHeader']['lei:RecordCount'])
    records = od['lei:LEIData']['lei:LEIRecords']['lei:LEIRecord']
    # xmltodict yields a single mapping rather than a list when the element
    # occurs exactly once; iterating that mapping would walk over its keys.
    if isinstance(records, dict):
        records = [records]

    suffix = (
        '株式会社', '合同会社', '有限会社', '目的会社', '相互会社', '投資法人',
        '信用金庫', '中央金庫', '連合会', 'ltd', 'mited', 'inc', ' llc',
    )
    prefix = (
        '株式会社', '合同会社', '有限会社', '医療法人', '一般社団', '一般財団',
        '公益財団', '独立行政', '学校法人', '国立大学',
    )

    names = set()
    for r in records:
        legal_name = r['lei:Entity']['lei:LegalName']
        # An element with attributes parses to a mapping holding '#text';
        # one without attributes parses to the bare string.
        text = legal_name['#text'] if isinstance(legal_name, dict) else legal_name
        if not text:
            continue
        text_stem = unicodedata.normalize('NFKC', text.split('/')[0])
        if ('ファンド' in text_stem
                or 'fund' in text_stem.lower()
                or '為替ヘッジ' in text_stem):
            continue
        # Slice first, then lower-case, so the five-character windows are
        # taken from the normalized name itself.
        tail = text_stem[-5:].lower()
        head = text_stem[:5].lower()
        if any(s in tail for s in suffix) or any(p in head for p in prefix):
            names.add(text_stem)

    # A plain set gives the unique names directly; no DataFrame is needed
    # (the file never imports pandas) and an empty result is simply [].
    companies_with_lei = sorted(names)
    n_companies = len(companies_with_lei)
    print('#COMPANIES: {} / {}'.format(n_companies, totcount))
    return companies_with_lei
# Fetch today's LEI full file and, if that worked, extract the company names.
filepath = download_leidata()
if filepath is not None:
    companies_with_lei = extract_lei_companies(filepath)
else:
    # Say so explicitly: otherwise a failed download leaves
    # `companies_with_lei` undefined with no hint as to why.
    print('LEI full file could not be downloaded; skipping extraction.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment