ettorerizza · September 9, 2017 15:56
diff --git a/parseNeckarJson.py b/parseNeckarJson.py
 import pandas as pd
 import simplejson as json
 import gzip

 def getTargetIds(jsonData):
    """Extract the exported fields from one JSON record.

    Args:
        jsonData: A single JSON document (text or bytes) that decodes to an
            object supporting ``.get`` (i.e. a JSON object).

    Returns:
        A tuple of eight strings, in this order: id, norm_name, description,
        date_birth, date_death, gender, en_sitelink, alias. A key that is
        absent from the record is reported as the string ``'null'``. The
        aliases are joined into one string with ``'||'``.
    """
    data = json.loads(jsonData)

    scalar_keys = ('id', 'norm_name', 'description', 'date_birth',
                   'date_death', 'gender', 'en_sitelink')
    values = [str(data.get(key, 'null')) for key in scalar_keys]

    # The alias field must not default to the *string* 'null' before joining:
    # str.join would iterate its characters and produce 'n||u||l||l'.
    alias = data.get('alias')
    if alias is None:
        # Missing key or explicit JSON null: same placeholder as other fields.
        alias_text = 'null'
    elif isinstance(alias, str):
        # A bare string is a single alias, not a sequence of characters.
        alias_text = alias
    else:
        # str() each item so non-string aliases do not make join() raise.
        alias_text = "||".join(str(item) for item in alias)

    values.append(alias_text)
    return tuple(values)

 # Pass 1: read the gzipped dump line by line, run each line through
 # getTargetIds, and write the resulting fields to result.txt as one
 # "::"-separated line per input line.
 with gzip.open('WikidataNE_20170320_Persons_NECKAR_1_0.json_.gz', "r") as infile, \
         open(r'result.txt', "w", encoding="utf8") as outfile:
    for row in infile:
        outfile.write("::".join(getTargetIds(row)) + "\n")

 # Pass 2: load the intermediate file and re-export it as CSV.
 # A multi-character separator is not supported by pandas' C parser; pandas
 # would fall back to the Python engine and emit a ParserWarning, so the
 # engine is requested explicitly.
 # NOTE(review): a field value that itself contains "::" will be split into
 # extra columns here, because the intermediate file has no quoting.
 df = pd.read_csv(r'result.txt', sep="::", index_col=False, header=None,
                  encoding="utf8", engine="python")
 df.columns = ['id', 'nom', 'description', 'birth', 'death', 'gender', 'en_wiki', 'alias']
 df.to_csv(r'wikidata_per_neckar.csv', encoding="utf8")
	import pandas as pd
	import simplejson as json
	import gzip

	def getTargetIds(jsonData):
	    """Extract the exported fields from one JSON record.

	    Args:
	        jsonData: A single JSON document (text or bytes) that decodes to
	            an object supporting ``.get`` (i.e. a JSON object).

	    Returns:
	        A tuple of eight strings, in this order: id, norm_name,
	        description, date_birth, date_death, gender, en_sitelink, alias.
	        A key that is absent from the record is reported as the string
	        ``'null'``. The aliases are joined into one string with ``'||'``.
	    """
	    data = json.loads(jsonData)

	    scalar_keys = ('id', 'norm_name', 'description', 'date_birth',
	                   'date_death', 'gender', 'en_sitelink')
	    values = [str(data.get(key, 'null')) for key in scalar_keys]

	    # The alias field must not default to the *string* 'null' before
	    # joining: str.join would iterate its characters ('n||u||l||l').
	    alias = data.get('alias')
	    if alias is None:
	        # Missing key or explicit JSON null: same placeholder as others.
	        alias_text = 'null'
	    elif isinstance(alias, str):
	        # A bare string is a single alias, not a sequence of characters.
	        alias_text = alias
	    else:
	        # Plain "||" separator (no backslashes); str() each item so
	        # non-string aliases do not make join() raise.
	        alias_text = "||".join(str(item) for item in alias)

	    values.append(alias_text)
	    return tuple(values)

	# Pass 1: read the gzipped dump line by line, run each line through
	# getTargetIds, and write the resulting fields to result.txt as one
	# "::"-separated line per input line.
	with gzip.open('WikidataNE_20170320_Persons_NECKAR_1_0.json_.gz', "r") as infile, \
	        open(r'result.txt', "w", encoding="utf8") as outfile:
	    for row in infile:
	        outfile.write("::".join(getTargetIds(row)) + "\n")

	# Pass 2: load the intermediate file and re-export it as CSV.
	# A multi-character separator is not supported by pandas' C parser; pandas
	# would fall back to the Python engine and emit a ParserWarning, so the
	# engine is requested explicitly.
	# NOTE(review): a field value that itself contains "::" will be split into
	# extra columns here, because the intermediate file has no quoting.
	df = pd.read_csv(r'result.txt', sep="::", index_col=False, header=None,
	                 encoding="utf8", engine="python")
	df.columns = ['id', 'nom', 'description', 'birth', 'death', 'gender', 'en_wiki', 'alias']
	df.to_csv(r'wikidata_per_neckar.csv', encoding="utf8")