mylamour · December 31, 2016 13:34
diff --git a/musicbrainzUrlSchemaParseCsvToJSON.py b/musicbrainzUrlSchemaParseCsvToJSON.py
 import csv
 import json
 import os
 from urlparse import urlparse

 # Convert url.csv into test.json, writing one JSON object per line.
 #
 # Field names are passed to DictReader explicitly, so the first line of
 # url.csv is read as data rather than as a header row.  Each output record
 # keeps the two input columns and adds "idAtSource", "sourceName",
 # "sourceDomain" and "@type".
 fieldnames = ("@id", "sourceUrl")

 # The with-statement closes both files even if a row fails part-way through;
 # the output file is therefore always flushed to disk.
 with open('url.csv', 'r') as csvfile, open('test.json', 'w') as jsonfile:
     reader = csv.DictReader(csvfile, fieldnames)
     for row in reader:
         url = urlparse(row['sourceUrl'])

         # Prefer the query string as the identifier at the source; fall back
         # to the last path segment when the URL has no query.
         if url.query:
             row["idAtSource"] = url.query
         else:
             # os.path.basename() returns "" for a path that ends in "/",
             # so strip trailing slashes before taking the last segment.
             row["idAtSource"] = os.path.basename(url.path.rstrip('/'))

         # Heuristic for the site name: "example.com" -> "example",
         # "www.example.com" -> "example".  Hosts with no dot at all (for
         # example "localhost", or an empty netloc) use their only label
         # instead of raising IndexError.
         domain = url.netloc.split('.')
         if len(domain) <= 2:
             row['sourceName'] = domain[0]
         else:
             row['sourceName'] = domain[1]

         row["sourceDomain"] = url.netloc
         row["@type"] = "ExternalLink"

         # One JSON document per line.
         json.dump(row, jsonfile)
         jsonfile.write('\n')
	import csv
	import json
	import os
	from urlparse import urlparse

	# Convert url.csv into test.json, writing one JSON object per line.
	#
	# Field names are passed to DictReader explicitly, so the first line of
	# url.csv is read as data rather than as a header row.  Each output record
	# keeps the two input columns and adds "idAtSource", "sourceName",
	# "sourceDomain" and "@type".
	fieldnames = ("@id", "sourceUrl")

	# The with-statement closes both files even if a row fails part-way through;
	# the output file is therefore always flushed to disk.
	with open('url.csv', 'r') as csvfile, open('test.json', 'w') as jsonfile:
	    reader = csv.DictReader(csvfile, fieldnames)
	    for row in reader:
	        url = urlparse(row['sourceUrl'])

	        # Prefer the query string as the identifier at the source; fall back
	        # to the last path segment when the URL has no query.
	        if url.query:
	            row["idAtSource"] = url.query
	        else:
	            # os.path.basename() returns "" for a path that ends in "/",
	            # so strip trailing slashes before taking the last segment.
	            row["idAtSource"] = os.path.basename(url.path.rstrip('/'))

	        # Heuristic for the site name: "example.com" -> "example",
	        # "www.example.com" -> "example".  Hosts with no dot at all (for
	        # example "localhost", or an empty netloc) use their only label
	        # instead of raising IndexError.
	        domain = url.netloc.split('.')
	        if len(domain) <= 2:
	            row['sourceName'] = domain[0]
	        else:
	            row['sourceName'] = domain[1]

	        row["sourceDomain"] = url.netloc
	        row["@type"] = "ExternalLink"

	        # One JSON document per line.
	        json.dump(row, jsonfile)
	        jsonfile.write('\n')