skagedal · December 30, 2015 08:38
diff --git a/swepubjsonparser.py b/swepubjsonparser.py
 import json
 import re
 from os import listdir

 def fix_escapes(string):
     """Return *string* with its backslash escaping repaired for ``json.loads``.

     Two repairs are applied, in order: a backslash-backslash-quote sequence
     is collapsed to backslash-quote, and every run of backslashes that would
     leave a dangling, unpaired backslash is extended by one backslash.

     Backslashes that escape a double quote or a forward slash are kept.
     Every other lone backslash (including ones that look like a JSON escape
     such as a newline escape) becomes a literal backslash, so the stray
     backslash no longer breaks JSON parsing.
     """
     # Libris over-escapes some quote characters: \\" is turned back into \".
     string = string.replace('\\\\"', '\\"')

     # Libris fails to properly escape backslashes in strings, which occurs for example with
     # inline LaTeX codes like "$\geq" which should be escaped as "$\\geq".  They do seem to
     # properly escape quote chars, however.  Now, we can't easily know whether a string like
     # "\n" should be parsed as a newline (which it should in proper JSON) or as a backslash
     # and an n.  So while this might break some LaTeX codes, at least the JSON parsing
     # should work.
     def _complete_run(match):
         # An even-length run is already a sequence of escaped backslashes; an odd-length
         # run ends in a backslash that escapes nothing valid, so pair it up.
         run = match.group(0)
         return run + "\\" if len(run) % 2 else run

     # Match whole runs of backslashes rather than single ones, so that an odd run of three
     # or more (e.g. \\\g) is repaired too.  Runs directly followed by a quote or a slash
     # are skipped because their last backslash is a legitimate escape.
     return re.sub(r'\\+(?!["\\/])', _complete_run, string)
    
 # Directory holding the downloaded Libris/SwePub JSON files, one response per file.
 JSON_DIR = "GU20151228json/"

 for filename in listdir(JSON_DIR):  # every file in the directory
     print("opening " + filename)
     # Read raw bytes and decode explicitly so the encoding does not depend on the locale.
     with open(JSON_DIR + filename, "rb") as current_file:
         raw = current_file.read()

     # The escaping must be repaired before parsing, otherwise json.loads rejects the text.
     text = fix_escapes(raw.decode('utf-8'))
     jsondata = json.loads(text)
     print("parsed correctly")

     # Print the title of every record in the search result list.
     for record in jsondata["xsearch"]["list"]:
         print(record["title"])
	import json
	import re
	from os import listdir

	def fix_escapes(string):
	    """Return *string* with its backslash escaping repaired for ``json.loads``.

	    Two repairs are applied, in order: a backslash-backslash-quote sequence
	    is collapsed to backslash-quote, and every run of backslashes that would
	    leave a dangling, unpaired backslash is extended by one backslash.

	    Backslashes that escape a double quote or a forward slash are kept.
	    Every other lone backslash (including ones that look like a JSON escape
	    such as a newline escape) becomes a literal backslash, so the stray
	    backslash no longer breaks JSON parsing.
	    """
	    # Libris over-escapes some quote characters: \\" is turned back into \".
	    string = string.replace('\\\\"', '\\"')

	    # Libris fails to properly escape backslashes in strings, which occurs for example
	    # with inline LaTeX codes like "$\geq" which should be escaped as "$\\geq". They do
	    # seem to properly escape quote chars, however. Now, we can't easily know whether a
	    # string like "\n" should be parsed as a newline (which it should in proper JSON) or
	    # as a backslash and an n. So while this might break some LaTeX codes, at least the
	    # JSON parsing should work.
	    def _complete_run(match):
	        # An even-length run is already a sequence of escaped backslashes; an odd-length
	        # run ends in a backslash that escapes nothing valid, so pair it up.
	        run = match.group(0)
	        return run + "\\" if len(run) % 2 else run

	    # Match whole runs of backslashes rather than single ones, so that an odd run of
	    # three or more (e.g. \\\g) is repaired too. Runs directly followed by a quote or a
	    # slash are skipped because their last backslash is a legitimate escape.
	    return re.sub(r'\\+(?!["\\/])', _complete_run, string)

	# Directory holding the downloaded Libris/SwePub JSON files, one response per file.
	JSON_DIR = "GU20151228json/"

	for filename in listdir(JSON_DIR):  # every file in the directory
	    print("opening " + filename)
	    # Read raw bytes and decode explicitly so the encoding does not depend on the locale.
	    with open(JSON_DIR + filename, "rb") as current_file:
	        raw = current_file.read()

	    # The escaping must be repaired before parsing, otherwise json.loads rejects the text.
	    text = fix_escapes(raw.decode('utf-8'))
	    jsondata = json.loads(text)
	    print("parsed correctly")

	    # Print the title of every record in the search result list.
	    for record in jsondata["xsearch"]["list"]:
	        print(record["title"])
No results found