perXautomatik · October 15, 2020 09:58
diff --git a/jsonstream.py b/jsonstream.py
 """
 JsonStream by Gareth George

 LICENSE: The do whatever the heck you want with it license.
 Do what you want with it, when you want to do it. I take no responsibility for any damages that may occur from the use of this script. This means no guarantees as to its reliability or stability.

 USAGE:

    file = open_stream('filename.json', 'rb') # opens the file with the given file name to be read as a json stream
    file = open_stream('filename.json.gz', 'rb') # opens the COMPRESSED file with the file name to be read as a json stream
    file = open_stream('filename.json.gz', 'wb') # opens the COMPRESSED file for writing out json data.

 It is worth noting that these files must be opened in binary mode. This results from the implementation detail that the JSON stream class is a subclass of io.TextIOWrapper, which expects the argument to its constructor to implement the binary stream API.
 Once you have a handle to a file, writing to it is as simple as calling
    
    file.write("text") to write some text... why you would do this I do not know.
    file.writeJSON({"hello": "world"}) will write a single json object into the file. This function is unicode safe (hopefully).
    file.readJSON() will read a single JSON object from the file; however, its usage is discouraged in favor of iterJSON since readJSON is slower. It is still provided just in case you need it.
    file.iterJSON() returns an iterator over all of the json objects found in the file.

 usage examples for file.iterJSON()

    json_objects = list(file.iterJSON()) # returns a list of every json object in the file. You can do this; however, it is discouraged because it requires you to load every object in the file into memory, which can be impractical for very large files. Note that even if the file is only a gigabyte compressed it may contain 8 gigabytes of JSON data, so even seemingly small files can cause your memory usage to EXPLODE!
    
    for obj in file.iterJSON():
        print(obj) # prints out every object in the JSON file. This is the preferred way of accessing the objects in a file as it allows you to lazily load them, print them, and then allow the garbage collector to clear the object out of memory! Happy times.
        
 Enjoy!
 """

 import os
 import json
 import io
 try:
    import gzip
 except ImportError: pass
 try:
    import lzma
 except ImportError: pass
 try:
    import bz2
 except ImportError: pass

 DEFAULT_BLOCK_SIZE = 1024 * 4

 # Whitespace characters that JSON permits between documents.
 _JSON_WHITESPACE = ' \t\n\r'
 # Text following a decoded number that could still grow into a fraction or
 # exponent once more input arrives ('' means the number touches the buffer end).
 _PARTIAL_NUMBER_TAILS = frozenset(('', '.', 'e', 'E', 'e+', 'e-', 'E+', 'E-'))


 def _skip_whitespace(text, pos):
     """Return the index of the first non-whitespace character at or after pos."""
     limit = len(text)
     while pos < limit and text[pos] in _JSON_WHITESPACE:
         pos += 1
     return pos


 def _try_decode(decoder, text, pos, at_eof):
     """Decode the JSON document that starts exactly at text[pos].

     Returns a (value, end) tuple, or None when text does not yet hold a
     complete document at pos.
     """
     try:
         value, end = decoder.raw_decode(text, pos)
     except ValueError:
         return None
     # A scalar has no closing delimiter, so "12" may be the first half of
     # "1234" (or "1." the first half of "1.5") split across two reads. Wait
     # for more input unless the stream is exhausted.
     if (not at_eof and text[end - 1] not in '}]"'
             and text[end:] in _PARTIAL_NUMBER_TAILS):
         return None
     return value, end


 class JsonStream(io.TextIOWrapper):
     """Text stream holding a sequence of JSON documents.

     Documents may be written back to back or separated by whitespace.
     """

     def readJSON(self, blocksize=DEFAULT_BLOCK_SIZE, encoding='utf-8'):
         """Read one JSON document starting at the current position.

         Args:
             blocksize: Number of characters requested per read.
             encoding: Unused; kept so existing callers keep working.

         Returns:
             The decoded document, with the stream left positioned just after
             it. If no complete document is found, the original position is
             restored and None is returned (indistinguishable from a JSON
             ``null`` document).
         """
         start_cookie = self.tell()
         decoder = json.JSONDecoder(strict=False)
         buffer = ''
         at_eof = False

         while not at_eof:
             block = self.read(blocksize)
             at_eof = block == ''
             buffer += block
             begin = _skip_whitespace(buffer, 0)
             decoded = _try_decode(decoder, buffer, begin, at_eof)
             if decoded is None:
                 continue
             result, end = decoded
             # Text streams cannot seek by a relative character offset, so
             # rewind to the start and consume exactly the characters used.
             self.seek(start_cookie)
             self.read(end)
             return result

         self.seek(start_cookie)
         return None

     def iterJSON(self, blocksize=DEFAULT_BLOCK_SIZE, encoding='utf-8'):
         """Yield every JSON document from the current position to end of file.

         Args:
             blocksize: Number of characters requested per read.
             encoding: Unused; kept so existing callers keep working.

         Yields:
             Each decoded document in file order. Text at the end of the
             stream that never forms a complete document is dropped silently.
         """
         decoder = json.JSONDecoder(strict=False)
         buffer = ''
         at_eof = False

         while not at_eof:
             block = self.read(blocksize)
             at_eof = block == ''
             buffer += block
             pos = _skip_whitespace(buffer, 0)
             while True:
                 decoded = _try_decode(decoder, buffer, pos, at_eof)
                 if decoded is None:
                     break
                 result, pos = decoded
                 yield result
                 pos = _skip_whitespace(buffer, pos)
             # Keep only the undecoded tail for the next round.
             buffer = buffer[pos:]

     def writeJSON(self, object):
         """Serialize object as JSON and write it to the stream.

         Non-ASCII characters are written as-is rather than escaped. No
         separator is written after the document.
         """
         json.dump(object, self, ensure_ascii=False)

 def open_stream(filename, mode, *args, **argz):
     """Open filename as a JsonStream, picking the codec from its extension.

     '.gz' uses gzip, '.lzma' and '.lz' use lzma, '.bz2' uses bz2; any other
     extension is opened as a plain file. The extension check ignores case.

     Args:
         filename: Path of the file to open.
         mode: File mode such as 'rb' or 'wb'. JsonStream wraps a binary
             stream, so a text mode ('r', 'wt', ...) is converted to its
             binary equivalent.
         *args: Extra positional arguments for the underlying opener.
         **argz: Extra keyword arguments for the underlying opener.

     Returns:
         A JsonStream that encodes and decodes the file as UTF-8.

     Raises:
         ImportError: If the compression module needed for the extension is
             not available.
     """
     if isinstance(mode, str):
         # io.TextIOWrapper needs a bytes-producing stream underneath, and the
         # compressed-file classes reject 't' in the mode.
         mode = mode.replace('t', '')
         if 'b' not in mode:
             mode += 'b'

     ext = os.path.splitext(filename)[-1].lower()
     # Codec modules are imported on demand so that a missing optional module
     # surfaces as ImportError, and only for the format that needs it.
     if ext == '.gz':
         import gzip
         raw = gzip.GzipFile(filename, mode, *args, **argz)
     elif ext in ('.lzma', '.lz'):
         import lzma
         raw = lzma.LZMAFile(filename, mode, *args, **argz)
     elif ext == '.bz2':
         import bz2
         raw = bz2.BZ2File(filename, mode, *args, **argz)
     else:
         raw = open(filename, mode, *args, **argz)
     return JsonStream(raw, encoding='utf-8')
	"""
	JsonStream by Gareth George

	LICENSE: The do whatever the heck you want with it license.
	Do what you want with it, when you want to do it. I take no responsibility for any damages that may occur from the use of this script. This means no guarantees as to its reliability or stability.

	USAGE:

	file = open_stream('filename.json', 'rb') # opens the file with the given file name to be read as a json stream
	file = open_stream('filename.json.gz', 'rb') # opens the COMPRESSED file with the file name to be read as a json stream
	file = open_stream('filename.json.gz', 'wb') # opens the COMPRESSED file for writing out json data.

	It is worth noting that these files must be opened in binary mode. This results from the implementation detail that the JSON stream class is a subclass of io.TextIOWrapper, which expects the argument to its constructor to implement the binary stream API.
	Once you have a handle to a file, writing to it is as simple as calling

	file.write("text") to write some text... why you would do this I do not know.
	file.writeJSON({"hello": "world"}) will write a single json object into the file. This function is unicode safe (hopefully).
	file.readJSON() will read a single JSON object from the file; however, its usage is discouraged in favor of iterJSON since readJSON is slower. It is still provided just in case you need it.
	file.iterJSON() returns an iterator over all of the json objects found in the file.

	usage examples for file.iterJSON()

	json_objects = list(file.iterJSON()) # returns a list of every json object in the file. You can do this; however, it is discouraged because it requires you to load every object in the file into memory, which can be impractical for very large files. Note that even if the file is only a gigabyte compressed it may contain 8 gigabytes of JSON data, so even seemingly small files can cause your memory usage to EXPLODE!

	for obj in file.iterJSON():
		print(obj) # prints out every object in the JSON file. This is the preferred way of accessing the objects in a file as it allows you to lazily load them, print them, and then allow the garbage collector to clear the object out of memory! Happy times.

	Enjoy!
	"""

	import os
	import json
	import io
	try:
	import gzip
	except ImportError: pass
	try:
	import lzma
	except ImportError: pass
	try:
	import bz2
	except ImportError: pass

	DEFAULT_BLOCK_SIZE = 1024 * 4

	# Whitespace characters that JSON permits between documents.
	_JSON_WHITESPACE = ' \t\n\r'
	# Text following a decoded number that could still grow into a fraction or
	# exponent once more input arrives ('' means the number touches the buffer end).
	_PARTIAL_NUMBER_TAILS = frozenset(('', '.', 'e', 'E', 'e+', 'e-', 'E+', 'E-'))


	def _skip_whitespace(text, pos):
		"""Return the index of the first non-whitespace character at or after pos."""
		limit = len(text)
		while pos < limit and text[pos] in _JSON_WHITESPACE:
			pos += 1
		return pos


	def _try_decode(decoder, text, pos, at_eof):
		"""Decode the JSON document that starts exactly at text[pos].

		Returns a (value, end) tuple, or None when text does not yet hold a
		complete document at pos.
		"""
		try:
			value, end = decoder.raw_decode(text, pos)
		except ValueError:
			return None
		# A scalar has no closing delimiter, so "12" may be the first half of
		# "1234" (or "1." the first half of "1.5") split across two reads. Wait
		# for more input unless the stream is exhausted.
		if (not at_eof and text[end - 1] not in '}]"'
				and text[end:] in _PARTIAL_NUMBER_TAILS):
			return None
		return value, end


	class JsonStream(io.TextIOWrapper):
		"""Text stream holding a sequence of JSON documents.

		Documents may be written back to back or separated by whitespace.
		"""

		def readJSON(self, blocksize=DEFAULT_BLOCK_SIZE, encoding='utf-8'):
			"""Read one JSON document starting at the current position.

			Args:
				blocksize: Number of characters requested per read.
				encoding: Unused; kept so existing callers keep working.

			Returns:
				The decoded document, with the stream left positioned just after
				it. If no complete document is found, the original position is
				restored and None is returned (indistinguishable from a JSON
				``null`` document).
			"""
			start_cookie = self.tell()
			decoder = json.JSONDecoder(strict=False)
			buffer = ''
			at_eof = False

			while not at_eof:
				block = self.read(blocksize)
				at_eof = block == ''
				buffer += block
				begin = _skip_whitespace(buffer, 0)
				decoded = _try_decode(decoder, buffer, begin, at_eof)
				if decoded is None:
					continue
				result, end = decoded
				# Text streams cannot seek by a relative character offset, so
				# rewind to the start and consume exactly the characters used.
				self.seek(start_cookie)
				self.read(end)
				return result

			self.seek(start_cookie)
			return None

		def iterJSON(self, blocksize=DEFAULT_BLOCK_SIZE, encoding='utf-8'):
			"""Yield every JSON document from the current position to end of file.

			Args:
				blocksize: Number of characters requested per read.
				encoding: Unused; kept so existing callers keep working.

			Yields:
				Each decoded document in file order. Text at the end of the
				stream that never forms a complete document is dropped silently.
			"""
			decoder = json.JSONDecoder(strict=False)
			buffer = ''
			at_eof = False

			while not at_eof:
				block = self.read(blocksize)
				at_eof = block == ''
				buffer += block
				pos = _skip_whitespace(buffer, 0)
				while True:
					decoded = _try_decode(decoder, buffer, pos, at_eof)
					if decoded is None:
						break
					result, pos = decoded
					yield result
					pos = _skip_whitespace(buffer, pos)
				# Keep only the undecoded tail for the next round.
				buffer = buffer[pos:]

		def writeJSON(self, object):
			"""Serialize object as JSON and write it to the stream.

			Non-ASCII characters are written as-is rather than escaped. No
			separator is written after the document.
			"""
			json.dump(object, self, ensure_ascii=False)

	def open_stream(filename, mode, *args, **argz):
		"""Open filename as a JsonStream, picking the codec from its extension.

		'.gz' uses gzip, '.lzma' and '.lz' use lzma, '.bz2' uses bz2; any other
		extension is opened as a plain file. The extension check ignores case.

		Args:
			filename: Path of the file to open.
			mode: File mode such as 'rb' or 'wb'. JsonStream wraps a binary
				stream, so a text mode ('r', 'wt', ...) is converted to its
				binary equivalent.
			*args: Extra positional arguments for the underlying opener.
			**argz: Extra keyword arguments for the underlying opener.

		Returns:
			A JsonStream that encodes and decodes the file as UTF-8.

		Raises:
			ImportError: If the compression module needed for the extension is
				not available.
		"""
		if isinstance(mode, str):
			# io.TextIOWrapper needs a bytes-producing stream underneath, and the
			# compressed-file classes reject 't' in the mode.
			mode = mode.replace('t', '')
			if 'b' not in mode:
				mode += 'b'

		ext = os.path.splitext(filename)[-1].lower()
		# Codec modules are imported on demand so that a missing optional module
		# surfaces as ImportError, and only for the format that needs it.
		if ext == '.gz':
			import gzip
			raw = gzip.GzipFile(filename, mode, *args, **argz)
		elif ext in ('.lzma', '.lz'):
			import lzma
			raw = lzma.LZMAFile(filename, mode, *args, **argz)
		elif ext == '.bz2':
			import bz2
			raw = bz2.BZ2File(filename, mode, *args, **argz)
		else:
			raw = open(filename, mode, *args, **argz)
		return JsonStream(raw, encoding='utf-8')