Last active
February 14, 2022 07:19
-
-
Save bobthemighty/9f4fd8fbb2435b8f6b8cf191dabdf37a to your computer and use it in GitHub Desktop.
Streaming spikes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sip(stream):
    """
    Lazily yield each top-level JSON value read from ``stream``.

    NAYA will handle our json just fine so long as it starts with an
    open bracket, so it thinks it has a list. We therefore feed
    ``stream_array`` a synthetic ``[`` token followed by the real tokens.

    Nothing ever supplies the matching ``]``, so the token stream simply
    runs dry. Inside a generator that ``StopIteration`` is converted to a
    ``RuntimeError`` (PEP 479); that case is treated as normal end of input.
    Any other ``RuntimeError`` is re-raised instead of being hidden.
    """
    def f():
        # Yield a dummy `[`
        yield TOKEN_TYPE.OPERATOR, "["
        # Followed by the rest of the tokens in the stream
        yield from tokenize(stream)

    try:
        yield from stream_array(f())
    except RuntimeError as e:
        # Only the "generator raised StopIteration" conversion means the
        # tokens ran out; it carries the original StopIteration as its cause.
        # Everything else (e.g. RecursionError) is a real failure.
        if not isinstance(e.__cause__, StopIteration):
            raise
import gzip | |
import boto3 | |
# Module-level S3 client; parse_file() below uses it to fetch objects.
s3 = boto3.client(service_name="s3")
def parse_file(bucket: str, key: str):
    """Fetch a gzip-compressed S3 object and print every JSON value in it.

    The object body is decompressed and decoded as UTF-8 text on the fly and
    handed to ``sip``, so values are printed as they are parsed.

    Args:
        bucket: Name of the S3 bucket holding the object.
        key: Key of the gzip-compressed object inside ``bucket``.
    """
    data = s3.get_object(Key=key, Bucket=bucket)
    raw = data["Body"]
    try:
        # Closing the GzipFile does not close the file object it wraps, so
        # the S3 body is released separately in the ``finally`` below.
        with gzip.open(raw, encoding="UTF-8", mode="rt") as body:
            for obj in sip(body):
                print(obj)
    finally:
        raw.close()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ContentHandler(YajlContentHandler):
    """Yajl callback handler that assembles parsed JSON into Python values.

    Open maps and arrays are tracked on a single stack so that they can nest
    in any combination. Each completed top-level value is appended to
    ``results`` in the order it was parsed.
    """

    def __init__(self):
        # Map keys that have been read but not yet paired with a value.
        self._keys = []
        # Containers (dicts and lists) that are still open, innermost last.
        self._containers = []
        # Completed top-level values.
        self.results = []

    def set(self, value):
        """Attach a finished value to whatever currently encloses it.

        With no container open the value is a top-level one and goes to
        ``results``; inside a list it is appended; inside a dict it is stored
        under the most recently read key.
        """
        if not self._containers:
            self.results.append(value)
            return
        parent = self._containers[-1]
        if isinstance(parent, list):
            # Array elements have no key, so nothing is popped from _keys.
            parent.append(value)
        else:
            parent[self._keys.pop()] = value

    def yajl_null(self, ctx):
        self.set(None)

    def yajl_boolean(self, ctx, boolVal):
        self.set(boolVal)

    def yajl_integer(self, ctx, integerVal):
        self.set(integerVal)

    def yajl_double(self, ctx, doubleVal):
        self.set(doubleVal)

    def yajl_number(self, ctx, stringNum):
        """Since this is defined both integer and double callbacks are useless"""
        # Try int first and fall back to float, so that exponent forms
        # without a decimal point (e.g. 1e5) parse instead of raising.
        try:
            num = int(stringNum)
        except ValueError:
            num = float(stringNum)
        self.set(num)

    def yajl_string(self, ctx, stringVal):
        self.set(stringVal.decode())

    def yajl_start_map(self, ctx):
        self._containers.append(dict())

    def yajl_map_key(self, ctx, stringVal):
        self._keys.append(stringVal.decode())

    def yajl_end_map(self, ctx):
        # The closed map becomes a value of its parent, or a top-level result.
        self.set(self._containers.pop())

    def yajl_start_array(self, ctx):
        self._containers.append([])

    def yajl_end_array(self, ctx):
        # The closed list becomes a value of its parent, or a top-level result.
        self.set(self._containers.pop())
# Create the parser
# A single handler/parser pair is built at import time and shared by
# parse_file() below.
handler = ContentHandler()
parser = YajlParser(handler)
# NOTE(review): going by the flag names, these let the parser accept several
# concatenated JSON values and ignore trailing bytes -- confirm against yajl-py.
parser.allow_trailing_garbage = parser.allow_multiple_values = True
import gzip | |
import boto3 | |
# Module-level S3 client; parse_file() below uses it to fetch objects.
s3 = boto3.client(service_name="s3")
def parse_file(bucket: str, key: str):
    """Fetch a gzip-compressed S3 object, parse it with yajl, print the results.

    The decompressed bytes are fed to the module-level ``parser``; the values
    collected by the module-level ``handler`` are printed afterwards.

    NOTE: ``handler.results`` is not cleared here, so calling this function
    more than once also prints values collected by earlier calls.

    Args:
        bucket: Name of the S3 bucket holding the object.
        key: Key of the gzip-compressed object inside ``bucket``.
    """
    data = s3.get_object(Key=key, Bucket=bucket)
    raw = data["Body"]
    try:
        # Closing the GzipFile does not close the file object it wraps, so
        # the S3 body is released separately in the ``finally`` below.
        with gzip.open(raw) as body:
            parser.parse(body)
    finally:
        raw.close()
    print(handler.results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment