Skip to content

Instantly share code, notes, and snippets.

@rebane2001
Created July 10, 2023 15:05
Show Gist options
  • Save rebane2001/a2fed42c17b22c04c63cc0128440151f to your computer and use it in GitHub Desktop.
Save rebane2001/a2fed42c17b22c04c63cc0128440151f to your computer and use it in GitHub Desktop.
Testing various ways of reading YouTube API JSON from a JSONL file
import time
import json
import orjson
import msgspec
class Snippet(msgspec.Struct):
    """Typed view of a video's nested ``snippet`` object.

    Declares only the one field the benchmark reads.
    """

    # The example record's snippet has many more keys (description,
    # thumbnails, tags, ...); none of them are declared here.
    title: str
class Video(msgspec.Struct):
    """Typed view of one video record: its ID and its nested snippet.

    Used as the target type for the msgspec decoding pass of the benchmark.
    """

    # Field names mirror the "id" and "snippet" keys of the example record.
    id: str
    snippet: Snippet
def benchmark(path="2022-07-01.jsonl"):
    """Time json, orjson and msgspec building an ``id -> title`` map.

    The file at *path* is read once per decoder, one JSON document per line.
    Every line must be an object with a string ``id`` and a ``snippet``
    object containing ``title``; a line that does not decode, or lacks those
    keys, raises from the decoder or the lookup.

    For each decoder this prints a header line, then the elapsed time and the
    size of the resulting map.

    Args:
        path: JSONL file to read. Defaults to the file name the benchmark
            was originally written against.
    """
    for name in ("json", "orjson", "msgspec"):
        video_titles = {}
        print(f"Benchmarking with {name}...")
        # perf_counter() is monotonic and high resolution; time.time() can
        # jump if the system clock is adjusted mid-run.
        start = time.perf_counter()
        # Explicit encoding so decoding does not depend on the platform's
        # locale default.
        with open(path, encoding="utf-8") as f:
            # Choose the decoder once per pass so the timed loop contains
            # only decode + lookup + store, not per-line dispatch.
            if name == "msgspec":
                for line in f:
                    video = msgspec.json.decode(line, type=Video)
                    video_titles[video.id] = video.snippet.title
            else:
                loads = json.loads if name == "json" else orjson.loads
                for line in f:
                    record = loads(line)
                    video_titles[record['id']] = record['snippet']['title']
        elapsed = time.perf_counter() - start
        # NOTE: the count is len() of a dict keyed by video ID, so repeated
        # IDs in the file are counted once.
        print(f"Took {elapsed:.2f} seconds ({len(video_titles)} lines processed)")
# Run only when executed as a script, so importing this module (for example to
# reuse Video or benchmark) does not kick off a multi-minute benchmark.
if __name__ == "__main__":
    # Three full rounds; presumably repeated so first-pass effects can be
    # told apart from the later, steadier timings.
    for round_number in range(3):
        print(f"Round {round_number}")
        benchmark()
"""
Example jsonl line (test file had 568k lines):
{"kind": "youtube#video", "etag": "PNtiHJh3jkcT176qpKQ0rd_MsDM", "id": "9bZkp7q19f0", "snippet": {"publishedAt": "2012-07-15T07:46:32Z", "channelId": "UCrDkAvwZum-UTjHmzDI2iIw", "title": "PSY - GANGNAM STYLE(\uac15\ub0a8\uc2a4\ud0c0\uc77c) M/V", "description": "PSY - \u2018I LUV IT\u2019 M/V @ https://youtu.be/Xvjnoagk6GU\nPSY - \u2018New Face\u2019 M/V @https://youtu.be/OwJPPaEyqhI\n\nPSY - 8TH ALBUM '4X2=8' on iTunes @\nhttps://smarturl.it/PSY_8thAlbum\n\nPSY - GANGNAM STYLE(\uac15\ub0a8\uc2a4\ud0c0\uc77c) on iTunes @ http://smarturl.it/PsyGangnam\n\n#PSY #\uc2f8\uc774 #GANGNAMSTYLE #\uac15\ub0a8\uc2a4\ud0c0\uc77c\n\nMore about PSY@\nhttp://www.youtube.com/officialpsy\nhttp://www.facebook.com/officialpsy\nhttp://twitter.com/psy_oppa\nhttps://www.instagram.com/42psy42\nhttp://iTunes.com/PSY\nhttp://sptfy.com/PSY\nhttp://weibo.com/psyoppa", "thumbnails": {"default": {"url": "https://i.ytimg.com/vi/9bZkp7q19f0/default.jpg", "width": 120, "height": 90}, "medium": {"url": "https://i.ytimg.com/vi/9bZkp7q19f0/mqdefault.jpg", "width": 320, "height": 180}, "high": {"url": "https://i.ytimg.com/vi/9bZkp7q19f0/hqdefault.jpg", "width": 480, "height": 360}, "standard": {"url": "https://i.ytimg.com/vi/9bZkp7q19f0/sddefault.jpg", "width": 640, "height": 480}, "maxres": {"url": "https://i.ytimg.com/vi/9bZkp7q19f0/maxresdefault.jpg", "width": 1280, "height": 720}}, "channelTitle": "officialpsy", "tags": ["PSY", "\uc2f8\uc774", "\uac15\ub0a8\uc2a4\ud0c0\uc77c", "\ubba4\uc9c1\ube44\ub514\uc624", "Music Video", "Gangnam Style", "KOREAN SINGER", "KPOP", "KOERAN WAVE", "PSY 6\u7532", "6th Studio Album", "\uc2f8\uc7746\uc9d1", "\uc721\uac11"], "categoryId": "10", "liveBroadcastContent": "none", "localized": {"title": "PSY - GANGNAM STYLE(\uac15\ub0a8\uc2a4\ud0c0\uc77c) M/V", "description": "PSY - \u2018I LUV IT\u2019 M/V @ https://youtu.be/Xvjnoagk6GU\nPSY - \u2018New Face\u2019 M/V @https://youtu.be/OwJPPaEyqhI\n\nPSY - 8TH ALBUM '4X2=8' on iTunes 
@\nhttps://smarturl.it/PSY_8thAlbum\n\nPSY - GANGNAM STYLE(\uac15\ub0a8\uc2a4\ud0c0\uc77c) on iTunes @ http://smarturl.it/PsyGangnam\n\n#PSY #\uc2f8\uc774 #GANGNAMSTYLE #\uac15\ub0a8\uc2a4\ud0c0\uc77c\n\nMore about PSY@\nhttp://www.youtube.com/officialpsy\nhttp://www.facebook.com/officialpsy\nhttp://twitter.com/psy_oppa\nhttps://www.instagram.com/42psy42\nhttp://iTunes.com/PSY\nhttp://sptfy.com/PSY\nhttp://weibo.com/psyoppa"}}, "contentDetails": {"duration": "PT4M13S", "dimension": "2d", "definition": "hd", "caption": "false", "licensedContent": false, "contentRating": {}, "projection": "rectangular"}, "status": {"uploadStatus": "processed", "privacyStatus": "public", "license": "youtube", "embeddable": true, "publicStatsViewable": true, "madeForKids": false}, "statistics": {"viewCount": "4495845443", "likeCount": "25255825", "favoriteCount": "0", "commentCount": "5331801"}, "player": {"embedHtml": "<iframe width=\"480\" height=\"270\" src=\"//www.youtube.com/embed/9bZkp7q19f0\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture\" allowfullscreen></iframe>"}, "topicDetails": {"topicCategories": ["https://en.wikipedia.org/wiki/Electronic_music", "https://en.wikipedia.org/wiki/Music", "https://en.wikipedia.org/wiki/Music_of_Asia", "https://en.wikipedia.org/wiki/Pop_music"]}, "recordingDetails": {}}
Output:
Round 0
Benchmarking with json...
Took 16.67 seconds (568403 lines processed)
Benchmarking with orjson...
Took 14.27 seconds (568403 lines processed)
Benchmarking with msgspec...
Took 5.22 seconds (568403 lines processed)
Round 1
Benchmarking with json...
Took 16.15 seconds (568403 lines processed)
Benchmarking with orjson...
Took 13.65 seconds (568403 lines processed)
Benchmarking with msgspec...
Took 4.72 seconds (568403 lines processed)
Round 2
Benchmarking with json...
Took 14.51 seconds (568403 lines processed)
Benchmarking with orjson...
Took 12.79 seconds (568403 lines processed)
Benchmarking with msgspec...
Took 4.93 seconds (568403 lines processed)
[Finished in 103.5s]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment