Last active
September 14, 2020 00:44
-
-
Save dbeley/9363640dbe7cf410c995588585d33038 to your computer and use it in GitHub Desktop.
Script to extract youtube urls from a json file returned by the google takeout export.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Extract youtube urls from a json file returned by the google takeout export. | |
Usage : python extract_urls.py <name of json file> | |
""" | |
import logging | |
import argparse | |
import json | |
from pathlib import Path | |
# Root logger (getLogger() called without a name); parse_args() sets its
# level and message format through logging.basicConfig().
logger = logging.getLogger()
def main():
    """Export the YouTube watch URL of every entry in a playlist JSON file.

    The input path comes from the command line (see ``parse_args``). Each
    element of the decoded JSON is expected to expose
    ``["contentDetails"]["videoId"]``; one URL per element is written, one
    per line, to ``Export_<input file stem>.txt``.
    """
    options = parse_args()
    url_prefix = "https://youtube.com/watch?v="

    with open(options.file, "r", encoding="utf-8") as source:
        entries = json.load(source)

    # Build every URL before the output file is opened, so an entry that
    # lacks the expected keys fails without creating the export file.
    urls = []
    for entry in entries:
        urls.append(url_prefix + entry["contentDetails"]["videoId"])

    export_name = f"Export_{Path(options.file).stem}.txt"
    with open(export_name, "w") as target:
        target.writelines(url + "\n" for url in urls)
def parse_args():
    """Parse the command-line arguments and configure logging.

    Reads ``sys.argv`` through ``argparse`` and then calls
    ``logging.basicConfig`` with the selected level.

    Returns:
        argparse.Namespace: with ``file`` (path of the JSON file, as a
        string), ``loglevel`` (``logging.DEBUG`` when ``--debug`` is given,
        ``logging.INFO`` otherwise) and ``boolean_flag`` (always ``False``).
    """
    # Named log_format rather than format to avoid shadowing the builtin.
    log_format = "%(levelname)s :: %(message)s"
    parser = argparse.ArgumentParser(
        description="Extract youtube urls from a json file returned by the google takeout export."
    )
    parser.add_argument(
        "--debug",
        help="Display debugging information.",
        action="store_const",
        dest="loglevel",
        const=logging.DEBUG,
        default=logging.INFO,
    )
    parser.add_argument(
        "file",
        help="Youtube playlist JSON file from a google takeout export.",
        type=str,
    )
    # No argument sets boolean_flag; the default is kept only so the returned
    # namespace keeps the same attributes as before.
    parser.set_defaults(boolean_flag=False)
    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel, format=log_format)
    return args
# Run the export only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
It worked, thank you very much!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've never had this kind of issue, but it seems to be an encoding error.
You can try forcing utf-8 decoding by replacing line 16 of the script with this one:
with open(args.file, "r", encoding="utf-8") as f:
(I also updated the gist reflecting the change)