Created
November 2, 2024 12:31
-
-
Save superbonaci/baf1ed9f880eaaa4332001fc224b2e50 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/README.md b/README.md | |
index ed2f779..5598e7c 100644 | |
--- a/README.md | |
+++ b/README.md | |
@@ -12,7 +12,7 @@ pip install youtube-comment-downloader | |
Or directly from the GitHub repository: | |
``` | |
-pip install https://github.com/egbertbouman/youtube-comment-downloader/archive/master.zip | |
+pip install https://github.com/biggestsonicfan/youtube-comment-downloader/archive/normalize-id-output.zip | |
``` | |
### Usage as command-line interface | |
@@ -45,6 +45,17 @@ youtube-comment-downloader --youtubeid ScMzIvxBSi4 --output ScMzIvxBSi4.json | |
For Youtube IDs starting with - (dash) you will need to run the script with: | |
`-y=idwithdash` or `--youtubeid=idwithdash` | |
+### Additionally | |
+You can also pass just a valid Youtube URL as the only argument; the comments will be saved to a JSON file named after the video's ID (e.g. `lalOy8Mbfdc.json`) | |
+``` | |
+youtube-comment-downloader https://www.youtube.com/watch?v=lalOy8Mbfdc | |
+``` | |
+``` | |
+youtube-comment-downloader "http://www.youtube.com/watch?v=ishbTyLs6ps&list=PLGup6kBfcU7Le5laEaCLgTKtlDcxMqGxZ&index=106&shuffle=2655" | |
+``` | |
+``` | |
+youtube-comment-downloader http://youtu.be/dQw4w9WgXcQ?feature=youtube_gdata_player | |
+``` | |
### Usage as library | |
You can also use this script as a library. For instance, if you want to print out the 10 most popular comments for a particular Youtube video you can do the following: | |
diff --git a/youtube_comment_downloader/__init__.py b/youtube_comment_downloader/__init__.py | |
index fba3e58..19b5757 100644 | |
--- a/youtube_comment_downloader/__init__.py | |
+++ b/youtube_comment_downloader/__init__.py | |
@@ -4,6 +4,8 @@ import json | |
import os | |
import sys | |
import time | |
+import re | |
+from urllib.parse import urljoin, urlparse, parse_qs | |
from .downloader import YoutubeCommentDownloader, SORT_BY_POPULAR, SORT_BY_RECENT | |
@@ -31,6 +33,24 @@ def main(argv = None): | |
help='Whether to download popular (0) or recent comments (1). Defaults to 1') | |
try: | |
+ # Check if only a Youtube url has been passed | |
+ if len(sys.argv) == 2: | |
+ youtube_url = urlparse(sys.argv.pop(1)) | |
+ if "youtube.com" in youtube_url.netloc or "youtu.be" in youtube_url.netloc: | |
+ if "watch" in youtube_url.path: | |
+ query_params = parse_qs(youtube_url.query) | |
+ if 'v' in query_params: | |
+ sys.argv.extend(['-y', query_params['v'][0]]) | |
+ sys.argv.extend(['-o', f"{query_params['v'][0]}.json"]) | |
+ match = re.search(r"(v|e|embed|shorts|live|watch)/([\w-]{11})", youtube_url.path) | |
+ if match: | |
+ sys.argv.extend(['-y', match.group(2)]) | |
+ sys.argv.extend(['-o', f"{match.group(2)}.json"]) | |
+ if youtube_url.netloc == "youtu.be": | |
+ parsed_url = f"{youtube_url.path.lstrip('/')[:youtube_url.path.lstrip('/').find('&')] if '&' in youtube_url.path else youtube_url.path.lstrip('/')}" | |
+ sys.argv.extend(['-y', parsed_url]) | |
+ sys.argv.extend(['-o', f"{parsed_url}.json"]) | |
+ | |
args = parser.parse_args() if argv is None else parser.parse_args(argv) | |
youtube_id = args.youtubeid | |
@@ -73,11 +93,12 @@ def main(argv = None): | |
print(comment_str.decode('utf-8') if isinstance(comment_str, bytes) else comment_str, file=fp) | |
sys.stdout.write('Downloaded %d comment(s)\r' % count) | |
sys.stdout.flush() | |
- count += 1 | |
+ if comment is not None: | |
+ count += 1 | |
if pretty: | |
fp.write(' ' * INDENT +']\n}') | |
- print('\n[{:.2f} seconds] Done!'.format(time.time() - start_time)) | |
+ print(f"\nDone! {count} comments saved to {output} in {(time.time() - start_time):.2f} seconds") | |
except Exception as e: | |
print('Error:', str(e)) | |
diff --git a/youtube_comment_downloader/downloader.py b/youtube_comment_downloader/downloader.py | |
index de021cb..98a3951 100755 | |
--- a/youtube_comment_downloader/downloader.py | |
+++ b/youtube_comment_downloader/downloader.py | |
@@ -164,4 +164,4 @@ class YoutubeCommentDownloader: | |
else: | |
stack.append(value) | |
elif isinstance(current_item, list): | |
- stack.extend(current_item) | |
+ stack.extend(current_item) | |
\ No newline at end of file |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment