Last active
July 1, 2019 12:36
-
-
Save Ex-Ark/b320d896863bb9301e8af953fb41d8a9 to your computer and use it in GitHub Desktop.
view the most blocked countries in a youtube playlist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# why: I'm watching a series on YouTube in which some episodes are blocked in certain countries (different ones each time).
# I don't want to switch my VPN server after each episode and try every server to find an allowed country.
# The script gives me the least-blocked country across the entire playlist.
# what it does:
# given a playlist url,
# parses the playlist page without using the Google API (scraping);
# for each video found, it calls the unblockvideos API;
# returns a hash with the number of times each country is blocked.
# TODO: get all country codes and subtract the blocked countries to determine the countries with 0 blocks in the playlist.
require 'cgi'
require 'set'
require 'uri'

require 'http'
require 'nokogiri'
# Endpoint queried later in the script with batches of video ids to learn where each video is blocked.
# No API key required; the service is limited to 1000 video ids per day.
UNBLOCK_URL = 'http://api.unblockvideos.com/youtube_restrictions'.freeze

puts 'Enter playlist (full url):'
# `gets` returns nil when stdin is closed, so coerce to a string before cleaning it up.
playlist_url = gets.to_s.strip
if playlist_url.empty?
  puts 'playlist error'
  exit(1)
end

# Avoid the mobile version. Use '?' when the url has no query string yet,
# otherwise the parameter would be glued onto the path.
separator = playlist_url.include?('?') ? '&' : '?'
playlist_url = "#{playlist_url}#{separator}app=desktop"

# Forged browser-like headers sent with the scraping request.
# NOTE(review): the user-agent below is an Android/mobile one although the url asks for the
# desktop layout - confirm this combination is intentional.
headers = {
  "Accept" => "text/html",
  "cookie-installing-permission" => "required",
  "user-agent" => "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Mobile Safari/537.36",
  "referer" => "https://www.youtube.com/"
}

# Scrape the playlist page, following redirects.
response = HTTP.follow.get playlist_url, headers: headers

# `puts` returns nil, so the former `puts ... and exit(1)` never reached `exit`;
# print and exit as two separate statements.
unless response.status.success?
  puts 'playlist error'
  exit(1)
end

playlist = Nokogiri::HTML(response.body.to_s)
# Collect the distinct video ids referenced by the playlist page.
ids = Set.new
# Every anchor whose href points at a /watch? url is a candidate video link.
playlist.css('a[href]').each do |anchor|
  href = anchor['href']
  next unless href.include?('/watch?')

  begin
    query = URI(href).query.to_s
  rescue URI::InvalidURIError
    # A single malformed href should not abort the whole run.
    next
  end

  # Only the v=XXXXXX parameter is needed. Indexing with ['v'] yields an empty list
  # when the parameter is missing, so links without a video id are skipped instead of
  # raising on nil.
  video_id = CGI.parse(query)['v'].first
  next if video_id.nil? || video_id.empty?

  ids.add video_id
end
# Tally, per country code, how many videos of the playlist are blocked there.
# A default of 0 lets each occurrence simply increment the counter.
blocked = Hash.new(0)
ids.each_slice(15) do |part|
  # Given multiple video ids, the API returns the country codes in which they are blocked.
  response = HTTP.get UNBLOCK_URL, params: { id: part.join(',') }
  unless response.status.success?
    # Report the failed batch on stderr and keep going with the remaining ones.
    warn "unblockvideos request failed (#{response.status}) for ids: #{part.join(',')}"
    next
  end

  response.parse(:json).each do |video|
    # Array() tolerates an entry without a 'blocked' list.
    Array(video['blocked']).each { |country| blocked[country] += 1 }
  end
end

# Result: sorted by descending count, so the least blocked country is at the bottom of the hash.
puts blocked.sort_by { |_country, count| -count }.to_h
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example :
Output (ISO 3166) :
{"FR"=>11, "TF"=>11, "YT"=>11, "MF"=>11, "BL"=>11, "GF"=>11, "MC"=>11, "AD"=>11, "RE"=>11, "PF"=>11, "NC"=>11, "WF"=>11, "GP"=>11, "MQ"=>11, "PM"=>11, "AE"=>2, "AF"=>2, "AG"=>2, "AI"=>2, "AL"=>2, "AM"=>2, "AO"=>2, "AQ"=>2, "AR"=>2, "AT"=>2, "AU"=>2, "AW"=>2, "AX"=>2, "AZ"=>2, "BA"=>2, "BB"=>2, "BD"=>2, "BE"=>2, "BF"=>2, "BG"=>2, "BH"=>2, "BI"=>2, "BJ"=>2, "BM"=>2, "BN"=>2, "BO"=>2, "BQ"=>2, "BR"=>2, "BS"=>2, "BT"=>2, "BV"=>2, "BW"=>2, "BY"=>2, "BZ"=>2, "CA"=>2, "CC"=>2, "CD"=>2, "CF"=>2, "CG"=>2, "CH"=>2, "CI"=>2, "CK"=>2, "CL"=>2, "CM"=>2, "CN"=>2, "CO"=>2, "CR"=>2, "CU"=>2, "CV"=>2, "CW"=>2, "CX"=>2, "CY"=>2, "CZ"=>2, "DE"=>2, "DJ"=>2, "DK"=>2, "DM"=>2, "DO"=>2, "DZ"=>2, "EC"=>2, "EE"=>2, "EG"=>2, "EH"=>2, "ER"=>2, "ES"=>2, "ET"=>2, "FI"=>2, "FJ"=>2, "FK"=>2, "FM"=>2, "FO"=>2, "GA"=>2, "GB"=>2, "GD"=>2, "GE"=>2, "GG"=>2, "GH"=>2, "GI"=>2, "GL"=>2, "GM"=>2, "GN"=>2, "GQ"=>2, "GR"=>2, "GS"=>2, "GT"=>2, "GW"=>2, "GY"=>2, "HK"=>2, "HM"=>2, "HN"=>2, "HR"=>2, "HT"=>2, "HU"=>2, "IC"=>1, "ID"=>2, "IE"=>2, "IL"=>2, "IM"=>2, "IN"=>2, "IO"=>2, "IQ"=>2, "IR"=>2, "IS"=>2, "IT"=>2, "JE"=>2, "JM"=>2, "JO"=>2, "JP"=>2, "KE"=>2, "KG"=>2, "KH"=>2, "KI"=>2, "KM"=>2, "KN"=>2, "KP"=>2, "KR"=>75, "KW"=>2, "KY"=>2, "KZ"=>2, "LA"=>2, "LB"=>2, "LC"=>2, "LI"=>2, "LK"=>2, "LR"=>2, "LS"=>2, "LT"=>2, "LU"=>2, "LV"=>2, "LY"=>2, "MA"=>2, "MD"=>2, "ME"=>2, "MG"=>2, "MK"=>2, "ML"=>2, "MM"=>2, "MN"=>2, "MO"=>2, "MR"=>2, "MS"=>2, "MT"=>2, "MU"=>2, "MV"=>2, "MW"=>2, "MX"=>2, "MY"=>2, "MZ"=>2, "NA"=>2, "NE"=>2, "NF"=>2, "NG"=>2, "NI"=>2, "NL"=>2, "NO"=>2, "NP"=>2, "NR"=>2, "NU"=>2, "NZ"=>2, "OM"=>2, "PA"=>2, "PE"=>2, "PG"=>2, "PH"=>2, "PK"=>2, "PL"=>2, "PN"=>2, "PS"=>2, "PT"=>2, "PW"=>2, "PY"=>2, "QA"=>2, "RO"=>2, "RS"=>2, "RU"=>2, "RW"=>2, "SA"=>2, "SB"=>2, "SC"=>2, "SD"=>2, "SE"=>2, "SG"=>2, "SH"=>2, "SI"=>2, "SJ"=>2, "SK"=>2, "SL"=>2, "SM"=>2, "SN"=>2, "SO"=>2, "SR"=>2, "SS"=>2, "ST"=>2, "SV"=>2, "SX"=>2, "SY"=>2, "SZ"=>2, "TC"=>2, "TD"=>2, "TG"=>2, "TH"=>1, "TJ"=>2, "TK"=>2, "TL"=>2, "TM"=>2, 
"TN"=>2, "TO"=>2, "TR"=>2, "TT"=>2, "TV"=>2, "TW"=>1, "TZ"=>2, "UA"=>2, "UG"=>2, "UY"=>2, "UZ"=>2, "VA"=>2, "VC"=>2, "VE"=>2, "VG"=>2, "VN"=>2, "VU"=>2, "WS"=>2, "YE"=>2, "ZA"=>2, "ZM"=>2, "ZW"=>2, "MP"=>1, "VI"=>1, "AS"=>1, "PR"=>1, "UM"=>1, "MH"=>1, "US"=>1, "GU"=>1}
Most blocked countries : FR(France) / TF(French Southern Territories) / YT(Mayotte) ...
Least blocked country : GU(Guam) / US(United States) / PR (Puerto Rico) ...