Created
March 25, 2015 16:06
-
-
Save answerquest/272a0b83a999fea62f9e to your computer and use it in GitHub Desktop.
youtube data extract
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html> | |
| <head> | |
| <style> | |
| table, th, td { | |
| border: 1px solid black; | |
| border-collapse:collapse; | |
| } | |
| th, td { | |
| padding: 5px; | |
| } | |
| pre { | |
| white-space: pre-wrap; /* css-3 */ | |
| white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ | |
| white-space: -pre-wrap; /* Opera 4-6 */ | |
| white-space: -o-pre-wrap; /* Opera 7 */ | |
| word-wrap: break-word; /* Internet Explorer 5.5+ */ | |
| font-family: Calibri, Arial, serif; | |
| height: 170px; | |
| overflow: auto; /*longer descriptions will get a scrollbar */ | |
| } | |
| body { | |
| font-family: Calibri, Arial, serif; | |
| } | |
| </style> | |
| <base target="_blank"> | |
| </head> | |
| <body> | |
| <h2>Youtube channel browser</h2> | |
| <p> | |
| Channel / Username: <input id="in_channel" value="arvindguptatoys"> <small>like arvindguptatoys, etc. put exact username, seen as in https://www.youtube.com/user/arvindguptatoys </small><br> | |
| <!-- | |
| OR, Playlist id: <input id="in_playlist"> <small>like PL6E92FE8AF927E4F1 in http://www.youtube.com/playlist?list=PL6E92FE8AF927E4F1 . Leave above channel box blank if using this. </small><br> | |
| --> | |
| Search query, if any: <input id="in_search"> <small>like Hindi, English, Marathi, pump, etc. Put space between two words, no special chars. Leave it blank if you just want listing. This only searches title, not description.</small> <br> | |
| Index from <input id="in_startindex" value="1"> to <input id="in_endindex" value="500"> <small>(1 to 500 will list the latest 500 videos) (note: if using search terms, cannot go beyond 500)</small> <br> | |
| <input type="button" value="Click to List" onclick="listchannel()"> | |
| <div id="out_listing"></div> | |
| <br><br><div id="out_images"></div> | |
| <br>CSV: <textarea id="out_raw"></textarea> | |
| <br>XML: <textarea id="out_xml"></textarea> | |
| <br> | |
| Specific video data: Put in 11 char videoid: <input id="singlevideo_in" value="b7l3PLUSEUg"> <input type="button" value="See this video's views" onclick="document.getElementById('viewcount_out').value = getViewCount(document.getElementById('singlevideo_in').value);"> <small>Like in http://www.youtube.com/watch?v=5L3sDgPaGH4 the video id is 5L3sDgPaGH4</small> | |
| <br>View Counts: <textarea id="viewcount_out"></textarea> | |
| <script> | |
/**
 * Click handler for the "Click to List" button.
 *
 * Reads the channel, search term and index range from the form, downloads the
 * channel's uploads feed page by page (synchronously), and fills:
 *   - #out_listing   with an HTML table (thumbnail, title/meta, description),
 *   - #out_raw       with one CSV row per video,
 *   - #out_xml       with a simplified <feed> XML document,
 *   - #viewcount_out with "videoId<TAB>viewCount" lines.
 *
 * All text that comes from the feed is escaped before it is placed in HTML,
 * CSV or CDATA, because it is third-party content.
 *
 * Relies on the sibling function secondsToHms() for duration formatting.
 *
 * NOTE(review): the gdata.youtube.com feed endpoints used here are believed to
 * have been retired by YouTube; confirm before relying on this function.
 */
function listchannel() {
  var PAGE_SIZE = 50; // entries requested per feed page
  var PREFIX = "http://www.youtube.com/watch?v=";
  var THUMBNAILPRE = "<img src=\"https://i.ytimg.com/vi/";
  var THUMBNAILPOST = "/mqdefault.jpg\">";

  /** Escape text for safe use in HTML content, attribute values and XML text. */
  function escapeHtml(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /** Double embedded quotes so the value can sit inside a "..." CSV field. */
  function csvField(text) {
    return String(text).replace(/"/g, "\"\"");
  }

  /** Split any "]]>" so the value cannot terminate its CDATA section early. */
  function cdataSafe(text) {
    return String(text).replace(/\]\]>/g, "]]]]><![CDATA[>");
  }

  /** Text of the first <tagName> descendant of entry, or "" when absent/empty. */
  function textOf(entry, tagName) {
    var el = entry.getElementsByTagName(tagName)[0];
    var node = el ? el.childNodes[0] : undefined;
    return node && node.nodeValue !== null ? node.nodeValue : "";
  }

  /** Attribute of the first <tagName> descendant of entry, or "" when absent. */
  function attrOf(entry, tagName, attrName) {
    var el = entry.getElementsByTagName(tagName)[0];
    var value = el ? el.getAttribute(attrName) : null;
    return value === null ? "" : value;
  }

  var listing = document.getElementById("out_listing");
  listing.innerHTML = "Loading.. please wait..";

  var PITARA = ""; // accumulated HTML for #out_listing
  var RAW = "";
  var IMAGES = "";
  var COUNTSLIST = "";
  var COUNT = 0; // running total of view counts across all pages
  var XML = "<?xml version='1.0' encoding='UTF-8'?>\n<feed>\n";

  var CHANNEL = document.getElementById("in_channel").value.trim();
  var PLAYLIST = ""; //document.getElementById("in_playlist").value.trim();
  var searchTerm = document.getElementById("in_search").value.trim();
  // Encode the term, then restore the "+" word separator the feed URL used before.
  var SEARCH = searchTerm ? "&q=" + encodeURIComponent(searchTerm).replace(/%20/g, "+") : "";
  var STARTINDEX = parseInt(document.getElementById("in_startindex").value, 10);
  var ENDINDEX = parseInt(document.getElementById("in_endindex").value, 10);

  if (!CHANNEL && !PLAYLIST) {
    listing.innerHTML = "<p>Please enter a channel / username.</p>";
    return;
  }
  if (isNaN(STARTINDEX) || isNaN(ENDINDEX)) {
    listing.innerHTML = "<p>Please enter whole numbers for the index range.</p>";
    return;
  }
  if (STARTINDEX < 1) {
    STARTINDEX = 1; // the feed's start-index parameter is 1-based
  }

  var xmlhttp;
  if (typeof XMLHttpRequest !== "undefined") {
    // IE7+, Firefox, Chrome, Opera, Safari
    xmlhttp = new XMLHttpRequest();
  } else {
    // IE6, IE5
    xmlhttp = new ActiveXObject("Microsoft.XMLHTTP");
  }

  /* ######## MAHALOOP BEGINS ############ */
  for (var INDEX = STARTINDEX; INDEX <= ENDINDEX; INDEX += PAGE_SIZE) {
    // Never ask for more entries than remain in the requested index range.
    var pageSize = Math.min(PAGE_SIZE, ENDINDEX - INDEX + 1);
    var query = "?max-results=" + pageSize + "&start-index=" + INDEX + SEARCH;
    // like https://gdata.youtube.com/feeds/api/users/breakingtheset/uploads?max-results=50&start-index=1
    // or   http://gdata.youtube.com/feeds/api/playlists/PL6E92FE8AF927E4F1
    var XMLFEED = CHANNEL
      ? "https://gdata.youtube.com/feeds/api/users/" + encodeURIComponent(CHANNEL) + "/uploads" + query
      : "http://gdata.youtube.com/feeds/api/playlists/" + encodeURIComponent(PLAYLIST) + query;
    console.log(XMLFEED);

    xmlhttp.open("GET", XMLFEED, false); // false = synchronous: blocks until the page arrives
    xmlhttp.send();
    var xmlDoc = xmlhttp.responseXML;
    if (!xmlDoc) {
      // responseXML is null when the response is missing or is not parseable XML.
      PITARA += "<p>Could not load or parse " + escapeHtml(XMLFEED) + " (HTTP status " + xmlhttp.status + ")</p>";
      break;
    }

    var entries = xmlDoc.getElementsByTagName("entry");
    console.log(entries.length + " videos found in " + XMLFEED);
    if (entries.length === 0) {
      PITARA += "<p>No more results found from index " + INDEX + " onwards</p>";
      break;
    }

    PITARA += "<p>Listing " + entries.length + " videos from index " + INDEX + " onwards from " + escapeHtml(XMLFEED) + "</p>";
    PITARA += "<table><tr><th>Image</th><th width=200>Title</th><th>Description</th></tr>";

    for (var i = 0; i < entries.length; i++) {
      var entry = entries[i];
      // The <id> text is a URL whose last path segment is the video id.
      var KEY = textOf(entry, "id").split("/").pop();
      var TITLE = textOf(entry, "title");
      var DURATION = secondsToHms(attrOf(entry, "yt:duration", "seconds")); // <yt:duration seconds='245'/>
      var VIEWS = attrOf(entry, "yt:statistics", "viewCount");
      var PUBLISHED = textOf(entry, "published").substring(0, 10); // first 10 characters of <published>
      // Double quotes become single quotes, as before, for the CSV/XML/HTML outputs.
      var CONTENT = textOf(entry, "media:description").replace(/"/g, "'");
      var CONTENT_ONE_LINE = CONTENT.replace(/\n/g, "<br />");

      PITARA += "<tr><td>";
      PITARA += THUMBNAILPRE + escapeHtml(KEY) + THUMBNAILPOST; //thumbnail without link
      PITARA += "</td><td>";
      PITARA += "<small>" + (INDEX + i) + ".</small>";
      PITARA += "<h3><a href=\"" + PREFIX + escapeHtml(KEY) + "\">" + escapeHtml(TITLE) + "</a></h3>";
      PITARA += "<small>Duration: " + DURATION;
      PITARA += ", " + (VIEWS === "" ? "unknown" : escapeHtml(VIEWS)) + " views";
      PITARA += "<br>Published on " + escapeHtml(PUBLISHED);
      PITARA += "<br>Video id: " + escapeHtml(KEY);
      PITARA += "</small>";
      PITARA += "</td><td>";
      PITARA += "<pre>" + escapeHtml(CONTENT) + "</pre>";
      PITARA += "</td></tr>";

      // for CSV:
      RAW += "\"" + csvField(KEY) + "\",\"" + csvField(TITLE) + "\",\"" + DURATION + "\",\"" + csvField(PUBLISHED) + "\",\"" + csvField(CONTENT_ONE_LINE) + "\",\"" + csvField(VIEWS) + "\"\n";

      // for images list (only shown if the out_images line below is re-enabled):
      IMAGES += "<a href=\"http://i.ytimg.com/vi/" + escapeHtml(KEY) + "/mqdefault.jpg\">" + escapeHtml(KEY) + ".jpg</a><br>";

      // for XML:
      XML += "<entry>\n<id>" + escapeHtml(KEY) + "</id>\n<title><![CDATA[" + cdataSafe(TITLE) + "]]></title>\n<duration>" + DURATION + "</duration>\n<date>" + escapeHtml(PUBLISHED) + "</date>\n<description><![CDATA[" + cdataSafe(CONTENT_ONE_LINE) + "]]></description>\n<viewCount>" + escapeHtml(VIEWS) + "</viewCount>\n</entry>\n\n";

      // for total ViewCount: entries without a numeric count add nothing.
      var viewsNumber = parseInt(VIEWS, 10);
      if (!isNaN(viewsNumber)) {
        COUNT += viewsNumber;
      }
      COUNTSLIST += KEY + "\t" + VIEWS + "\n";
    } // end of loop over the entries of the currently loaded page

    PITARA += "</table>";
    PITARA += "Total viewCount = " + COUNT; // running total up to and including this page
  } //MAHALOOP CLOSED

  XML += "</feed>";
  listing.innerHTML = PITARA;
  //document.getElementById("out_images").innerHTML = "Thumbnails listed below. Use DownThemAll, with renaming mask set to *text* to archive them.<br>" + IMAGES;
  document.getElementById("out_raw").value = RAW;
  document.getElementById("out_xml").value = XML;
  document.getElementById("viewcount_out").value = COUNTSLIST;
} // function listchannel() closed
/**
 * Format a duration given in seconds as "m:ss", or "h:mm:ss" once it reaches
 * an hour. Adapted from http://stackoverflow.com/a/5539081/4355695
 *
 * @param {number|string} d - Duration in seconds; coerced with Number().
 * @returns {string} For example "0:07", "4:05", "10:00" or "1:00:05".
 */
function secondsToHms(d) {
  var total = Number(d);
  var h = Math.floor(total / 3600);
  var m = Math.floor(total % 3600 / 60);
  var s = Math.floor(total % 3600 % 60);

  var hoursPart = h > 0 ? h + ":" : "";
  // Minutes get a leading zero whenever an hours field precedes them. This
  // includes m === 0, which previously produced "1:0:05" instead of "1:00:05".
  var minutesPart = (h > 0 || m > 0)
    ? (h > 0 && m < 10 ? "0" : "") + m + ":"
    : "0:";
  var secondsPart = (s < 10 ? "0" : "") + s;

  return hoursPart + minutesPart + secondsPart;
}
/**
 * Synchronously fetch the feed entry for one video and return its view count.
 *
 * @param {string} ID - The video id, e.g. "5L3sDgPaGH4" from
 *   http://www.youtube.com/watch?v=5L3sDgPaGH4 . Surrounding whitespace is
 *   ignored and the value is URL-encoded before it is placed in the feed URL.
 * @returns {string} The viewCount attribute of the entry's <yt:statistics>
 *   element, or "" when the response is not XML, the element is missing, or
 *   the attribute is absent. Errors raised by the request itself propagate.
 */
function getViewCount(ID) {
  var XMLFEED = "https://gdata.youtube.com/feeds/api/videos/" + encodeURIComponent(String(ID).trim()) + "?v=2";

  // Local request object: the previous version leaked xmlhttp/xmlDoc as globals.
  var request;
  if (typeof XMLHttpRequest !== "undefined") {
    // IE7+, Firefox, Chrome, Opera, Safari
    request = new XMLHttpRequest();
  } else {
    // IE6, IE5
    request = new ActiveXObject("Microsoft.XMLHTTP");
  }

  request.open("GET", XMLFEED, false); // false = synchronous: blocks until the response arrives
  request.send();

  var doc = request.responseXML;
  if (!doc) {
    // responseXML is null when the response is missing or is not parseable XML.
    return "";
  }

  var stats = doc.getElementsByTagName("yt:statistics");
  if (stats.length === 0) {
    return "";
  }

  var count = stats[0].getAttribute("viewCount");
  return count === null ? "" : count;
}
| </script> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment