Last active
January 25, 2018 14:36
-
-
Save tanyagupta/a6af754b9cd898bbba07ce1e9a7324b2 to your computer and use it in GitHub Desktop.
Use the YouTube ID of a closed-captioned Udacity video to extract its transcript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Web-app entry point: this starts the Google Apps server side of the page.
 *
 * Evaluates the 'extract_text_from_youtube' HTML template, gives the
 * resulting page its title and requests the IFRAME sandbox mode.
 *
 * @returns {*} whatever the final chained setSandboxMode() call returns
 *     (the page output produced by evaluating the template).
 */
function doGet() {
  var pageTitle = 'Get transcript from Udacity Video';
  var page = HtmlService.createTemplateFromFile('extract_text_from_youtube').evaluate();
  var titledPage = page.setTitle(pageTitle);
  return titledPage.setSandboxMode(HtmlService.SandboxMode.IFRAME);
}
/**
 * Fetches the English caption track for a video and returns it as plain text.
 *
 * The request is made server-side with UrlFetchApp to avoid the CORS issues
 * an ajax call from the browser would run into.
 *
 * @param {string} id The YouTube video id (not the full URL). Surrounding
 *     whitespace is ignored.
 * @returns {string} The caption text with markup tags removed by stripTags(),
 *     or an empty string when no id was supplied.
 */
function getTranscript(id) {
  var videoId = String(id == null ? '' : id).trim();
  if (videoId === '') {
    // Nothing to look up; skip the network round trip.
    return '';
  }

  // Escape the id so characters such as "&" or "=" in pasted input cannot
  // add or override query parameters.
  var url = 'https://video.google.com/timedtext?lang=en&v=' + encodeURIComponent(videoId);
  var response = UrlFetchApp.fetch(url);

  return stripTags(response.getContentText());
}
/**
 * Logs the raw input, then returns it with every markup tag removed.
 *
 * Written as a server function because Google does extra processing with the
 * string on the client that messes up the tag removal.
 *
 * @param {string} string Text that may contain HTML/XML tags.
 * @returns {string} The same text with each tag replaced by an empty string.
 */
function stripTags(string) {
  // An opening or closing tag; an unterminated "<..." running to the end of
  // the input is matched as well.
  var tagPattern = /<\/?[^>]+(>|$)/g;

  Logger.log(string);
  return string.replace(tagPattern, "");
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- jQuery must load before the Bootstrap script below. -->
<script src="https://code.jquery.com/jquery-1.10.2.js"></script>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.3/css/bootstrap.min.css" integrity="sha384-MIwDKRSSImVFAZCVLtU0LMDdON6KVCrZHyVQQj6e8wIEJkW4tvwqXrbMIya1vriY" crossorigin="anonymous">
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.3/js/bootstrap.min.js" integrity="sha384-ux8v3A6CPtOTqOzMKiuo3d/DomGaaClxFYdCu2HPMBEkf6x2xiDyJ7gkXU0MWwaD" crossorigin="anonymous"></script>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"> <!-- added on dec 27 -->
<title>Get captions</title>
</head>
<body>
<div class="jumbotron">
  <h1 class="display-3">Get my GWG Udacity video transcript</h1>
  <p class="lead">This is a simple script that gets the closed caption content from Udacity videos - for my personal use and for Grow with Google scholars only</p>
  <hr class="my-4">
</div>
<div class="container">
  <div class="form-group row">
    <!-- The label targets the input's single id, which the page script also reads (#url_text). -->
    <label for="url_text" class="col-2 col-form-label">YouTube ID (*not url*)</label>
    <div class="col-10">
      <!-- type="text": the expected value is a bare video id, not a URL. -->
      <input id="url_text" class="form-control" type="text" placeholder="(id of Udacity youtube url goes here)">
    </div>
    <div class="row">
      <div class="col-md-3">
        <!-- type="button": there is no form to submit; clicks are handled by the page script (#start, #clear). -->
        <button id="start" type="button" class="btn btn-primary">Get my Udacity transcript</button></div>
      <div class="col-md-3">
        <button id="clear" type="button" class="btn btn-primary">Clear everything</button></div>
    </div>
  </div>
  <div class="row"></div>
  <!-- #display initially holds the notes card; the page script replaces its content. -->
  <div id="display" class="row">
    <div class="card">
      <div class="card-block">
        <h4 class="card-title">Notes</h4>
        <strong>Feedback welcome!</strong><br>
        <!-- <h6 class="card-subtitle mb-2 text-muted"><a target="_blank" href="mailto:#">[email protected]</a> </h6>-->
        <h6 class="card-subtitle mb-2 text-muted"><a target="_blank" href="https://medium.com/@tanyagupta">@tanyagupta</a> </h6>
        <p class="card-text"></p>
        <ul><li> Only tested on Udacity videos that have close captioning </li>
          <li>For right now the input *has* to be the id only not the whole url. The id is usually the last few characters after the "=" of the url. For example if the url is https://www.youtube.com/watch?time_continue=2&amp;v=bmH31DLKrFw the id is bmH31DLKrFw </li>
          <li> Error handling or expanding this script is on my to-do list</li>
        </ul>
      </div>
    </div>
  </div>
</div>
</body>
</html>
<script>
  // Client-side wiring for the two buttons: "start" asks the server for a
  // transcript and shows it in #display; "clear" resets the page.
  $(function () {
    /**
     * Decodes HTML character references in `text` without interpreting markup.
     * A <textarea> treats assigned innerHTML as text plus entities, so any
     * tags in the input stay inert and come back as literal characters.
     */
    function decodeEntities(text) {
      var scratch = document.createElement("textarea");
      scratch.innerHTML = text;
      return scratch.value;
    }

    $("#start").click(function () {
      var videoId = $.trim($("#url_text").val());
      $("#display").html("");
      if (videoId === "") {
        return; // nothing to look up
      }

      google.script.run
        .withSuccessHandler(function (data) {
          // The previous implementation decoded the response twice (once via
          // innerHTML, once via .html()); presumably the caption feed
          // double-escapes characters - TODO confirm. Both passes are kept,
          // but the result is inserted as text so remote content can never
          // become live markup.
          var transcript = decodeEntities(decodeEntities(data || ""));
          $("#display").text(transcript);
        })
        .withFailureHandler(function (error) {
          // Surface server-side failures instead of leaving the page blank.
          $("#display").text("Could not get the transcript: " + error.message);
        })
        .getTranscript(videoId);
    });

    $("#clear").click(function () {
      $("#display").html("");
      $("#url_text").val("");
    });
  });
</script>
<style>
  /* Ideally a separate css file, but with just two entries a style tag is used.
     Note: this must be a CSS comment. An HTML-style comment here is parsed as
     part of the next selector, which invalidates and drops the .btn rule. */
  .btn {
    padding: 5%;
    margin-top: 5%;
  }
  .card {
    background-color: #f1f1f1;
  }
</style>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment