Last active
July 29, 2019 23:16
-
-
Save manuzhang/cc1a0cc49638f0f2a30af2e2d06dc6d3 to your computer and use it in GitHub Desktop.
Analyzing my overcast data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.time.ZonedDateTime | |
import java.time.format.DateTimeFormatter | |
import java.time.temporal.ChronoUnit | |
import $ivy.`com.lihaoyi::requests:0.1.8` | |
import $ivy.`org.scala-lang.modules::scala-xml:1.2.0` | |
import $ivy.`org.seleniumhq.selenium:selenium-chrome-driver:3.0.1` | |
import org.openqa.selenium.JavascriptExecutor | |
import org.openqa.selenium.chrome.{ChromeDriver, ChromeOptions} | |
import requests.TimeoutException | |
import scala.util.{Failure, Success, Try} | |
import scala.xml.XML | |
// ---------------------------------------------------------------------------
// Overcast listening report (Ammonite script).
//
// Reads an Overcast OPML export ("overcast.opml" in the working directory),
// and for every podcast feed sums the audio duration (as reported by the
// Overcast web player's <audio id="audioplayer"> element) of all episodes
// marked as played. One "<podcast>,<total>" row per feed is appended to
// "overcast_report.csv" in the working directory.
// ---------------------------------------------------------------------------

/** Location of the chromedriver binary handed to Selenium. */
val ChromeDriverPath = "/Users/doriadong/bin/chromedriver"

/** Proxy all browser traffic is routed through. */
val ProxyServer = "http://127.0.0.1:1087"

/** JavaScript evaluated in the episode page to read the player's duration. */
val DurationScript = "return document.getElementById('audioplayer').duration"

/** Upper bound on duration polls per episode, so a page that never loads cannot hang the run. */
val MaxDurationPolls = 30

/** Linear back-off step: the n-th poll (0-based) is preceded by a sleep of n * this many ms. */
val PollBackoffMillis = 100L

/**
  * Checks whether the episode's enclosure (audio file) URL is still being served.
  *
  * A timeout is deliberately treated as "reachable" (the original script did the
  * same); any other failure is logged with its stack trace and treated as
  * unreachable, as is any HTTP status >= 400.
  *
  * @param source enclosure URL taken from the OPML `enclosureUrl` attribute
  * @return `true` when the duration lookup should proceed
  */
def enclosureReachable(source: String): Boolean =
  Try(requests.get(source)) match {
    case Success(resp)                => resp.statusCode < 400
    case Failure(_: TimeoutException) => true
    case Failure(e) =>
      e.printStackTrace()
      false
  }

/**
  * Polls the page currently loaded in the browser until the audio player
  * reports a usable duration.
  *
  * Selenium maps JavaScript numbers to `java.lang.Long` or `java.lang.Double`
  * depending on whether they are integral, so the result is matched as a
  * `java.lang.Number` instead of being cast to `Double`. Anything that is not a
  * finite, positive number (presumably: metadata not loaded yet) triggers
  * another poll, up to [[MaxDurationPolls]] attempts.
  *
  * @param executor browser session with the episode page already loaded
  * @param url      Overcast episode URL, used only for log messages
  * @param podcast  podcast title, used only for log messages
  * @return the reported duration, or 0.0 when the script fails or no duration
  *         shows up within the retry budget
  */
def pollDuration(executor: JavascriptExecutor, url: String, podcast: String): Double = {
  def attempt(n: Int): Double =
    if (n >= MaxDurationPolls) {
      println(s"Gave up waiting for duration of $url from $podcast after $MaxDurationPolls attempts")
      0.0
    } else {
      Thread.sleep(PollBackoffMillis * n)
      Try(executor.executeScript(DurationScript)) match {
        case Success(value: java.lang.Number)
            if value.doubleValue > 0.0 && !java.lang.Double.isInfinite(value.doubleValue) =>
          value.doubleValue
        case Success(_) =>
          attempt(n + 1)
        case Failure(e) =>
          println(s"Failed to get duration of $url from $podcast becasue of ${e.getMessage}")
          0.0
      }
    }

  attempt(0)
}

/**
  * Duration contributed by a single played episode.
  *
  * @param node    OPML `<outline>` element of the episode
  * @param podcast title of the feed the episode belongs to (for log messages)
  * @param driver  browser session used to open the Overcast episode page
  * @return the player-reported duration, or 0.0 if the enclosure is
  *         unreachable or no duration could be read
  */
def playedDuration(node: scala.xml.Node, podcast: String, driver: ChromeDriver): Double = {
  val url = node \@ "overcastUrl"
  val source = node \@ "enclosureUrl"
  if (enclosureReachable(source)) {
    driver.get(url)
    pollDuration(driver, url, podcast)
  } else {
    println(s"$source of $url from $podcast not found")
    0.0
  }
}

/** Quotes a CSV field (RFC 4180 style) only when it contains a comma, quote or line break. */
def csvField(value: String): String =
  if (value.exists(c => c == ',' || c == '"' || c == '\n' || c == '\r'))
    "\"" + value.replace("\"", "\"\"") + "\""
  else value

val doc = XML.loadFile("overcast.opml")

System.setProperty("webdriver.chrome.driver", ChromeDriverPath)
val options = new ChromeOptions()
options.addArguments(s"--proxy-server=$ProxyServer")
val driver = new ChromeDriver(options)

// try/finally guarantees the Chrome process is shut down even if a page load
// or file write throws half-way through the export.
try {
  for {
    outline <- doc \\ "outline"
    if (outline \@ "type") == "rss"
  } {
    val podcast = outline \@ "text"
    // Every played episode counts; no listen-date filter is applied (the
    // original script carried one only as commented-out code).
    val total = outline.nonEmptyChildren.collect {
      case node if (node \@ "played") == "1" => playedDuration(node, podcast, driver)
    }.sum
    os.write.append(os.pwd / "overcast_report.csv", s"${csvField(podcast)},$total\n")
  }
} finally {
  driver.quit()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output till 2019-07-21T20:00:00-04:00