Last active
November 17, 2019 22:19
-
-
Save elfsternberg/104b642c356b1ba6a29acd3f6087f0c7 to your computer and use it in GitHub Desktop.
Given a Twitter Archive folder, dump every original (i.e. not retweeted) text in the folder.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env hy | |
; As quick and dirty as it gets. Tested on hy 0.17 | |
; [hy](http://docs.hylang.org/en/stable/index.html) | |
(require [hy.extra.anaphoric [ap-each ap-reduce ap-map ap-filter]]) | |
(require [hy.contrib.walk [let]]) | |
(import json os) | |
(defn file-list []
  "Return a lazy iterator over the names in the current directory that
  look like monthly archive files: the lower-cased name starts with
  \"20\" and ends with \".js\"."
  (filter
    (fn [entry]
      (setv lowered (.lower entry))
      ; The "20" prefix matches years 2000-2099 only.
      (and (.startswith lowered "20")
           (.endswith lowered ".js")))
    (os.listdir ".")))
(defn tweets [filename]
  "Parse one archive file and return the decoded JSON value.

  filename -- path of the file to read.

  The first line of the file is discarded and the remainder is handed
  to json.loads, so a malformed remainder raises json.JSONDecodeError."
  ; `with` closes the handle even if parsing fails; the original leaked
  ; one open file per archive month.  The encoding is pinned to UTF-8 so
  ; decoding does not depend on the platform's default locale.
  (with [f (open filename "r" :encoding "utf-8")]
    (.readline f) ; Skip opening "Grailbird" line; this makes it parsable as JSON
    (json.loads (.read f))))
; Main pipeline: print the text of every tweet in the archive that is
; not a retweet, walking the archive files in sorted filename order.
(let [words (->>
              (file-list)
              (sorted)              ; sorted accepts any iterable; no list copy needed
              (map tweets)          ; convert list of files into list of lists of tweets
              (chain.from_iterable) ; flatten list
              ; Twitter marks a retweet with a "retweeted_status" member.
              ; The original tested "retweet_status", which never matches,
              ; so retweets were not being removed.
              (filter (fn [t] (not (in "retweeted_status" t))))
              (map (fn [t] (get t "text"))))] ; return text
  (ap-each words (print it)))
; COPYRIGHT: | |
; This program is copyrighted (c) 2019 | |
; Elf M. Sternberg ([email protected]) | |
; | |
; LICENSE: | |
; https://creativecommons.org/licenses/by/4.0/ | |
; | |
; This is free software released under the CC-BY license. Users of | |
; this software enjoy the following rights and responsibilities: | |
; | |
; Share — You may copy and redistribute the material in any medium or format | |
; | |
; Adapt — You may remix, transform, and build upon the material for | |
; any purpose, even commercially. | |
; | |
; Attribution — You must give appropriate credit, provide a link to | |
; the license, and indicate if changes were made. You may do so in any | |
; reasonable manner, but not in any way that suggests the licensor | |
; endorses you or your use. | |
; | |
; You may not employ technological measures or legal terms that | |
; legally prevent others from doing anything the license permits. THE | |
; SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY | |
; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment