Last active
April 22, 2016 00:02
-
-
Save el-chogo/e65c9515ffcc252241d4af6a2bc496fa to your computer and use it in GitHub Desktop.
A script written as a joke, but it might be useful to someone. It is the first thing I wrote in Lisp.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Load every third-party system the script uses.  *STANDARD-OUTPUT* is
;; rebound to a string stream while loading, so whatever Quicklisp writes
;; there is captured instead of being printed.
(with-output-to-string (*standard-output*)
  (dolist (system '("drakma" "plump" "split-sequence"
                    "alexandria" "cl-strings" "clss"))
    (ql:quickload system)))

;; Replace the global random state with a freshly, randomly initialised one.
(setf *random-state* (make-random-state t))
(defparameter *author*
  (or (second *posix-argv*) "roberto.malaver")
  "Author identifier used in URLs and directory names: the second element
of *POSIX-ARGV* when there is one, otherwise \"roberto.malaver\".")
(defun trim (str)
  "Return STR with leading and trailing newline and space characters removed."
  (let ((whitespace (list #\Newline #\Space)))
    (string-trim whitespace str)))
(defun clean (x)
  "Apply CL-STRINGS:CLEAN to X twice: first with the newline character,
then, on that result, with the space character."
  (let* ((newlines-cleaned (cl-strings:clean x :char #\Newline))
         (spaces-cleaned (cl-strings:clean newlines-cleaned :char #\Space)))
    spaces-cleaned))
(defun format-title-filename (title)
  "Derive a file-name stem from TITLE.
The steps, in order: CL-STRINGS:CLEAN-DIACRITICS, CL-STRINGS:KEBAB-CASE,
CL-STRINGS:CLEAN on the dash character, removal of every comma,
CL-STRINGS:CLEAN on the dot character, and finally TRIM."
  (let* ((plain (cl-strings:clean-diacritics title))
         (kebab (cl-strings:kebab-case plain))
         (dashes-cleaned (cl-strings:clean kebab :char #\-))
         (without-commas (cl-strings:replace-all dashes-cleaned "," ""))
         (dots-cleaned (cl-strings:clean without-commas :char #\.)))
    (trim dots-cleaned)))
(defun get-article (html)
  "Return the text of an \".articulo\" node in HTML, passed through TRIM and
then CLEAN.  The node used is the one VECTOR-POP takes off the end of the
vector returned by CLSS:SELECT."
  (let* ((matches (clss:select ".articulo" html))
         (node (vector-pop matches)))
    (clean (trim (plump:text node)))))
(defun get-title (html)
  "Return the trimmed text of the first element in the list that
PLUMP:GET-ELEMENTS-BY-TAG-NAME returns for the \"h1\" tag in HTML."
  (let ((headings (plump:get-elements-by-tag-name html "h1")))
    (trim (plump:text (first headings)))))
(defun get-time (html)
  "Return the trimmed text of the first element in the list that
PLUMP:GET-ELEMENTS-BY-TAG-NAME returns for the \"time\" tag in HTML."
  (let ((stamps (plump:get-elements-by-tag-name html "time")))
    (trim (plump:text (first stamps)))))
(defun get-href (x)
  "Return the value of the \"href\" attribute of node X."
  (plump:get-attribute x "href"))
(defun get-link-title (x)
  "Return a file name for node X: its text run through
FORMAT-TITLE-FILENAME, with \".html\" appended."
  (let ((stem (format-title-filename (plump:text x))))
    (concatenate 'string stem ".html")))
(defun prepend-aporrea (x)
  "Return X prefixed with the site root \"http://www.aporrea.org\"."
  (let ((site-root "http://www.aporrea.org"))
    (concatenate 'string site-root x)))
(defun partition (list cell-size)
  "Split LIST into consecutive sublists of CELL-SIZE elements each.
The final sublist holds whatever elements remain, so it is shorter when the
length of LIST is not a multiple of CELL-SIZE.  An empty LIST yields NIL.
CELL-SIZE must be a positive integer; anything else signals a TYPE-ERROR
\(a step of zero would never advance through LIST)."
  (check-type cell-size (integer 1))
  (loop for cell on list by #'(lambda (rest) (nthcdr cell-size rest))
        ;; Collect at most CELL-SIZE items; unlike SUBSEQ with a fixed end
        ;; index, this simply stops when the tail runs out.
        collect (loop for item in cell
                      repeat cell-size
                      collect item)))
(defun get-filename (x y)
  "Return the relative path \"<*author*>/<second element of X>/<Y>\".
X is a list of strings; only its second element is used."
  (let ((subdirectory (second x)))
    (concatenate 'string *author* "/" subdirectory "/" y)))
(defun save (x y)
  "Download the page at URL X into a local file named Y, unless it exists.
The target path comes from GET-FILENAME, called with the `/'-separated
pieces of X in reverse order and with Y.  Missing directories are created
first.  When the file is absent, the page is fetched with
DRAKMA:HTTP-REQUEST, written out as UTF-8, and the file name is printed and
returned.  When the file is already present nothing is fetched and NIL is
returned."
  (let* ((url-parts (reverse (split-sequence:split-sequence #\/ x)))
         (filename (get-filename url-parts y))
         ;; Parse the native namestring once and reuse the pathname.
         (path (sb-ext:parse-native-namestring filename)))
    (ensure-directories-exist path)
    (unless (probe-file path)
      (let ((article (drakma:http-request x :external-format-in :utf-8)))
        (with-open-file (f path
                           :external-format :utf-8
                           :direction :output
                           :if-does-not-exist :create)
          ;; The page body is data, not a format control string: passing it
          ;; to FORMAT would interpret any `~' in the HTML as a directive.
          (write-string article f))
        (print filename)))))
;; Create the author's top-level directory if it is missing.  Only the
;; directory part of the path is created; "file" is just a placeholder name.
(ensure-directories-exist
 (concatenate 'string *author* "/file"))
(defun download-article (x y)
  "Save the page at URL X under file name Y by delegating to SAVE, and
return whatever SAVE returns."
  (save x y))
(defun download-articles (xs)
  "Download the article behind every link node in the sequence XS.
For each node the URL is its href prefixed by PREPEND-APORREA and the file
name comes from GET-LINK-TITLE.  A USOCKET:TIMEOUT-ERROR raised while
handling one node is swallowed so the remaining nodes are still processed.
Returns NIL."
  (flet ((fetch (link)
           (handler-case
               (download-article (prepend-aporrea (get-href link))
                                 (get-link-title link))
             (usocket:timeout-error () nil))))
    (map nil #'fetch xs)))
(defun get-articles (node)
  "Return the \"a\" elements found under the element of NODE whose id is
\"tablaautores\"."
  (let ((table (plump:get-element-by-id node "tablaautores")))
    (plump:get-elements-by-tag-name table "a")))
(defun download-latest (url)
  "Fetch the page at URL, parse it with PLUMP:PARSE, and download every
article link that GET-ARTICLES finds in it."
  (let ((html (drakma:http-request url :external-format-in :utf-8)))
    (download-articles (get-articles (plump:parse html)))))
(defun rename (x)
  "Rename the HTML file X after the title found inside it.
The file is parsed, its title is taken with GET-TITLE and turned into a
name by FORMAT-TITLE-FILENAME plus \".html\", and RENAME-FILE is called with
that name."
  (with-open-file (html x)
    (let* ((title (get-title (plump:parse html)))
           (new-name (concatenate 'string
                                  (format-title-filename title)
                                  ".html")))
      (rename-file x (sb-ext:parse-native-namestring new-name)))))
(defun pick-random (xs)
  "Return an element of the list XS at an index drawn with RANDOM from
*RANDOM-STATE*."
  (let ((index (random (length xs) *random-state*)))
    (nth index xs)))
(defun operate-on-html-file (file fn)
  "Open FILE, parse its contents with PLUMP:PARSE, and call FN on the
resulting node.  Returns whatever FN returns."
  (with-open-file (stream file)
    (funcall fn (plump:parse stream))))
(defun pick-quote (x)
  "Return a randomly picked line of the string X.
X is split on newlines, and only lines that split into at least 20
space-separated pieces are candidates."
  (let ((lines (cl-strings:split x #\Newline)))
    (pick-random
     (remove-if-not (lambda (line)
                      (>= (length (cl-strings:split line #\Space)) 20))
                    lines))))
(defun properties (html)
  "Print three lines to standard output for the parsed document HTML: its
title, its time, and a quote picked from its article text.  Each value is
printed with the ~s directive.  Returns NIL."
  (let* ((heading (get-title html))
         (stamp (get-time html))
         (body (get-article html)))
    (format t "~s~%~s~%" heading stamp)
    ;; The quote is picked only after title and time have been printed.
    (format t "~s~%" (pick-quote body))))
(defun get-all-articles ()
  "Return the pathnames DIRECTORY finds for the pattern
\"<*author*>/*/*.*\"."
  (let ((pattern (concatenate 'string *author* "/*/*.*")))
    (directory pattern)))
;; Download the articles linked from the author's listing page.
(let ((listing-url (concatenate 'string
                                "http://www.aporrea.org/autores/"
                                *author*)))
  (download-latest listing-url))

;; Disabled alternative: print the properties of every saved article.
; (map 'nil
;      #'(lambda (x) (operate-on-html-file x #'properties))
;      (get-all-articles))

;; Print title, time and a quote for one randomly chosen saved article,
;; then leave the Lisp process.
(operate-on-html-file (pick-random (get-all-articles)) #'properties)
(sb-ext:exit)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment