Skip to content

Instantly share code, notes, and snippets.

@el-chogo
Last active April 22, 2016 00:02
Show Gist options
  • Save el-chogo/e65c9515ffcc252241d4af6a2bc496fa to your computer and use it in GitHub Desktop.
Save el-chogo/e65c9515ffcc252241d4af6a2bc496fa to your computer and use it in GitHub Desktop.
A script written as a joke, but it might be useful to someone. It is the first thing I wrote in Lisp.
;; Load every Quicklisp system the script uses, in a fixed order.
;; *STANDARD-OUTPUT* is rebound to a string stream while loading, so anything
;; QL:QUICKLOAD writes there is captured in the (discarded) result string
;; instead of appearing in the script's own output.
(with-output-to-string (*standard-output*)
  (dolist (system '("drakma" "plump" "split-sequence"
                    "alexandria" "cl-strings" "clss"))
    (ql:quickload system)))
;; Replace the global random state with a freshly, randomly initialised one so
;; that PICK-RANDOM does not repeat the same choices on every run.
(setf *random-state* (make-random-state t))
;; NOTE(review): depending on how SBCL is launched, element 1 of *POSIX-ARGV*
;; may be a runtime option (e.g. --script) rather than a user-supplied
;; argument -- confirm against the actual invocation.
(defparameter *author*
  (or (nth 1 *posix-argv*)
      "roberto.malaver")
  "Author identifier used to build the author URL and the local directory
name.  Taken from element 1 of *POSIX-ARGV* when present, otherwise the
default author.")
(defun trim (str)
  "Return STR with leading and trailing #\\Newline and #\\Space characters removed."
  (let ((padding (list #\Newline #\Space)))
    (string-trim padding str)))
(defun clean (x)
  "Pass X through CL-STRINGS:CLEAN twice: first with #\\Newline as the
character, then with #\\Space, and return the result of the second pass."
  (let ((newline-cleaned (cl-strings:clean x :char #\Newline)))
    (cl-strings:clean newline-cleaned :char #\Space)))
(defun format-title-filename (title)
  "Turn the article TITLE into the base name used for its saved file.

The title goes through these steps, in order: CL-STRINGS:CLEAN-DIACRITICS,
CL-STRINGS:KEBAB-CASE, CL-STRINGS:CLEAN on #\\-, removal of every comma,
CL-STRINGS:CLEAN on #\\., and finally TRIM."
  (let* ((plain       (cl-strings:clean-diacritics title))
         (kebab       (cl-strings:kebab-case plain))
         (dash-clean  (cl-strings:clean kebab :char #\-))
         (comma-free  (cl-strings:replace-all dash-clean "," ""))
         (dot-clean   (cl-strings:clean comma-free :char #\.)))
    (trim dot-clean)))
(defun get-article (html)
  "Return the cleaned text of the article body found in the parsed document HTML.

Selects the nodes matching the CSS selector .articulo, takes the last one
with VECTOR-POP, extracts its text and passes it through TRIM and CLEAN.
NOTE(review): presumably the page always contains at least one .articulo
node; VECTOR-POP is applied to the selection without checking -- confirm."
  (let* ((matches (clss:select ".articulo" html))
         (body-node (vector-pop matches)))
    (clean (trim (plump:text body-node)))))
(defun get-title (html)
  "Return the trimmed text of the first h1 element found in HTML."
  (let ((headings (plump:get-elements-by-tag-name html "h1")))
    (trim (plump:text (first headings)))))
(defun get-time (html)
  "Return the trimmed text of the first time element found in HTML."
  (let ((stamps (plump:get-elements-by-tag-name html "time")))
    (trim (plump:text (first stamps)))))
(defun get-href (x)
  "Return the value of the href attribute of node X."
  (plump:get-attribute x "href"))
(defun get-link-title (x)
  "Return the file name for link node X: its text run through
FORMAT-TITLE-FILENAME with the .html extension appended."
  (let ((base-name (format-title-filename (plump:text x))))
    (concatenate 'string base-name ".html")))
(defun prepend-aporrea (x)
  "Return X prefixed with the aporrea.org site root, turning a site-relative
path into an absolute URL."
  (let ((site-root "http://www.aporrea.org"))
    (concatenate 'string site-root x)))
(defun partition (list cell-size)
  "Split LIST into consecutive sublists of CELL-SIZE elements each.

The final sublist holds the remaining elements and may be shorter than
CELL-SIZE when the length of LIST is not a multiple of it.  Returns NIL for
an empty LIST.  CELL-SIZE must be a positive integer; anything else signals
a TYPE-ERROR (a size of 0 would never advance through the list)."
  (check-type cell-size (integer 1))
  (loop for cell on list by #'(lambda (tail) (nthcdr cell-size tail))
        ;; Copy at most CELL-SIZE items; stopping at the end of CELL keeps a
        ;; short trailing chunk from running past the end of the list.
        collect (loop for item in cell
                      repeat cell-size
                      collect item)))
(defun get-filename (x y)
  "Build the relative path *AUTHOR*/<second element of X>/Y.

X is a list of strings whose second element becomes the directory name;
Y is the file name."
  (let ((directory-name (second x)))
    (concatenate 'string *author* "/" directory-name "/" y)))
(defun save (x y)
  "Download the page at URL X into a local file named Y unless it already exists.

The target path comes from GET-FILENAME, applied to the reversed list of
/-separated segments of X, so the directory is the second-to-last segment
of the URL.  Missing directories are created.  When the file is absent the
page is fetched, written as UTF-8, and the file name is printed and
returned; when the file already exists nothing is downloaded and NIL is
returned."
  (let* ((segments (reverse (split-sequence:split-sequence #\/ x)))
         (filename (get-filename segments y))
         (path (sb-ext:parse-native-namestring filename)))
    (ensure-directories-exist path)
    (unless (probe-file path)
      (let ((article (drakma:http-request x :external-format-in :utf-8)))
        (with-open-file (f path
                           :external-format :utf-8
                           :direction :output
                           :if-does-not-exist :create)
          ;; Write the page verbatim.  It must not be used as a FORMAT
          ;; control string: any tilde in the HTML would be read as a
          ;; directive and either signal an error or corrupt the output.
          (write-string article f))
        (print filename)))))
;; Create the directory named after *AUTHOR* up front.  Only the directory
;; part of the path is created by ENSURE-DIRECTORIES-EXIST; the trailing
;; "file" component is a placeholder file name.
(let ((placeholder-path (concatenate 'string *author* "/file")))
  (ensure-directories-exist placeholder-path))
(defun download-article (x y)
  "Download the article at URL X into the file named Y.
Delegates to SAVE and returns whatever SAVE returns."
  (save x y))
(defun download-articles (xs)
  "Download the article behind every link node in the sequence XS.

For each node the href is made absolute with PREPEND-APORREA and the file
name comes from GET-LINK-TITLE.  A USOCKET:TIMEOUT-ERROR raised while
handling one link is swallowed so the remaining links are still processed.
Returns NIL."
  (flet ((fetch (link)
           (handler-case
               (download-article (prepend-aporrea (get-href link))
                                 (get-link-title link))
             (usocket:timeout-error () nil))))
    (map nil #'fetch xs)))
(defun get-articles (node)
  "Return the a elements located inside the element whose id is
tablaautores in the parsed document NODE."
  (let ((author-table (plump:get-element-by-id node "tablaautores")))
    (plump:get-elements-by-tag-name author-table "a")))
(defun download-latest (url)
  "Fetch the page at URL, parse it, and download every article linked from it.
The links are those returned by GET-ARTICLES for the parsed page."
  (let ((page (drakma:http-request url :external-format-in :utf-8)))
    (download-articles (get-articles (plump:parse page)))))
(defun rename (x)
  "Rename the saved HTML file X after the title found in its first h1 element.

The new name is the title run through FORMAT-TITLE-FILENAME with the .html
extension appended.  Returns the values of RENAME-FILE."
  ;; Read the title and let WITH-OPEN-FILE close the stream before renaming,
  ;; so the file is never renamed while it is still open.  The file is read
  ;; as UTF-8 because SAVE writes the downloaded pages with that encoding.
  (let ((title (with-open-file (html x :external-format :utf-8)
                 (get-title (plump:parse html)))))
    (rename-file x (sb-ext:parse-native-namestring
                    (concatenate 'string (format-title-filename title) ".html")))))
(defun pick-random (xs)
  "Return a randomly chosen element of the sequence XS, or NIL when XS is empty.

The choice is drawn with RANDOM from *RANDOM-STATE*.  XS may be a list or a
vector."
  (let ((size (length xs)))
    ;; RANDOM requires a positive limit, so an empty sequence has to be
    ;; handled before calling it.
    (when (plusp size)
      (elt xs (random size *random-state*)))))
(defun operate-on-html-file (file fn)
  "Open FILE, parse its contents with PLUMP:PARSE, and call FN on the
resulting document node.  Returns whatever FN returns."
  ;; Read as UTF-8 to match the encoding SAVE uses when writing the pages,
  ;; rather than depending on the implementation's default external format.
  (with-open-file (html file :external-format :utf-8)
    (let ((node (plump:parse html)))
      (funcall fn node))))
(defun pick-quote (x)
  "Return a random line of the text X chosen among the lines that split
into at least 20 pieces on #\\Space.

X is split on #\\Newline; lines yielding fewer than 20 space-separated
pieces are discarded and PICK-RANDOM chooses among the rest."
  (flet ((too-short-p (line)
           (< (length (cl-strings:split line #\Space)) 20)))
    (let ((candidates (remove-if #'too-short-p
                                 (cl-strings:split x #\Newline))))
      (pick-random candidates))))
(defun properties (html)
  "Print the title, the time and a random quote of the parsed article HTML
to standard output, one per line, each written with the ~s directive."
  ;; All three values are extracted before anything is printed.
  (let ((heading (get-title html))
        (published (get-time html))
        (body (get-article html)))
    (format t "~s~%" heading)
    (format t "~s~%" published)
    (let ((chosen-quote (pick-quote body)))
      (format t "~s~%" chosen-quote))))
(defun get-all-articles ()
  "Return the pathnames matching *AUTHOR*/*/*.*, i.e. every file found one
directory level below the author directory."
  (let ((wildcard (concatenate 'string *author* "/*/*.*")))
    (directory wildcard)))
;; Script entry point: download the articles listed on the author's page,
;; then print the properties of one randomly chosen saved article.
(download-latest
 (concatenate 'string "http://www.aporrea.org/autores/"
              *author*))
;; Alternative kept for reference: print the properties of every saved
;; article instead of a single random one.
; (map 'nil
;      #'(lambda(x) (operate-on-html-file x #'properties))
;      (get-all-articles))
(let ((articles (get-all-articles)))
  (cond (articles
         (operate-on-html-file (pick-random articles) #'properties))
        (t
         ;; Nothing was saved (e.g. unknown author or every download failed):
         ;; report it and exit with a failure status instead of crashing
         ;; while trying to pick from an empty list.
         (format *error-output* "No saved articles found for ~a~%" *author*)
         (sb-ext:exit :code 1))))
(sb-ext:exit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment