Created
August 10, 2012 07:28
-
-
Save TauPan/3312277 to your computer and use it in GitHub Desktop.
modern port of dailystrips (http://sourceforge.net/projects/dailystrips/) to racket, with xpath and xslt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env racket | |
#lang racket/base | |
;; downloading and compiling those requires will take ages, but only | |
;; for the first time! | |
(require srfi/1 | |
srfi/13 | |
(planet bzlib/http:1:0) | |
(planet neil/htmlprag:1:6) | |
;; (except-in (planet lizorkin/ssax:2:0/ssax) | |
;; ;; conflicts with srfi-1: | |
;; fold-right | |
;; fold | |
;; filter | |
;; cons*) | |
(planet lizorkin/sxml:2:1/sxml) | |
racket/port | |
racket/function | |
racket/match) | |
(define (rx-match-node regex) | |
(lambda (nodeset bindings) | |
(filter ((curry regexp-match) | |
regex) | |
nodeset))) | |
;;; configuration: | |
(define comics | |
`(;; (name | |
;; author | |
;; homepage | |
;; baseurl (or homepage if #t, nothing if #f) | |
;; searchpage (or homepage if #f) | |
;; searchpattern | |
;; searchpattern (title, or '()) | |
(xkcd | |
"Randall Munroe" | |
"http://xkcd.com/" | |
#f | |
#f | |
(// img @ src *text* ,(rx-match-node "comics")) | |
(// img @ title *text*)) | |
(wumo | |
"Mikael Wulff and Anders Morgenthaler" | |
"http://wumocomicstrip.com/" | |
#t | |
#f | |
(// div a img @ src *text* ,(rx-match-node "/img/strip"))))) | |
(define-syntax-rule (without-output body ...) | |
(parameterize ((current-output-port (open-output-nowhere))) | |
body | |
...)) | |
(define (http-get-quiet url) | |
(without-output | |
(http-get url))) | |
(define get-sxml (compose html->sxml http-get-quiet)) | |
(define (list-for-path sym paths) | |
(match-let* (((list name author homepage baseurl searchpage xpath | |
xpathalt ...) | |
(assoc sym paths)) | |
(baseurl (if (eq? #t baseurl) | |
homepage | |
baseurl)) | |
(searchpage (or searchpage homepage)) | |
(xpathalt (if (null? xpathalt) | |
#f | |
(car xpathalt))) | |
(sxml (get-sxml searchpage))) | |
(filter (compose not string-null?) | |
(map (compose string-trim-both (curry string-append | |
(or baseurl ""))) | |
(append ((sxpath xpath) sxml) | |
(if xpathalt | |
((sxpath xpathalt) sxml) | |
'())))))) | |
(define (list-for-comic sym) | |
(list-for-path sym comics)) | |
;; (display (srl:sxml->html | |
;; `(html | |
;; (body | |
;; ,@(map (lambda (b) | |
;; `(span | |
;; (a | |
;; (@ | |
;; (href | |
;; ,(format "http://www.lastfm.de/search?q=~a&type=artist" b))) | |
;; ,b) | |
;; (br))) | |
;; (comic-results)))))) | |
(display (map (lambda (x) (cons x (list-for-comic x))) | |
(map car comics))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment