Last active
November 28, 2017 03:23
-
-
Save dvanhorn/20e70550da40113f512dc998839a7122 to your computer and use it in GitHub Desktop.
arXiv cs.PL feed reader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(provide get-news cs.PL) | |
(require xml net/url) | |
;; cs.PL : String
;; URL of the arXiv RSS feed for the cs.PL category; pass it to `get-news`.
(define cs.PL "http://export.arxiv.org/rss/cs.PL")
;; get-news : String -> [Listof [Listof String]]
;; Extract news (title, url, authors) from the arXiv RSS feed at URL `rss`.
;;
;; Every <item> element that is a direct child of the document's root element
;; produces one list. That list holds, in document order, one string for each
;; child of the item that is
;;   - a <title> whose content is only character data (fragments are joined),
;;   - a <link> whose content is a single piece of character data, or
;;   - a <dc:creator>, rendered by `authors`.
;; Children of any other shape are dropped, so an entry has three strings
;; only when the item has exactly one well-formed child of each kind.
;;
;; The feed is fetched with `call/input-url`, which closes the input port once
;; `read-xml` returns, instead of leaving the connection open.
(define (get-news rss)
  (match (call/input-url (string->url rss) get-pure-port read-xml)
    [(document _ (element _ _ _ _ root-content) _)
     (filter-map
      (match-lambda
        [(element _ _ 'item _ item-content)
         (filter-map
          (match-lambda
            ;; A title may be split into several pcdata fragments.
            [(element _ _ 'title _ (list (pcdata _ _ t) ...))
             (apply string-append t)]
            [(element _ _ 'link _ (list (pcdata _ _ l))) l]
            [(element _ _ 'dc:creator _ (list p ...))
             (authors p)]
            ;; #false results are removed by filter-map.
            [_ #false])
          item-content)]
        [_ #f])
      root-content)]))
;; ignore : InputPort Natural -> Void
;; Consume and discard `i` characters from port `p`.
(define (ignore p i)
  (let skip ([remaining i])
    (cond
      [(zero? remaining) (void)]
      [else
       (read-char p)
       (skip (sub1 remaining))])))
;; read-elems : String -> [Listof Element]
;; Parse every XML element found in `str`. Whatever text precedes each "<"
;; (such as the ", " between elements) is skipped; parsing stops at end of
;; input or when no further "<" remains.
(define (read-elems str)
  (define port (open-input-string str))
  (let next ()
    (match (and (not (eof-object? (peek-char port)))
                (regexp-match-peek-positions #rx"<" port))
      [#f '()]
      [(list (cons offset _))
       ;; Drop the characters in front of the "<", then read one element
       ;; before recursing so elements are consumed left to right.
       (ignore port offset)
       (let ([elem (read-xml/element port)])
         (cons elem (next)))])))
;; oxford : [Listof String] -> String
;; Join strings into one phrase using the Oxford comma:
;;   no items -> "", one -> "a", two -> "a and b",
;;   three or more -> "a, b, and c".
(define (oxford xs)
  (match xs
    [(list) (string-append)]
    [(list only) (string-append only)]
    [(list left right) (string-append left " and " right)]
    [_ (string-join xs ", " #:before-last ", and ")]))
;; authors : [Listof Pcdata] -> String
;; Render the author list in Oxford-comma style.
;; `p` is a list of pcdata fragments whose concatenated text is markup made
;; of <a> elements; the text inside each <a> becomes one author name.
(define (authors p)
  ;; Glue the text fragments back together into a single markup string.
  (define markup
    (apply string-append
           (for/list ([fragment (in-list p)])
             (match fragment
               [(pcdata _ _ text) text]))))
  ;; anchor-text : Element -> String
  ;; Text of an <a> element; numeric entities are decoded to their character.
  (define (anchor-text anchor)
    (match anchor
      [(element _ _ 'a _ kids)
       (apply string-append
              (for/list ([kid (in-list kids)])
                (match kid
                  [(pcdata _ _ text) text]
                  [(entity _ _ (? integer? code))
                   (string (integer->char code))])))]))
  (oxford (map anchor-text (read-elems markup))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment