Created
February 20, 2013 07:08
-
-
Save kuwa72/4993567 to your computer and use it in GitHub Desktop.
Gauche CGI, Scrape site-titles in LAN nodes with threads. LAN内で動いているWebサイトからTitleを抜いてリンク集にするCGIです。 マルチスレッドのおかげで概ね高速に動作しますが、Cygwinのpthreadでは情報が抜ける場合が多いです。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/Gauche/bin/gosh | |
(use text.html-lite) | |
(use rfc.http) | |
(use rfc.uri) | |
(use www.cgi) | |
(use sxml.ssax) | |
(use sxml.sxpath) | |
(load "htmlprag.scm") | |
(use srfi-27) ; random-integer | |
(use srfi-43) ; vector-swap! | |
(use gauche.threads) | |
;from http://d.hatena.ne.jp/rui314/20070118/p1 | |
(random-source-randomize! default-random-source) | |
(define (shuffle lis) | |
(let1 vec (list->vector lis) | |
(do ((i (- (vector-length vec) 1) (- i 1))) | |
((= i 0) (vector->list vec)) | |
(vector-swap! vec i (random-integer (+ i 1)))))) | |
(define (intlist n) | |
(if (= n 0) | |
() | |
(cons n (intlist (- n 1))))) | |
(define (gettitle host path) | |
(guard (exc (else ())) ;drop error | |
(let-values (((status head body) (http-get host path))) | |
(let ((xml (html->shtml body))) | |
(list | |
status | |
(uri-compose :scheme "http" :host host :path path) | |
((sxpath "/html/head/title/text()") xml)))))) | |
(define (mkthr a) | |
(if (null? a) | |
() | |
(cons | |
(thread-start! (make-thread (lambda _(gettitle | |
(string-join (list "192.168.11." (number->string (car a))) "") | |
"/")))) | |
(mkthr (cdr a))))) | |
(define (main args) | |
(let ((urls (remove null? (map thread-join! (mkthr (shuffle (intlist 255))))))) | |
(cgi-main | |
(lambda (params) | |
`(,(cgi-header) | |
,(html-doctype) | |
,(html:html | |
(html:head (html:title "list")) | |
(html:body | |
(html:ul | |
(map (lambda (v) | |
(if (equal? (car v) "200") | |
(html:li | |
(html:a :href (car (cdr v)) | |
(cdr (cdr v)) | |
)) | |
() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment