Skip to content

Instantly share code, notes, and snippets.

@satyr
Created May 16, 2009 15:43
Show Gist options
  • Save satyr/112717 to your computer and use it in GitHub Desktop.
Save satyr/112717 to your computer and use it in GitHub Desktop.
/* appjet:version 0.1 */
import('storage', 'lib-at0m', 'dlog', 'cron');
// Crawl target and feed metadata shared by the request handlers below.
const Skr = 'http://shokenro.jp/';
const Author = Atom.Person('筒井康隆', {uri: 'http://www.jali.or.jp/tti/'});
const Rights = 'Yasutaka Tsutsui';
const Title = '笑犬楼大通り 偽文士日碌';
const Cover = 'http://shokenro.jp/shokenro/book-cover/';
const Icon = 'http://shokenro.jp/tmp-img/favicon.ico';

page.setFavicon(Icon);

// Publish the query-string options as undeclared (global) variables:
// `type` and `num` are read by get_atom, `size` by iMerge.
{
  let params = request.params;
  type = params.type || '';
  size = +params.size || 540;
  num = params.n || 1e3;
}

// NOTE(review): dispatch() comes from the AppJet runtime; presumably it routes
// the request to the matching get_*/cron_* function -- confirm.
dispatch();

// (Re)register the two crawl jobs when running as a preview and the current
// request is not itself a cron invocation. The numbers are passed straight
// through to scheduleRepeating as the repeat period.
if(appjet.isPreview && !request.isCron){
  scheduleThem('indices', 97, 'pages', 47);
}
/**
 * Renders the landing page: a list linking to the text (/txt) and image
 * (/img) views, followed by a form that submits to /atom with the
 * `type`, `size` and `n` fields.
 */
function get_(){
  var nav = UL(
    LI(link('/txt', 'てきすと')),
    LI(link('/img', 'いめーじ')));

  var legend = LEGEND({title: 'Atom feed'}, 'あとむ');

  // Feed content type: images (default, empty value) or text.
  var typeChooser = SELECT(
    {id: 'T', name: 'type'},
    OPTION({value: '', selected: 'selected'}, 'いめーじ'),
    OPTION({value: 'text'}, 'てきすと'));
  var typeLabel = LABEL({'for': 'T'}, 'たいぷ ', typeChooser);

  // Tooltip reads "height of the page image".
  var sizeLabel = LABEL(
    {'for': 'S', title: 'ページ画像の高さ'}, 'さいず',
    INPUT({id: 'S', name: 'size', size: 4}));

  // Tooltip reads "maximum number of entries".
  var countLabel = LABEL(
    {'for': 'N', title: 'エントリの最大数'}, 'かず',
    INPUT({id: 'N', name: 'n', size: 4}));

  var submit = INPUT({type: 'submit'});

  var feedForm = FORM(
    {action: '/atom'},
    FIELDSET(legend, typeLabel, sizeLabel, countLabel, submit));

  print(nav, feedForm);
}
/**
 * Renders the text view: for every index entry, writes the stored page's
 * title and text to the response body, then hides the AppJet footer.
 *
 * Index entries are added by cron_indices before cron_pages has stored the
 * corresponding page, so storage[idx.number] may not exist yet. Such entries
 * are skipped (as get_atom does) instead of throwing on `d.title`.
 *
 * NOTE(review): title and text are written without HTML escaping; they come
 * from the crawled site -- confirm that raw markup is intended here.
 */
function get_txt(){
  Array.prototype.forEach.call(storage.indices, function(idx){
    var d = storage[idx.number];
    if(!d) return; // indexed but not crawled yet
    page.body.write(
      '<p class="title">'+ d.title +'</p><pre>'+ d.text +'</pre>\n');
  });
  hideFooter();
}
/**
 * Renders the image view: the merged page images of every crawled entry,
 * separated by a horizontal rule, centered via an inline stylesheet.
 *
 * Index entries whose page has not been stored yet (storage[idx.number] is
 * missing) are skipped instead of throwing on `d.images`. The separator is
 * written *between* emitted pages, so skipping an entry never leaves a
 * leading, doubled or trailing rule.
 */
function get_img(){
  var hr = '<hr class="-"/>\n';
  var wroteAny = false;
  Array.prototype.forEach.call(storage.indices, function(idx){
    var d = storage[idx.number];
    if(!d) return; // indexed but not crawled yet
    if(wroteAny) page.body.write(hr);
    page.body.write(iMerge(d.images));
    wroteAny = true;
  });
  page.head.write('<style>body{text-align:center}</style>\n');
  hideFooter();
}
/**
 * Serves the Atom feed.
 *
 * Reads the globals set from the query string: `type` (a value matching
 * /^te?xt/i selects plain-text content, anything else HTML built from the
 * page images) and `num` (maximum number of entries).
 *
 * Entries are taken from storage.indices in reverse stored order. Index
 * entries whose page has not been stored yet are dropped *before* the
 * `num` limit is applied, so uncrawled entries no longer use up the limit
 * and leave the feed short or empty.
 */
function get_atom(){
  var txt = /^te?xt/i.test(type);

  var crawled = Array.prototype.slice.call(storage.indices)
    .reverse()
    .filter(function(idx){ return storage[idx.number] })
    .slice(0, num);

  var entries = crawled.map(function(idx){
    var d = storage[idx.number];
    var ent = {
      title: d.title,
      link: {href: Skr + idx.number},
      updated: d.update,
      author: Author,
      rights: Rights
    };
    ent.content = txt
      ? {type: 'text', _: d.text}
      : {type: 'html', _: iMerge(d.images)};
    return ent;
  });

  Atom({
    title: Title,
    subtitle: 'Full-feed of '+ Cover,
    author: Author,
    link: {href: Cover},
    icon: Icon,
    updated: new Date
  }, entries).write();
}
/**
 * Cron job: crawls the index pages index3 .. index7 under Skr and appends
 * every entry not seen before to storage.indices as
 * {number, image, title}.
 *
 * An entry counts as already known when its number is present in
 * storage.indices OR a page is already stored under that number. Checking
 * only the stored page would re-append every entry that is indexed but not
 * crawled yet on each run, and would append an entry twice if it shows up
 * twice within a single run.
 *
 * Crawling stops at the first index page for which wget throws an error
 * whose message equals 302; any other error is rethrown.
 */
function cron_indices(){
  var opt = {followRedirects: false},
      re = / href=\"\/(\d+)[^<]+<img src='\/(.+?)'[^>]*?><[^>]*>([^<]+)/g,
      pageNos = [3, 4, 5, 6, 7],
      known = {};
  Array.prototype.forEach.call(storage.indices, function(idx){
    known[idx.number] = true;
  });
  for(var k = 0; k < pageNos.length; ++k){
    let htm, m;
    try{ htm = wget(Skr +'shokenro/index'+ pageNos[k] +'/', 0, opt) }
    catch(e){
      if(e.message == 302) break;
      throw e;
    }
    // The /g regex resumes from its lastIndex; a failed match resets it to 0,
    // so it starts from the beginning of the next page.
    while((m = re.exec(htm))){
      let [, num, src, ttl] = m;
      if(known[num] || storage[num]) continue;
      known[num] = true;
      dlog.info('new index: '+ ttl);
      Array.prototype.push.call(
        storage.indices, {number: num, image: src, title: ttl});
    }
  }
  dlog.info('crawled indices');
  response.stop(false);
}
/**
 * Cron job: fetches the pages of index entries that have no stored text yet
 * and stores each as {title, text, images, update} under its number.
 * At most 3 entries are processed per run.
 *
 * For one entry, up to 3 consecutive page numbers starting at the entry's
 * number are fetched (zero-padded to 7 digits); fetching stops at the first
 * page without a 740px-wide page image.
 */
function cron_pages(){
  var limit = 3;
  Array.prototype.every.call(storage.indices, function(idx){
    var number = idx.number, title = idx.title, d = storage[number];
    if(d && d.text) return true; // already crawled
    var rec = {title: title, text: '', images: {length: 0}};
    var opts = {followRedirects: false, complete: true};
    for(let i = +number, n = 3; n--; ++i){
      let res = wget(Skr + (1e7 + i +'').slice(1), 0, opts);
      let htm = res.data, heads = res.headers;
      let src = (/<img src='\/(img\/\d+)' width='740'/.exec(htm) || 0)[1];
      if(!src) break;
      Array.prototype.push.call(rec.images, Skr + src);
      // A page with an image but no text block contributes a single space.
      // The previous fallback `(match || ' ')[1]` evaluated to undefined and
      // appended the literal text "undefined".
      // NOTE(review): ' ' (rather than '') keeps the text truthy so the entry
      // is not re-fetched on every run -- confirm this is the intent.
      let found = / class="book-contents-text">([^]+?)(?=<\/div)/.exec(htm);
      rec.text += found ? found[1] : ' ';
      rec.update = (heads['Last-Modified'] || heads.Date || '')[0];
    }
    rec.text = rec.text.replace(/<br \/>/g, '').replace(/[\r\n]+/g, '\n');
    storage[number] = rec;
    storage.last = storage[number];
    // Build one message string, like the 'new index: ' log in cron_indices.
    dlog.info('new page: '+ rec.title);
    return --limit > 0;
  });
  dlog.info('crawled pages');
  response.stop(false);
}
/**
 * Builds the markup for one page: an IMG tag per image URL, each with the
 * globally configured `size` as its height, joined by line breaks.
 *
 * @param imgs array or array-like collection of image URLs
 * @return the IMG tags joined with '<br />'
 */
function iMerge(imgs){
  function toTag(url){
    return IMG({src: url, height: size});
  }
  var tags = Array.prototype.map.call(imgs, toTag);
  return tags.join('<br />');
}
/**
 * Adds a stylesheet to the page head that hides the #appjetfooter element.
 */
function hideFooter(){
  var rule = '#appjetfooter{display:none}';
  var styleTag = STYLE(rule);
  page.head.write(styleTag);
}
/**
 * Returns the n-th element counted from the end of `a`.
 *
 * @param a array, string or other indexable value with a `length`
 * @param n 1-based offset from the end; a falsy value (omitted, 0) means 1
 * @return the element at index a.length - n
 */
function last(a, n){
  var offset = n ? n : 1;
  return a[a.length - offset];
}
/**
 * Replaces all scheduled jobs with the ones given as (name, period) pairs:
 * scheduleThem('indices', 97, 'pages', 47).
 *
 * Every existing schedule is removed first. Each pair is then registered via
 * scheduleRepeating for the path '/' + name, with a start date one hour in
 * the past (60 * 60e3 ms before now).
 * NOTE(review): the meaning of the trailing `0, false, true` arguments is
 * defined by the cron library and not visible here -- confirm before changing.
 */
function scheduleThem(){
  unscheduleAll();
  var pos = 0, total = arguments.length;
  while(pos < total){
    var start = new Date(new Date - 60 * 60e3);
    var period = arguments[pos + 1];
    var path = '/'+ arguments[pos];
    scheduleRepeating(start, period, path, 0, false, true);
    pos += 2;
  }
}
/* appjet:css */
/* Base typography and spacing for the text view and the feed form. */
body { font-family: sans-serif }
p.title { font-size: 120%; padding: 4px }
pre { padding-bottom: 16px }
label { padding-right: 8px }
input { margin-left: 4px }
/* The separator rule written by get_img carries the class "-". A lone hyphen
   is not a valid CSS identifier, so it is escaped to make the selector parse. */
.\- { width: 100% }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment