Last active
April 14, 2022 00:13
-
-
Save ayu-mushi/994b98ef5eb4f83161153196e18c9ec3 to your computer and use it in GitHub Desktop.
Detailed history pages for w3m cgi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from tika import parser | |
import argparse | |
import os | |
import sys | |
def print_title(parsed, filename):
    """Print the title found in a parse result.

    Args:
        parsed: Mapping holding the parse result; the title is read from
            ``parsed['metadata']['title']``.
        filename: Path of the document the result belongs to. It is only
            used to label the diagnostic message.

    When no title can be read, nothing is written to stdout and a one-line
    diagnostic naming the file is written to stderr instead.
    """
    try:
        title = parsed['metadata']['title']
    except (KeyError, TypeError):
        # TypeError covers a result whose 'metadata' entry is None.
        # The message ends with a newline so that diagnostics for several
        # files do not run together on one line.
        sys.stderr.write("%s: that document has no title.\n" % filename)
        return
    print(title)
def print_abstract(parsed, filename):
    """Print the first 100 characters of a parsed document's text.

    Args:
        parsed: Mapping holding the parse result; the text is read from
            ``parsed['content']``.
        filename: Path of the document the result belongs to. It is only
            used to label the diagnostic message.

    Leading and trailing spaces and newlines are stripped before the text
    is cut to 100 characters. When the content is missing or is not a
    string, a one-line diagnostic is written to stderr instead. In both
    cases an empty line is printed on stdout afterwards.
    """
    try:
        abstract = parsed['content'].strip(" \n")[0:100]
    except (TypeError, KeyError, AttributeError):
        # All three failures mean the same thing here: there is no usable
        # text (parsed is not a mapping, has no 'content', or it is None).
        sys.stderr.write(
            "%s: the content of that document is corrupt\n" % filename)
    else:
        print(abstract)
    # NOTE(review): the original indentation was lost; the trailing blank
    # line is assumed to be printed unconditionally -- confirm.
    # print("") (a call, not the Python 2 statement) runs on Python 2 and 3.
    print("")
def alls(parsed, filename):
    """Print the path, then the title and the abstract of one document.

    Args:
        parsed: Parse result handed on unchanged to ``print_title`` and
            ``print_abstract``.
        filename: Path of the document; printed after ``path: `` and also
            handed on to both printers.
    """
    print("path: " + filename)
    sections = (
        ("title:", print_title),
        ("abstract:", print_abstract),
    )
    # Each section is a heading line followed by the printer's own output.
    for heading, printer in sections:
        print(heading)
        printer(parsed, filename)
def execute_for_each(f, args):
    """Parse every file named on the command line and hand it to ``f``.

    Args:
        f: Callable invoked as ``f(parsed, filename)`` once per file.
        args: Object with a ``filenames`` attribute listing the paths to
            process, in order.
    """
    for path in args.filenames:
        # parser.from_file produces the parse result that f receives.
        f(parser.from_file(path), path)
def build_argparser():
    """Create the command-line parser with its three sub-commands.

    Sub-commands (each takes zero or more file names):
        title: print the title of every file.
        abst:  print the abstract of every file.
        all:   print path, title and abstract of every file.

    Returns:
        The configured ``argparse.ArgumentParser``. After parsing, the
        namespace carries ``filenames`` and a ``func`` callable that takes
        the namespace and runs the chosen sub-command.
    """
    argparser = argparse.ArgumentParser(description='')
    subparsers = argparser.add_subparsers(dest='command',
                                          help='sub-command help')
    # Python 3 makes sub-commands optional by default, which would leave
    # ``func`` unset and crash later; require one so argparse reports it.
    subparsers.required = True

    # ``lambda args:`` replaces the Python-2-only ``lambda(args):`` form and
    # is valid on both Python 2 and Python 3.
    pt = subparsers.add_parser("title", help="print title")
    pt.add_argument("filenames", type=str, nargs='*')
    pt.set_defaults(func=lambda args: execute_for_each(print_title, args))

    pa = subparsers.add_parser("abst", help="print abstract")
    pa.add_argument("filenames", type=str, nargs='*')
    pa.set_defaults(func=lambda args: execute_for_each(print_abstract, args))

    c_print_all = subparsers.add_parser("all", help="print all")
    c_print_all.add_argument("filenames", type=str, nargs='*')
    c_print_all.set_defaults(func=lambda args: execute_for_each(alls, args))

    return argparser


def main(argv=None):
    """Parse ``argv`` (default: ``sys.argv[1:]``) and run the sub-command."""
    args = build_argparser().parse_args(argv)
    args.func(args)


# Only parse the command line when run as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh | |
# Generate a detailed history page for w3m.
# Intended to be run as a w3m local CGI script.
function echo1() { | |
echo "$1" | |
echo "" | |
} | |
# Escape the characters that are special in HTML (read from stdin), so that
# text taken from file names and documents cannot inject markup into the page.
function html_escape() {
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
        -e "s/'/\&#39;/g" -e 's/"/\&quot;/g'
}

# A CGI response needs an empty line between the header and the body, so the
# blank line has to follow the Content-type header itself.
echo1 "Content-type: text/html"
echo1 "<html>"

# Parse the query string in Python. It is read from the environment instead of
# being pasted into the program text, so a crafted query cannot inject code.
# Missing parameters fall back to the values used by the "html history" link.
# Output: three lines holding n, ext and size.
Q=$(python -c '
import os
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs      # Python 2
q = parse_qs(os.environ.get("QUERY_STRING", ""))
for key, default in (("n", "0"), ("ext", "html"), ("size", "20")):
    print(q.get(key, [default])[0])
')
FIELDS=("${(@f)Q}")   # split on newlines, keeping empty fields
N=$FIELDS[1]
EXT=$FIELDS[2]
VIEW_SIZE=$FIELDS[3]

# The values end up in arithmetic, a glob pattern and HTML; accept only plain
# numbers and an alphanumeric extension.
[[ $N =~ '^[0-9]+$' ]] || N=0
[[ $VIEW_SIZE =~ '^[0-9]+$' ]] || VIEW_SIZE=20
[[ $EXT =~ '^[A-Za-z0-9]+$' ]] || EXT=html

echo1 "<title>$EXT history</title>"
echo1 "<h1>$EXT history</h1>"
echo1 "<ul>"

# Newest-first file list built with glob qualifiers instead of parsing `ls`:
# (N) yields an empty list when nothing matches, (om) orders by modification
# time. File names containing whitespace stay intact this way.
HISTORY_FILES=( ~/.w3m/*.$EXT(Nom) )

# Show VIEW_SIZE entries starting after the first N.
for LOCATION in "${(@)HISTORY_FILES[N+1,N+VIEW_SIZE]}"
do
    echo1 "<li>"
    HREF=$(print -r -- "$LOCATION" | html_escape)

    case $EXT in
        html)
            # Take the text between <title> and </title>, with tags removed.
            TITLE=$(grep -B 6 -A 6 "<title>" "$LOCATION" | sed -n -e "/<title>/,/<\/title>/p" | sed -e "s/<[^>]*>//g")
            ;;
        *)
            TITLE=$(doctitle title "$LOCATION" | html_escape)
            ;;
    esac
    echo1 "<p><a href='$HREF'>Title: $TITLE</a></p>"
    echo1 "<p>Location: '$HREF'</p>"

    case $EXT in
        pdf)
            # First ten lines of the text of the first three pages.
            ABSTRACT=$(pdftotext -l 3 -enc UTF-8 "$LOCATION" - | head -10 | html_escape)
            ;;
        *)
            # Join the file into one line without spaces, drop script and
            # style elements, strip the remaining tags and keep 100 characters.
            # Both element patterns are non-greedy so that text between two
            # separate elements survives; the style pattern previously lacked
            # its quantifier and never matched.
            ABSTRACT=$(perl -pe 's/(\n| )//g' "$LOCATION" | perl -pe 's/<script[\s\S]*?<\/script>//g' | perl -pe 's/<style[\s\S]*?<\/style>//g' | perl -pe 's/<.*?>//g' | awk '{print substr($0, 1, 100)}')
            ;;
    esac
    echo1 "<p>Abstract: $ABSTRACT</p>"
    echo1 "</li>"
done
echo1 "</ul>"

# Paging and shortcut links.
echo1 "<div><a href='?n=$((N+VIEW_SIZE))&ext=$EXT&size=$VIEW_SIZE'>next</a></div>"
echo1 "<div><a href='?n=0&size=7&ext=pdf'>pdf history</a></div>"
echo1 "<div><a href='?n=0&size=20&ext=html'>html history</a></div>"
echo1 "</html>"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment