Last active
July 2, 2017 12:36
-
-
Save ayu-mushi/9ed7b4c489cc5f20b6825673ed4baddd to your computer and use it in GitHub Desktop.
Comment for each web page written for w3m browser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
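# Uses printenv so that w3m can keep a separate comment file for each page.
# The (URL, annotation file name) pairs created so far are kept in JSON as an associative array; the file name is the page title, with a number added on collision.
# Given a URL, the matching annotation file is opened.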
# keymap A EXEC_SHELL "w3m_commentary.py" | |
import lxml.html | |
import json | |
import os | |
import re | |
import subprocess | |
def readContent(path, encoding="utf-8"):
    """Return the whole text content of the file at *path*.

    Args:
        path: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded content as a string. Bytes that cannot be decoded with
        *encoding* are silently dropped (``errors="ignore"``).
    """
    # The context manager closes the handle even when read() raises.
    with open(path, encoding=encoding, errors="ignore") as f:
        return f.read()
def envvar(env, varname):
    """Extract the value of *varname* from a ``printenv``-style dump.

    Args:
        env: Text made of ``NAME=value`` lines.
        varname: Exact name of the variable to look up.

    Returns:
        The text after ``NAME=`` on the first line that defines *varname*.

    Raises:
        IndexError: If no line defines *varname*.
    """
    # Anchor at the start of a line and escape the name so that e.g.
    # "W3M_URL" neither matches "OLD_W3M_URL=..." nor text that merely
    # appears inside another variable's value.
    pattern = r"^" + re.escape(varname) + r"=(.*)$"
    found = re.search(pattern, env, re.MULTILINE)
    if found is None:
        raise IndexError("environment variable not found: " + varname)
    return found.group(1)
def read_srcfile(src_filename, encoding="utf-8"):
    """Read a page source file, transparently decompressing ``.gz`` files.

    Args:
        src_filename: Path of the source file.
        encoding: Text encoding used to decode the content.

    Returns:
        The decoded text; undecodable bytes are dropped (``errors="ignore"``).
    """
    # splitext() yields either "" or a suffix starting with ".", so only the
    # dotted form needs to be checked.
    _, ext = os.path.splitext(src_filename)
    if ext == ".gz":
        import gzip
        # The context manager closes the handle even when read() raises.
        with gzip.open(src_filename, "rt", encoding=encoding, errors="ignore") as f:
            return f.read()
    return readContent(src_filename, encoding)
def gettitle(srcfile):
    """Return the text of the first ``<title>`` element in an HTML string.

    Args:
        srcfile: HTML source as a string.

    Returns:
        The ``.text`` of the first node matched by the ``//title`` XPath.
        NOTE(review): indexing ``[0]`` fails when the page has no title
        element -- confirm whether callers need a fallback.
    """
    document = lxml.html.fromstring(srcfile.encode("utf-8"))
    title_nodes = document.xpath("//title")
    first_title = title_nodes[0]
    return first_title.text
def getlength(w3m_url):
    """Return the number of characters in the ``w3m -dump`` rendering of a URL.

    Args:
        w3m_url: URL (or path) handed to ``w3m -dump``.

    Returns:
        Length of the decoded dump text.

    Raises:
        subprocess.CalledProcessError: If ``w3m`` exits with a non-zero status.
    """
    # Pass the URL as a separate argument instead of building a shell string:
    # URLs routinely contain "&", ";" or "?" which a shell would interpret,
    # truncating the URL or running unintended commands.
    dumped = subprocess.check_output(["w3m", "-dump", w3m_url])
    # Decode leniently so a stray undecodable byte in the dump does not abort
    # the script.
    return len(dumped.decode(errors="ignore"))
# Remove characters that are problematic in a file name.
def sub_for_more_filenameness(pagetitle):
    """Turn a page title into a Markdown file name.

    Spaces, newlines and slashes are each replaced by an underscore and the
    ``.md`` suffix is appended.
    """
    sanitized = re.sub(r'[\ \n/]', "_", pagetitle)
    return sanitized + ".md"
# When a different URL already uses the same title, add a numeric prefix.
# Needed, for example, when taking notes on tweets.
def maketitle(pagetitle, url, comms):
    """Return a file name for *url* that does not clash with other URLs.

    Args:
        pagetitle: Candidate file name.
        url: URL the file name is for.
        comms: Mapping of URL -> file name already in use.

    Returns:
        *pagetitle* unchanged when it is unused or already belongs to *url*.
        Otherwise a variant with a numeric prefix: "0" is prepended to a name
        without leading digits, and an existing leading number is incremented,
        until the name is free or owned by *url*.
    """
    # Build the reverse lookup (file name -> URL) once instead of on every
    # retry.
    owner_of = {name: owner for owner, name in comms.items()}
    while pagetitle in owner_of and owner_of[pagetitle] != url:
        prefix = re.match("[0-9]+", pagetitle)
        if prefix is None:
            pagetitle = "0" + pagetitle
        else:
            # Replace only the leading counter; digits elsewhere in the title
            # are part of the name and must be kept.
            pagetitle = str(int(prefix.group()) + 1) + pagetitle[prefix.end():]
    return pagetitle
def load_commantaries():
    """Load the URL -> annotation file name index, creating it if needed.

    The index lives at ``~/w3m_commentary/commentaries.json``. The directory
    is created when missing, and an empty index (``{}``) is written when the
    JSON file does not exist yet.

    Returns:
        The decoded JSON content of the index file, or an empty dict when the
        file had to be created.
    """
    dirpath = os.path.expanduser("~/w3m_commentary/")
    path = os.path.join(dirpath, "commentaries.json")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirpath, exist_ok=True)
    if os.path.exists(path):
        return json.loads(readContent(path))
    with open(path, "w") as f:
        f.write("{}")
    return {}
# Script body: open (creating it first if necessary) the annotation file for
# the page named by W3M_URL. The W3M_* variables are presumably exported by
# w3m when the script is started through an EXEC_SHELL keymap -- confirm.
env = subprocess.check_output(["printenv"]).decode(errors="ignore")
w3m_url = envvar(env, "W3M_URL")
comms = load_commantaries()
commentary_dir = os.path.expanduser("~/w3m_commentary/")
if w3m_url not in comms:
    w3m_sourcefile = envvar(env, "W3M_SOURCEFILE")
    w3m_charset = envvar(env, "W3M_CHARSET")
    srcfile = read_srcfile(w3m_sourcefile, encoding=w3m_charset)
    # Parse the title once and reuse it for the file name and the header.
    pagetitle = gettitle(srcfile)
    commname = maketitle(sub_for_more_filenameness(pagetitle), w3m_url, comms)
    print(commname)
    # Build the header before opening the file so that a failing getlength()
    # does not leave an empty annotation file behind.
    header = pagetitle + "\n====\n" + w3m_url + "\nlength: " + str(getlength(w3m_url))
    with open(os.path.join(commentary_dir, commname), "w") as particular_comm:
        particular_comm.write(header)
    comms[w3m_url] = commname
    with open(os.path.join(commentary_dir, "commentaries.json"), "w") as f:
        json.dump(comms, f)
# The file name is derived from the page title, which is untrusted text:
# start vim with an argument list rather than a shell command line so that
# quotes, "$(...)" or backticks in the title are never interpreted.
subprocess.call(["vim", os.path.join(commentary_dir, comms[w3m_url])])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment