e9t · October 9, 2015 17:58
diff --git a/README.md b/README.md
diff --git a/convert.py b/convert.py
 #! /usr/bin/python2.7
 # -*- coding: utf-8 -*-

 import html5lib
 import os
 from glob import glob

 def get_filenames(directory):
    """Return the paths of every entry matched by ``<directory>/*``.

    The result comes straight from ``glob``; it is not sorted or filtered.
    """
    pattern = os.path.join(directory, '*')
    matches = glob(pattern)
    return matches

 def get_xpaths(filename, xpaths=None):
    """Parse *filename* as HTML and return the result of an XPath query.

    The file is parsed with html5lib using the "lxml" tree builder and
    ``namespaceHTMLElements=False``.

    Args:
        filename: path of the file to parse.
        xpaths: XPath expression to evaluate. When omitted, the module-level
            ``xpaths`` variable is used if it exists (the script entry point
            sets it); otherwise ``"//body//text()"`` is used.

    Returns:
        Whatever ``page.xpath(...)`` returns for the expression.
    """
    if xpaths is None:
        # This function used to depend unconditionally on a global that is
        # only defined when the file runs as a script, so calling it from an
        # importing module raised NameError. Keep honouring that global for
        # existing callers, but fall back to a sane default.
        xpaths = globals().get('xpaths', "//body//text()")

    builder = html5lib.treebuilders.getTreeBuilder("lxml")
    parser = html5lib.HTMLParser(tree=builder, namespaceHTMLElements=False)
    with open(filename, 'r') as f:
        page = parser.parse(f)
    return page.xpath(xpaths)

 def print_txt(filename, data):
    """Write the text fragments in *data* to *filename* as a small HTML page.

    The page is UTF-8 encoded and each fragment is followed by ``<br>``.

    Args:
        filename: path of the file to create or overwrite.
        data: iterable of text fragments (unicode strings, or UTF-8 encoded
            byte strings).
    """
    # Local imports keep this block self-contained. io.open behaves the same
    # on Python 2.7 and Python 3, whereas writing encoded bytes to a file
    # opened with plain open(..., 'w') raises TypeError on Python 3.
    import io
    from xml.sax.saxutils import escape

    with io.open(filename, 'w', encoding='utf-8') as f:
        f.write(u"<html>\n<head>\n")
        f.write(u"<meta charset=utf-8>\n")
        f.write(u"</head>\n<body>")
        for d in data:
            if isinstance(d, bytes):
                d = d.decode('utf-8')
            # Fragments are plain text, so &, < and > must be escaped or they
            # would be interpreted as markup in the generated page.
            f.write(escape(d))
            f.write(u"<br>")
        f.write(u"</body>")

 def main(directory, xpaths):
    """Convert every file in *directory* into an HTML page under ``html/``.

    For each input file the extracted nodes are written to
    ``html/<input name without its extension>.html``.

    Args:
        directory: directory whose entries are processed.
        xpaths: accepted for interface compatibility. get_xpaths is called
            with the file name only, so the expression actually used is the
            one get_xpaths resolves itself.
    """
    out_dir = 'html'
    # Opening the output file fails if the target directory is missing.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for src in get_filenames(directory):
        # Parenthesised single-argument print works on Python 2 and 3.
        print('processing ' + src)
        xp = get_xpaths(src)
        # Derive the output name from the path components instead of a
        # fixed slice, which was only correct for './smi/' and 3-letter
        # extensions.
        stem = os.path.splitext(os.path.basename(src))[0]
        print_txt(os.path.join(out_dir, stem + '.html'), xp)
        print('done')

 if __name__ == '__main__':
    # Script entry point: extract every text node under <body> from the
    # files in ./smi/.
    # `xpaths` has to remain a module-level name here because get_xpaths
    # looks it up as a global.
    xpaths = "//body//text()"
    directory = './smi/'

    main(directory, xpaths)
diff --git a/iconv.sh b/iconv.sh
 #!/bin/bash
 # Convert every *.smi file in the current directory from cp949 to UTF-8,
 # writing the result next to the source as <name>.utf-8.smi.

 FENC="cp949"
 TENC="utf-8"

 # Iterate over the glob itself rather than parsing `ls` output, so file
 # names containing whitespace stay intact.
 for oldfile in *.smi;
 do
    # When nothing matches, the pattern is left as a literal string.
    [ -e "$oldfile" ] || continue

    # Skip results of an earlier run; otherwise they would be converted a
    # second time.
    case "$oldfile" in
        *.utf-8.smi) continue ;;
    esac

    # Strip only the trailing .smi. Cutting at the first dot made names such
    # as a.b.smi and a.c.smi collide on the same output file.
    newfile="${oldfile%.smi}.utf-8.smi"
    echo "converting $oldfile to $newfile"
    iconv -f "$FENC" -t "$TENC" "$oldfile" > "$newfile"
 done
diff --git a/mv.sh b/mv.sh
 #!/bin/bash
 # Rename the "Desperate Ho*.smi" files in the current directory.

 # Iterate over the glob itself rather than parsing `ls` output: every
 # matching name contains a space, so word splitting broke each of them.
 for oldfile in Desperate\ Ho*.smi;
 do
    # When nothing matches, the pattern is left as a literal string.
    [ -e "$oldfile" ] || continue

    # NOTE(review): the new name embeds the complete old file name
    # (including its .smi suffix); presumably only the episode number was
    # meant to follow "s01e" - confirm before relying on this script.
    newfile="Desperate.Housewives.s01e$oldfile.smi"
    echo "converting $oldfile to $newfile"
    mv -- "$oldfile" "$newfile"
 done
	#! /usr/bin/python2.7
	# -*- coding: utf-8 -*-

	import html5lib
	import os
	from glob import glob

	def get_filenames(directory):
	    """Return the paths of every entry matched by ``<directory>/*``.

	    The result comes straight from ``glob``; it is not sorted or filtered.
	    """
	    return glob(os.path.join(directory, '*'))

	def get_xpaths(filename, xpaths=None):
	    """Parse *filename* as HTML and return the result of an XPath query.

	    The file is parsed with html5lib using the "lxml" tree builder and
	    ``namespaceHTMLElements=False``.

	    Args:
	        filename: path of the file to parse.
	        xpaths: XPath expression to evaluate. When omitted, the
	            module-level ``xpaths`` variable is used if it exists (the
	            script entry point sets it); otherwise ``"//body//text()"``.

	    Returns:
	        Whatever ``page.xpath(...)`` returns for the expression.
	    """
	    if xpaths is None:
	        # This function used to depend unconditionally on a global that
	        # is only defined when the file runs as a script, so calling it
	        # from an importing module raised NameError.
	        xpaths = globals().get('xpaths', "//body//text()")

	    builder = html5lib.treebuilders.getTreeBuilder("lxml")
	    parser = html5lib.HTMLParser(tree=builder, namespaceHTMLElements=False)
	    with open(filename, 'r') as f:
	        page = parser.parse(f)
	    return page.xpath(xpaths)

	def print_txt(filename, data):
	    """Write the text fragments in *data* to *filename* as a small HTML page.

	    The page is UTF-8 encoded and each fragment is followed by ``<br>``.

	    Args:
	        filename: path of the file to create or overwrite.
	        data: iterable of text fragments (unicode strings, or UTF-8
	            encoded byte strings).
	    """
	    # Local imports keep this block self-contained. io.open behaves the
	    # same on Python 2.7 and Python 3, whereas writing encoded bytes to a
	    # file opened with plain open(..., 'w') raises TypeError on Python 3.
	    import io
	    from xml.sax.saxutils import escape

	    with io.open(filename, 'w', encoding='utf-8') as f:
	        f.write(u"<html>\n<head>\n")
	        f.write(u"<meta charset=utf-8>\n")
	        f.write(u"</head>\n<body>")
	        for d in data:
	            if isinstance(d, bytes):
	                d = d.decode('utf-8')
	            # Fragments are plain text, so &, < and > must be escaped or
	            # they would be interpreted as markup in the generated page.
	            f.write(escape(d))
	            f.write(u"<br>")
	        f.write(u"</body>")

	def main(directory, xpaths):
	    """Convert every file in *directory* into an HTML page under ``html/``.

	    For each input file the extracted nodes are written to
	    ``html/<input name without its extension>.html``.

	    Args:
	        directory: directory whose entries are processed.
	        xpaths: accepted for interface compatibility. get_xpaths is
	            called with the file name only, so the expression actually
	            used is the one get_xpaths resolves itself.
	    """
	    out_dir = 'html'
	    # Opening the output file fails if the target directory is missing.
	    if not os.path.isdir(out_dir):
	        os.makedirs(out_dir)

	    for src in get_filenames(directory):
	        # Parenthesised single-argument print works on Python 2 and 3.
	        print('processing ' + src)
	        xp = get_xpaths(src)
	        # Derive the output name from the path components instead of a
	        # fixed slice, which was only correct for './smi/' and 3-letter
	        # extensions.
	        stem = os.path.splitext(os.path.basename(src))[0]
	        print_txt(os.path.join(out_dir, stem + '.html'), xp)
	        print('done')

	if __name__ == '__main__':
	    # Script entry point: extract every text node under <body> from the
	    # files in ./smi/.
	    # `xpaths` has to remain a module-level name here because get_xpaths
	    # looks it up as a global.
	    directory = './smi/'
	    xpaths = "//body//text()"

	    main(directory, xpaths)
	#!/bin/bash
	# Convert every *.smi file in the current directory from cp949 to UTF-8,
	# writing the result next to the source as <name>.utf-8.smi.

	FENC="cp949"
	TENC="utf-8"

	# Iterate over the glob itself rather than parsing `ls` output, so file
	# names containing whitespace stay intact.
	for oldfile in *.smi;
	do
	    # When nothing matches, the pattern is left as a literal string.
	    [ -e "$oldfile" ] || continue

	    # Skip results of an earlier run; otherwise they would be converted
	    # a second time.
	    case "$oldfile" in
	        *.utf-8.smi) continue ;;
	    esac

	    # Strip only the trailing .smi. Cutting at the first dot made names
	    # such as a.b.smi and a.c.smi collide on the same output file.
	    newfile="${oldfile%.smi}.utf-8.smi"
	    echo "converting $oldfile to $newfile"
	    iconv -f "$FENC" -t "$TENC" "$oldfile" > "$newfile"
	done
	#!/bin/bash
	# Rename the "Desperate Ho*.smi" files in the current directory.

	# Iterate over the glob itself rather than parsing `ls` output: every
	# matching name contains a space, so word splitting broke each of them.
	for oldfile in Desperate\ Ho*.smi;
	do
	    # When nothing matches, the pattern is left as a literal string.
	    [ -e "$oldfile" ] || continue

	    # NOTE(review): the new name embeds the complete old file name
	    # (including its .smi suffix); presumably only the episode number was
	    # meant to follow "s01e" - confirm before relying on this script.
	    newfile="Desperate.Housewives.s01e$oldfile.smi"
	    echo "converting $oldfile to $newfile"
	    mv -- "$oldfile" "$newfile"
	done