This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tamilstemmer import TamilStemmer | |
# Demo: run a few inflected Tamil words through the stemmer.
wordlist = [u'மலைகள்', u'பாடுதல்', u'ஓடினான்']
# Stems the author expected for the words above, in the same order.
# expected = [u'மலை', u'பாடு', u'ஓடி']

# Fixed: the imported class is TamilStemmer; the previous spelling
# (TamilStemer) raised NameError before any word was processed.
ta_stemmer = TamilStemmer()
for word in wordlist:
    # Fixed: the loop body must be indented to belong to the for statement.
    # NOTE(review): the return value is discarded here; print or collect it
    # if stemWord returns the stem rather than printing it -- confirm.
    ta_stemmer.stemWord(word)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lxml.html.clean as clean | |
from BeautifulSoup import BeautifulSoup | |
# HTML document to read; output_file is presumably where the processed
# result is written later in the script (not visible here -- confirm).
input_file = 'input.html'
output_file = 'output.html'

# Fixed: 'rw' is not a valid open() mode (Python 3 raises ValueError).
# The file is only read at this point, so open it read-only, and use a
# context manager so the handle is closed as soon as the content is loaded.
with open(input_file, 'r') as src:
    orig_content = src.read()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source http://stackoverflow.com/a/15741856/1301753 | |
import copy | |
import sys | |
import math | |
import pyPdf | |
def split_pages(src, dst): | |
src_f = file(src, 'r+b') | |
dst_f = file(dst, 'w+b') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<style type="text/css"> | |
body { | |
background: white; | |
font-size: 12pt; | |
} | |
strong,h3,h4{ | |
font-weight: 900; | |
color:midnightblue; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mechanize | |
import cookielib | |
# http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/ | |
# Create the mechanize Browser object used by the rest of the script.
br = mechanize.Browser() | |
# Create an LWPCookieJar instance.
# NOTE(review): presumably attached to `br` further down -- not visible here, confirm.
cj = cookielib.LWPCookieJar() |
Newer | Older