Skip to content

Instantly share code, notes, and snippets.

View tshrinivasan's full-sized avatar

Shrinivasan T tshrinivasan

View GitHub Profile
@tshrinivasan
tshrinivasan / வேர்ச்சொல்_வடிகட்டி.py
Created March 1, 2019 13:48
வேர்ச்சொல்_வடிகட்டி.py
from tamilstemmer import TamilStemmer
# Sample Tamil words to run through the stemmer.
wordlist = [u'மலைகள்', u'பாடுதல்', u'ஓடினான்']
# Expected stems: [u'மலை', u'பாடு', u'ஓடி']

# The class name must match the imported `TamilStemmer`; the previous
# spelling `TamilStemer` raised NameError before any word was stemmed.
ta_stemmer = TamilStemmer()
for word in wordlist:
    # NOTE(review): the stem returned by stemWord() is discarded here, as in
    # the original snippet; capture or print it to compare with the expected
    # stems above.
    ta_stemmer.stemWord(word)
@tshrinivasan
tshrinivasan / clean-html.py
Created March 8, 2017 03:12
Clean HTML Pages
import lxml.html.clean as clean
from BeautifulSoup import BeautifulSoup
# Paths of the HTML page to clean and of the cleaned result.
input_file = 'input.html'
output_file = 'output.html'

# Read the whole source page. The file is only read here, so it is opened
# with mode 'r': 'rw' is not a valid mode string (Python 3 rejects it with
# ValueError). The `with` block closes the handle once the content is loaded.
with open(input_file, 'r') as source:
    orig_content = source.read()
@tshrinivasan
tshrinivasan / split-page.py
Last active October 19, 2024 18:18
Split a PDF vertically; used for scanned double-sided PDF pages
# Source http://stackoverflow.com/a/15741856/1301753
import copy
import sys
import math
import pyPdf
def split_pages(src, dst):
src_f = file(src, 'r+b')
dst_f = file(dst, 'w+b')
@tshrinivasan
tshrinivasan / CSS for FreeTamilEbooks.com
Last active August 29, 2015 14:03
CSS for FreeTamilEbooks.com
<style type="text/css">
/* Page base: white background with 12pt text. */
body {
  font-size: 12pt;
  background: white;
}

/* Bold text and level 3-4 headings: heaviest weight, midnight blue. */
h3,
h4,
strong {
  color: midnightblue;
  font-weight: 900;
}
import mechanize
import cookielib
# http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/
# Browser: instantiate mechanize's Browser class; kept in `br`.
br = mechanize.Browser()
# Cookie jar: an LWPCookieJar from cookielib (the Python 2 module name).
# NOTE(review): attaching this jar to `br` is not visible in this excerpt.
cj = cookielib.LWPCookieJar()