Shrinivasan T tshrinivasan

Ebooks publisher at FreeTamilEbooks.com

tshrinivasan / gist:3682456

Created September 9, 2012 03:48

	import mechanize
	import cookielib

	# http://stockrt.github.com/p/emulating-a-browser-in-python-with-mechanize/

	# Browser
	br = mechanize.Browser()

	# Cookie Jar
	cj = cookielib.LWPCookieJar()

tshrinivasan / CSS for FreeTamilEbooks.com

Last active August 29, 2015 14:03

CSS for FreeTamilEbooks.com

tshrinivasan / split-page.py

Last active October 19, 2024 18:18

Split a PDF vertically, used for scanned double sided PDF pages

	# Source http://stackoverflow.com/a/15741856/1301753

	import copy
	import sys
	import math
	import pyPdf

	def split_pages(src, dst):
	src_f = file(src, 'r+b')
	dst_f = file(dst, 'w+b')

tshrinivasan / clean-html.py

Created March 8, 2017 03:12

Clean HTML Pages

	import lxml.html.clean as clean

	from BeautifulSoup import BeautifulSoup

	# Path of the HTML page to read, and a second path that is presumably the
	# destination for the cleaned output (its use is not visible here).
	input_file = 'input.html'
	output_file = 'output.html'


	# Load the whole page into memory. The original mode string 'rw' is not a
	# valid open() mode (Python 3 rejects it with ValueError); read mode is all
	# this statement needs, and the context manager closes the handle as soon
	# as the content has been read.
	with open(input_file, 'r') as source:
	    orig_content = source.read()

tshrinivasan / வேர்ச்சொல்_வடிகட்டி.py

Created March 1, 2019 13:48

வேர்ச்சொல்_வடிகட்டி.py

	from tamilstemmer import TamilStemmer

	# Sample Tamil words to run through the stemmer.
	wordlist = [u'மலைகள்',u'பாடுதல்',u'ஓடினான்']
	#expected = [u'மலை',u'பாடு', u'ஓடி']

	# The class imported above is TamilStemmer; the misspelled TamilStemer
	# raised NameError before any word could be stemmed.
	ta_stemmer = TamilStemmer()

	# Stem each word in turn. The return value of stemWord is not stored or
	# printed here.
	for word in wordlist:
	    ta_stemmer.stemWord(word)

tshrinivasan / remove_strings_from_files.py

Created March 31, 2019 11:00

This program removes the words listed in a given file from all the files inside a directory, recursively. The sed idea comes from http://www.linuxask.com/questions/replace-multiple-strings-using-sed

	# This program removes the words listed in a given file from all the files inside a directory, recursively.
	# Got the sed idea from http://www.linuxask.com/questions/replace-multiple-strings-using-sed


	import sys
	import glob
	import os
	import argparse

	parser = argparse.ArgumentParser()

tshrinivasan / OverPassToGoogleSheet.gs

Created May 26, 2019 09:54

OverPassToGoogleSheet.gs

	//var langCode ='ta'; -- TODO Make it language independent.

	/**
	 * Builds a template from the file 'Index.html' and returns the result of
	 * evaluating it.
	 */
	function doGet() {
	  var template = HtmlService.createTemplateFromFile('Index.html');
	  return template.evaluate();
	}

	/**
	 * Writes a fixed marker message to the script log.
	 */
	function doSomething() {
	  var message = 'I was called!';
	  Logger.log(message);
	}

tshrinivasan / parse-voter-list.py

Created October 3, 2019 12:49

Code to parse a voter list PDF that has been OCRed by Tesseract

	import sys

	in_file = sys.argv[1]


	content = open(in_file).read()
	out = open("result.csv","a")

	con = content.split("வாக்காளர்‌ பெயர்‌")

tshrinivasan / fix_records.py

Created November 22, 2019 07:39

A program to find and replace bibliographical data

	# program name : fix_records.py
	# author : [email protected]
	# version : 0.1


	import sys
	import os
	import argparse

	parser = argparse.ArgumentParser(description='A program to find and replace bibliographical data')

tshrinivasan / tess_ocr_pdf.py

Created May 23, 2020 16:36

Convert a PDF file to text using Tesseract OCR