Skip to content

Instantly share code, notes, and snippets.

@nournia
nournia / matching.py
Last active December 11, 2015 07:19
Dadegan valency matching
import codecs
from search.models import Valency, Dependency
# For every verb listed in static/verbs.txt, print one line per valency entry
# found for it: "<entry sentences>/<verb sentences>", the entry's surface form
# and its valency rendered as text.
#
# The verb list is opened in a `with` block so the handle is closed even if a
# lookup raises part-way through.
with codecs.open('static/verbs.txt', encoding='utf8') as verbs_file:
    for verb in verbs_file:
        verb = verb.strip()
        if not verb:
            # A blank line carries no verb; skip it instead of querying for ''.
            continue

        # Size of whatever findVerbSentences returns for this verb; used as
        # the denominator of the ratio printed below.
        count = len(Dependency().findVerbSentences(verb))

        for valency in Valency().find(verb):
            ratio = '%d/%d' % (len(valency['sentences']), count)
            # Values are encoded to UTF-8 before formatting, as this is
            # Python 2 code writing byte strings to stdout.
            surface = valency['verb']['surface'].encode('utf8')
            frame = Valency().str(valency['verb']['valency']).encode('utf8')
            # Single parenthesised argument: prints identically under the
            # Python 2 print statement and the print function.
            print('%-15s\t%s %s' % (ratio, surface, frame))
@nournia
nournia / chunk.py
Last active April 13, 2021 08:57
Convert a dependency tree to chunks
from pymongo import Connection
# Open a MongoDB connection with the driver's default settings (no host or
# port is passed).
# NOTE(review): `Connection` is the legacy PyMongo client class; newer PyMongo
# releases provide `MongoClient` instead — confirm against the installed version.
connection = Connection()
# Handle to the `dependencies` collection of the `dadegan` database.
dependencies = connection.dadegan.dependencies
def traverse(parent, node, chunk, force=False):
words[int(node['index'])] = node
if not chunk:
@nournia
nournia / print.html
Last active December 21, 2015 16:29
Template for printing a Persian document.
<meta charset="utf-8" />
<script type="text/javascript" src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
<script type="text/javascript">
// On DOM ready: wrap every image in a "figure" box, mark the element that
// holds the figure as a "container", and add a caption built from the
// image's alt text.
$(function() {
    $('img').each(function() {
        var $img = $(this);

        // After wrapping, the figure div is the image's parent and the
        // element that originally held the image is its grandparent.
        $img.wrap('<div class="figure"></div>');
        $img.parent().parent().addClass('container');

        // Read the alt from this image itself so that several images sharing
        // one container each get their own caption. The caption is set with
        // .text() so alt content is never parsed as markup, and a missing alt
        // yields an empty caption rather than the string "undefined".
        $('<span class="title"></span>')
            .text($img.attr('alt') || '')
            .insertAfter($img);
    });
});
</script>
<style type="text/css">
@nournia
nournia / verbs.py
Created October 16, 2014 12:39
Verb Conjugations
from hazm import VerbValencyReader, Lemmatizer
# Reader over the verb valency file at a hard-coded local path.
valency = VerbValencyReader('/home/alireza/Corpora/valency.txt')
# Lemmatizer instance; it is not used within the lines visible here.
lemmatizer = Lemmatizer()
# Output file opened for writing (an existing verbs.txt is truncated).
# NOTE(review): no close() is visible in this excerpt — confirm the handle is
# closed further down.
output = open('verbs.txt', 'w')
# Unique keys of the form "<past_light_verb>#<present_light_verb>", one per
# entry yielded by valency.verbs(); the set drops duplicates.
verbs = set([verb.past_light_verb +'#'+ verb.present_light_verb for verb in valency.verbs()])
# coding: utf8
from __future__ import unicode_literals
def maketrans(A, B):
    """Build a translation table for ``unicode.translate``.

    Each character of ``A`` is mapped (by code point) to the character at the
    same position in ``B``.

    Raises:
        ValueError: if ``A`` and ``B`` differ in length. A silent truncation
            would shift or drop mappings without any visible error.
    """
    if len(A) != len(B):
        raise ValueError('maketrans arguments must have equal length')
    return {ord(a): b for a, b in zip(A, B)}


# Extended Buckwalter transliteration: ASCII symbol -> Arabic character.
#
# The Arabic side is written as explicit code points rather than raw
# characters. Most of the tail of the table consists of combining marks, and a
# literal run of combining marks is reordered by Unicode normalisation (editors,
# copy/paste, web pages), which silently shifts the positional pairing.
# The escapes rely on the `unicode_literals` import at the top of the script
# when run under Python 2.
buckwalter_transliteration = maketrans(
    # letters: ' > & < } A b p t v j H x d * r z s $ S D T Z E g _ f q k l m n h w Y y
    '\'>&<}AbptvjHxd*rzs$SDTZEg_fqklmnhwYy'
    # tanween, short vowels, shadda, sukun: F N K a u i ~ o
    'FNKaui~o'
    # maddah, hamza above, dagger alif, alif wasla: ^ # ` {
    '^#`{'
    # Quranic annotation marks: : @ " [ ; , . ! - + % ]
    ':@"[;,.!-+%]',
    '\u0621\u0623\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d'
    '\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637\u0638\u0639'
    '\u063a\u0640\u0641\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a'
    '\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652'
    '\u0653\u0654\u0670\u0671'
    '\u06dc\u06df\u06e0\u06e2\u06e3\u06e5\u06e6\u06e8\u06ea\u06eb\u06ec\u06ed',
)

# usage
# A single parenthesised argument prints identically under the Python 2 print
# statement and the print function.
print('r~aHoma`ni'.translate(buckwalter_transliteration))
# uses https://github.com/matthewdowney/TogglPy
from __future__ import print_function
import codecs
from TogglPy import Toggl
# Create the TogglPy client and hand it the API key.
toggl = Toggl()
# NOTE(review): '<API_KEY>' is a placeholder — a real key must be supplied,
# preferably from outside the source file.
toggl.setAPIKey('<API_KEY>')
# UTF-8 text file opened for writing (an existing hours.csv is truncated).
# NOTE(review): no close() is visible in this excerpt — confirm the handle is
# closed further down.
output = codecs.open('hours.csv', 'w', 'utf8')