This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from BeautifulSoup import BeautifulSoup | |
import urllib | |
class MozillaOpener(urllib.FancyURLopener):
    """URL opener that identifies itself with a Firefox 3.5 User-Agent string."""

    # urllib's opener classes send the ``version`` class attribute as the
    # User-Agent header, so overriding it here changes how requests identify
    # themselves to servers.
    version = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; sv-SE; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3'


# Install the opener as urllib's module-wide default so that later
# urllib.urlopen() / urllib.urlretrieve() calls go through it.
urllib._urlopener = MozillaOpener()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w -CD | |
binmode STDOUT, ":utf8"; | |
binmode STDIN, ":utf8"; | |
no warnings; | |
#-----Description------------------------------------------------------ | |
# | |
# Program:urdu-segmenter.pl | |
# Written by: Danish Munir |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
This script is a python implementation of Danish Munir's perl urdu-segmenter. | |
http://www.cs.cmu.edu/afs/cs/user/lsl/Nice/Urdu-MT/code/Tools/Sentence_Segmenter/urdu-segmenter.pl | |
""" | |
import sys | |
import re | |
from optparse import OptionParser |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> import nltk.data
>>> splitter = nltk.data.load('tokenizers/punkt/english.pickle')
>>> splitter.tokenize('I think Washington D.C. is neato')
['I think Washington D.C.', 'is neato']
>>> splitter.tokenize('I think Washington D. C. is neato')
['I think Washington D. C. is neato']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> import urllib
>>> import simplejson
>>> url = urllib.urlopen('http://search.twitter.com/search.json?q=j2labs&rpp=1') # limit search to just 1 tweet
>>> json = url.read()
>>> usable_data = simplejson.loads(json)
>>> usable_data
{'next_page': '?page=2&max_id=6038882667&rpp=1&q=j2labs', 'completed_in': 0.039456999999999999, 'refresh_url': '?since_id=6038882667&q=j2labs', 'results': [{'iso_language_code': 'en', 'text': "Marc and Adam Allen's new band, Helen Earth Band, have released their new cd!! http://bit.ly/91HpIq :: Please RT #helenearthband", 'created_at': 'Wed, 25 Nov 2009 03:16:18 +0000', 'profile_image_url': 'http://a1.twimg.com/profile_images/435479248/Picture_1_normal.png', 'source': '<a href="http://twitter.com/">web</a>', 'from_user': 'j2labs', 'from_user_id': 54476112, 'to_user_id': None, 'geo': None, 'id': 6038882667L}], 'since_id': 0, 'results_per_page': 1, 'query': 'j2labs', 'max_id': 6038882667L, 'page': 1} | |
>>> usable_data['results'][0]['text'] | |
"Marc and Adam Allen's ne |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Query the server for the hit count.
query_mt('get_hit_count', server_key, secret_key)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Query the server for the hit count.
query_mt('get_hit_count', server_key, secret_key)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def addLike(self, uid=None, post_id=None):
    """
    Facebook API call. See http://developers.facebook.com/documentation.php?v=1.0&method=stream.addLike

    Only arguments that are not None are forwarded. The call is dispatched
    by invoking ``self`` with the method name 'addLike' and the argument
    dict, and whatever that invocation returns is returned unchanged.
    """
    args = {}
    # Compare against None explicitly so falsy-but-valid values (e.g. 0)
    # are still sent.
    if uid is not None:
        args['uid'] = uid
    if post_id is not None:
        args['post_id'] = post_id
    return self('addLike', args)
def removeLike(self, uid=None, post_id=None): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import uuid | |
from boto.mturk.connection import MTurkConnection | |
from boto.mturk.question import Question, QuestionForm | |
from boto.mturk.question import QuestionContent, ExternalQuestion | |
from boto.mturk.question import AnswerSpecification, FreeTextAnswer | |
from boto.mturk.question import SelectionAnswer | |
from boto.mturk.question import Overview | |
from boto.mturk.qualification import * |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from arabic_rtlize.process import rtlize | |
from PIL import Image, ImageFont, ImageDraw | |
# Sample Urdu sentence to render, written as a plain (byte) string literal.
real_urdu = "اس صفحہ کو ترامیم کیلیۓ نـیـم محفوظ کر دیا گیا ہے اور صارف کو اندراج کر کے داخل نوشتہ ہونا لازم ہے۔"
# Decode the UTF-8 bytes to a unicode object (Python 2) before handing the
# text to rtlize.
# NOTE(review): rtlize presumably prepares the text for right-to-left
# rendering -- confirm against arabic_rtlize.
urdu = rtlize(unicode(real_urdu, 'UTF-8'))
# 1000x200 canvas in mode "L" (8-bit grayscale).
img = Image.new("L", (1000,200))
draw = ImageDraw.Draw(img)
# NOTE(review): absolute, machine-specific font path; this only works where
# that file exists.
f = ImageFont.truetype('/Users/jd/Desktop/urduimg/geeza.ttf', 20)
# Draw the processed text at (10, 40) with fill value 255 on the "L" canvas.
draw.text((10,40), urdu, fill=255, font=f)
OlderNewer