Created
December 20, 2011 17:09
-
-
Save tvwerkhoven/1502341 to your computer and use it in GitHub Desktop.
Look up the phonetic spelling of words in Apple's Dictionary.app through DictionaryServices
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2.7 | |
# -*- coding: utf-8 -*- | |
""" | |
@file mac-dict.py -- lookup entries in the Mac dictionary | |
@author Tim van Werkhoven | |
@date 20111220 | |
@copyright Copyright (c) 2011 Tim van Werkhoven | |
Use DictionaryServices to look up words in Apple's Dictionary.app. | |
Loosely based on work by Alex Kontsevoy, see <http://tech-kovoy.blogspot.com/2008/07/how-to-make-use-of-dictionary-services.html> | |
This file is licensed under the Creative Commons Attribution-Share Alike | |
license versions 3.0 or higher, see | |
http://creativecommons.org/licenses/by-sa/3.0/ | |
""" | |
import DictionaryServices | |
import re | |
import unittest | |
import os, sys | |
# Define some constants
AUTHOR = "Tim van Werkhoven <[email protected]>" | |
DATE = "20111220" | |
VERBOSITY = 0 | |
def main():
    """Run the command-line program.

    If ``--test`` appears anywhere on the command line, the regular program
    is bypassed and the unit tests are run instead. Otherwise the words to
    look up come from ``--infile`` (when given) or from the positional
    arguments, and their pronunciations are printed space-separated on a
    single line.

    Returns whatever ``unittest.main()`` returns in test mode, else None.
    """
    # Handle --test before argparse sees it, otherwise the parser would
    # reject it as an unknown option.
    if '--test' in sys.argv:
        print("Bypassing regular program, running tests now!")
        # Remove '--test' from sys.argv, keep all other arguments
        sys.argv = [el for el in sys.argv if el != '--test']
        return unittest.main()

    # Parse arguments and check options
    (parser, args) = parsopts()

    if args.infile:
        if VERBOSITY > 1:
            print("main(): args.infile")
        words = _words_from_file(args.infile)
    else:
        if VERBOSITY > 1:
            print("main(): else branch")
        words = args.words

    results = []
    for word in words:
        ipa = dict_IPA(word)
        # dict_IPA() returns None for empty/blank words; skip those instead
        # of printing the literal text "None".
        if ipa is not None:
            results.append(ipa)

    if results:
        print(" ".join(results))


def _words_from_file(path):
    """Yield each whitespace-separated word of the UTF-8 text file *path*.

    Quotes, basic punctuation and newlines are removed from every line
    before it is split into words.
    """
    import io  # function-scope import keeps this block self-contained

    # Characters stripped from each line (the last one is the typographic
    # apostrophe U+2019).
    remlist = u"',.:;\"\n\u2019"
    # Mapping code point -> None makes translate() delete whole characters,
    # so a multi-byte character is never removed one byte at a time.
    strip_table = dict.fromkeys(ord(ch) for ch in remlist)

    # The context manager closes the file even if a lookup raises.
    with io.open(path, "r", encoding="utf-8", errors="replace") as fd:
        for line in fd:
            if VERBOSITY > 2:
                print("main(): line = %s" % (line))
            line = line.translate(strip_table)
            if VERBOSITY > 2:
                print("main(): procline = %s" % (line))
            # split() without arguments splits on any whitespace run and
            # never yields empty strings.
            for word in line.split():
                yield word
def dict_IPA(word):
    """Given a word, return the pronunciation in IPA.

    The word is looked up with ``DictionaryServices.DCSCopyTextDefinition``.
    The pronunciation is expected on the second line of the returned
    definition, wrapped in one delimiter character on each side (vertical
    bars); those two characters are removed.

    Returns:
        None if *word* is empty or only whitespace;
        "n/a" if no definition was found or the definition has no second
        line; otherwise the extracted pronunciation string.
    """
    # Pre-process
    word = word.strip()
    if not word:
        return None

    # Lookup word
    if VERBOSITY > 1:
        print("dict_IPA(word = %s)" % (word))
    entry = DictionaryServices.DCSCopyTextDefinition(None, word, (0, len(word)))
    if not entry:
        return "n/a"
    if VERBOSITY > 2:
        print("dict_IPA(): %s" % (entry))

    # The IPA should be on the line following the headword. Use find()
    # rather than index() so a definition without line breaks yields "n/a"
    # instead of raising ValueError.
    IPA_beg = entry.find("\n") + 1
    if IPA_beg == 0:
        return "n/a"
    IPA_end = entry.find("\n", IPA_beg)
    if IPA_end == -1:
        # The pronunciation line is the last line of the definition
        IPA_end = len(entry)
    if VERBOSITY > 2:
        print("dict_IPA(): %d -- %d" % (IPA_beg, IPA_end))

    # Extract IPA, remove the vertical bars around it
    IPA = entry[IPA_beg + 1:IPA_end - 1]
    if VERBOSITY > 2:
        print("dict_IPA(): %s" % (IPA))
    return IPA
def parsopts(argv=None):
    """Parse program options, check sanity and return results.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line. With the default (None), argparse reads
            ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        A ``(parser, args)`` tuple: the ArgumentParser and the parsed
        namespace with attributes ``words``, ``infile`` and ``debug``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Lookup words from Apple's Dictionary.app. You can "
                    "change the language by opening Dictionary.app's "
                    "preferences and only enabling the dictionary you want "
                    "to use.",
        epilog='Comments & bugreports to %s' % (AUTHOR))
    parser.add_argument('words', metavar='W', type=str, nargs='*', default=[],
                        help='words to lookup.')
    parser.add_argument('--infile', metavar='F', type=str,
                        help='read words from F.')
    # -v and -q both append to args.debug (+1 / -1); checkopts() sums the
    # list to obtain the verbosity level.
    parser.add_argument('-v', dest='debug', action='append_const', const=1,
                        help='increase verbosity')
    parser.add_argument('-q', dest='debug', action='append_const', const=-1,
                        help='decrease verbosity')
    args = parser.parse_args(argv)

    # Check & fix some options
    checkopts(parser, args)

    # Return results
    return (parser, args)
def checkopts(parser, args):
    """Check program options sanity.

    Args:
        parser: the argparse parser, used to print the usage line on error.
        args: parsed options; reads ``args.infile``, ``args.words`` and
            ``args.debug``.

    Exits the process with status 1 (after writing the error and the usage
    line to stderr) when the input file does not exist, or when neither an
    input file nor any word was given. Sets the module-level VERBOSITY to
    the sum of ``args.debug`` when any -v/-q flag was passed.
    """
    global VERBOSITY

    if args.infile:
        if not os.path.exists(args.infile):
            sys.stderr.write("Error: infile does not exist!\n")
            parser.print_usage(sys.stderr)
            # Non-zero status so calling scripts can detect the failure
            sys.exit(1)
        elif not os.path.isfile(args.infile):
            sys.stderr.write("Warning: infile is not a regular file!\n")
    elif not args.words:
        sys.stderr.write("Error: need at least one word or an input file\n")
        parser.print_usage(sys.stderr)
        sys.exit(1)

    if args.debug:
        # args.debug is a list of +1 (-v) and -1 (-q) entries
        VERBOSITY = sum(args.debug)
        print("checkopts(): verbosity: %d" % (VERBOSITY))
### Tests start here | |
class TestWords(unittest.TestCase):
    """Check dict_IPA() against known pronunciations.

    The expected IPA depends on which dictionary (US or UK) is enabled, so
    the language is detected first by looking up the word "worse".
    """

    def setUp(self):
        """Make list of words and detect the dictionary language.

        unittest creates a fresh TestCase instance for every test method,
        so the language has to be detected here; state stored by one test
        method is not visible to another.
        """
        self.words = [u"worse", u"pronunciation", u"Terpsichore"]
        self.ipa_uk = [u"wəːs", u"prənʌnsɪˈeɪʃ(ə)n", u"təːpˈsɪkəri"]
        self.ipa_us = [u"wərs", u"prəˌnənsiˈeɪʃən", u"tərpˈsɪkəri"]
        self.lang = self._detect_lang()

    def _detect_lang(self):
        """Return "US" or "UK" based on the IPA of "worse", else None."""
        probe = dict_IPA("worse").strip()
        if probe == u"wərs":
            return "US"
        if probe == u"wəːs":
            return "UK"
        return None

    # Explore environment / settings
    def test0a_detect_lang(self):
        """Detect which dictionary we are using (US or UK)"""
        if self.lang is None:
            self.fail("Unknown dictionary language!")
        print("Got %s dictionary" % (self.lang))

    # Static tests
    def test1a_ipa(self):
        """Test pre-defined words for pronunciation"""
        print(self.lang)
        # Anything that is not detected as US is compared against the UK list
        this_ipa = self.ipa_us if self.lang == "US" else self.ipa_uk
        for word, ipa in zip(self.words, this_ipa):
            testipa = dict_IPA(word).strip()
            print(u"%s=%s" % (testipa, ipa))
            self.assertEqual(testipa, ipa)
# This must be the final part of the file, code after this won't be executed | |
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    # sys.exit() raises SystemExit, so nothing below runs in script mode.
    sys.exit(main())

### Scratch space here
# NOTE(review): indentation was lost in this copy of the file; these lookups
# are assumed to sit at module level -- confirm against the original.
for _scratch_word in ("worse", "pronunciation", "Terpsichore", "Terpsichore"):
    print(dict_IPA(_scratch_word))
### EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment