Skip to content

Instantly share code, notes, and snippets.

@Slater-Victoroff
Slater-Victoroff / ElasticEnchant
Created June 28, 2013 18:23
Pyenchant spell checking with ElasticSearch
class ElasticEnchant:
def __init__(self, esDatabase):
self.es_instance = esDatabase
def produce_dictionary(self, output_file, **kwargs):
"""Produces a dictionary or updates it depending on kwargs
If no kwargs are given then this method will write a full dictionary including all
entries in all indices and types and output it in an enchant-friendly way to the output file.
@Slater-Victoroff
Slater-Victoroff / PyGrep
Created June 28, 2013 18:22
Simple grepping for files in Python in a nice, useful way.
import os
class PyGrep:
def __init__(self, directory):
self.directory = directory
def grab_all_files_with_ending(self, file_ending):
"""Will return absolute paths to all files with given file ending in self.directory"""
walk_results = os.walk(self.directory)
@Slater-Victoroff
Slater-Victoroff / Synonym_checker
Created June 5, 2013 13:23
General-case synonym matching using NLTK.
from nltk.corpus import wordnet
from nltk.stem.wordnet import WordNetLemmatizer
import itertools
def Synonym_Checker(word1, word2):
"""Checks if word1 and word2 are synonyms. Returns True if they are, otherwise False"""
equivalence = WordNetLemmatizer()
word1 = equivalence.lemmatize(word1)
@Slater-Victoroff
Slater-Victoroff / gist:5235788
Created March 25, 2013 08:54
Cleaning strings into tokens with stemming and URL removal
public Set<String> parseRawString(String rawString, SnowballStemmer stemmer){
Set<String> answer = new HashSet<String>();
String[] firstSplit = rawString.split("[\\t\\n\\r]");
List<String> rawSplit = new ArrayList<String>();
for (String s: firstSplit) try{
URL url = new URL(s);
} catch (MalformedURLException e){
rawSplit.addAll(Arrays.asList(s.split("[\\p{P}]")));
}
for (String s: rawSplit){