This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public Set<String> parseRawString(String rawString, SnowballStemmer stemmer){ | |
Set<String> answer = new HashSet<String>(); | |
String[] firstSplit = rawString.split("[\\t\\n\\r]"); | |
List<String> rawSplit = new ArrayList<String>(); | |
for (String s: firstSplit) try{ | |
URL url = new URL(s); | |
} catch (MalformedURLException e){ | |
rawSplit.addAll(Arrays.asList(s.split("[\\p{P}]"))); | |
} | |
for (String s: rawSplit){ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import wordnet | |
from nltk.stem.wordnet import WordNetLemmatizer | |
import itertools | |
def Synonym_Checker(word1, word2): | |
"""Checks if word1 and word2 and synonyms. Returns True if they are, otherwise False""" | |
equivalence = WordNetLemmatizer() | |
word1 = equivalence.lemmatize(word1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
class PyGrep: | |
def __init__(self, directory): | |
self.directory = directory | |
def grab_all_files_with_ending(self, file_ending): | |
"""Will return absolute paths to all files with given file ending in self.directory""" | |
walk_results = os.walk(self.directory) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ElasticEnchant: | |
def __init__(self, esDatabase): | |
self.es_instance = esDatabase | |
def produce_dictionary(self, output_file, **kwargs): | |
"""Produces a dictionary or updates it depending on kwargs | |
If no kwargs are given then this method will write a full dictionary including all | |
entries in all indices and types and output it in an enchant-friendly way to the output file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Blueprint, request, redirect, render_template, url_for | |
from flask.views import MethodView | |
from flask.ext.mongoengine.wtf import model_form | |
from SpoolEngine.auth import requires_auth | |
from SpoolEngine.models import Post, BlogPost, Video, Image, Quote, Comment | |
# Flask blueprint named 'admin'; its template folder is set to 'templates'.
admin = Blueprint('admin', __name__, template_folder='templates')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"mappings": { | |
"properties": { | |
"searchable_text": { | |
"type": "multi_field", | |
"fields": { | |
"full_words": { | |
"type": "string", | |
"store": "yes", | |
"index": "analyzed", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def random_document(document_layout, type, **kwargs):
    """Build a dictionary of random items keyed by the entries of a layout.

    Every entry of ``document_layout`` becomes a key. Its value is
    ``random_item(type, **kwargs)``, unless the entry that follows it is a
    list, in which case the value is a sub-document built from that list.

    The layout passed in is never modified: the sentinel needed to give the
    last key a successor is appended to a private copy, so the same layout
    can safely be reused across calls.

    Args:
        document_layout: iterable of keys describing the document.
        type: forwarded unchanged to ``random_item`` (the name shadows the
            builtin but is kept for keyword-argument compatibility).
        **kwargs: forwarded unchanged to ``random_item``.

    Returns:
        dict mapping each layout entry to a random item or sub-document.

    NOTE(review): a nested list is also visited as a key on the following
    step, and lists are unhashable, so layouts containing nested lists look
    like they raise ``TypeError``. Nested lists also get no sentinel, so
    their last entry is not emitted. Confirm the intended layout format.
    """
    # Work on a copy so the caller's list does not grow by one sentinel
    # every time this function is called.
    padded_layout = list(document_layout) + ["dummy_item"]

    def build(layout):
        # The final entry only serves as the "next item" for the entry before it.
        return {
            layout[i]: build(layout[i + 1])
            if isinstance(layout[i + 1], list)
            else random_item(type, **kwargs)
            for i in range(len(layout) - 1)
        }

    return build(padded_layout)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
import cPickle as pickle | |
import random | |
import itertools | |
import string | |
def words(entry):
    """Split *entry* on whitespace into lower-cased, ASCII-only words.

    Characters outside ASCII are dropped rather than raising. Both byte
    strings and text strings are accepted; the result is always a list of
    text strings (``unicode`` on Python 2, ``str`` on Python 3).

    Args:
        entry: the byte or text string to tokenize.

    Returns:
        list of words in their original order; empty for blank input.
    """
    cleaned = []
    for word in entry.split():
        lowered = word.lower()
        if isinstance(lowered, bytes):
            # Byte strings: decode directly, skipping non-ASCII bytes.
            lowered = lowered.decode('ascii', 'ignore')
        else:
            # Text has no usable .decode(); strip non-ASCII via an
            # encode/decode round trip instead of failing.
            lowered = lowered.encode('ascii', 'ignore').decode('ascii')
        cleaned.append(lowered)
    return cleaned
def letters(entry): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyfuzz.generator import random_regex | |
from guess_language import guessLanguageName | |
from collections import Counter | |
def randomness_histogram(iterations, length):
    """Tally the language guessed for randomly generated strings.

    Calls ``random_regex(regex="[a-z \n]", length=length)`` once per
    iteration, passes each result to ``guessLanguageName`` and counts the
    answers. Presumably each generated string is ``length`` characters drawn
    from lowercase letters, space and newline (verify against pyfuzz).

    Args:
        iterations: number of random strings to generate and classify.
        length: forwarded to ``random_regex`` as the ``length`` argument.

    Returns:
        collections.Counter mapping each guessed language name to the number
        of times it was returned.
    """
    # The sample count comes from ``iterations``; ``length`` only shapes each
    # generated string.
    guesses = (
        guessLanguageName(random_regex(regex="[a-z \n]", length=length))
        for _ in range(iterations)
    )
    return Counter(guesses)
# Script entry: show which language names the guesser returns most often.
print randomness_histogram(1000, 200).most_common(10) # prints the 10 most common (language, count) pairs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import flask | |
import sys | |
from flask import Flask, request | |
import random | |
from lxml import etree | |
import xmltodict | |
import numpy as np | |
import operator | |
OlderNewer