language-engineering’s gists

language-engineering / gist:7388007

Created November 9, 2013 17:54

	from nltk.tokenize import word_tokenize
	from nltk import pos_tag
	from sussex_nltk.parse import dep_parse_sentences_arceager

	# Three toy sentences to feed through the parser.
	sentences = [
	    "This is the first example sentence",
	    "This is the second example sentence",
	    "This is the third example sentence",
	]

	# Tokenise and PoS-tag each sentence on demand (generator expression),
	# then pass the resulting stream to dep_parse_sentences_arceager.
	tagged_sentences = (pos_tag(word_tokenize(sentence)) for sentence in sentences)
	parsed_sents = dep_parse_sentences_arceager(tagged_sentences)

language-engineering / gist:7385679

Last active December 27, 2015 20:39

	from sussex_nltk.parse import load_parsed_example_sentences

	# Load the parsed example sentences via sussex_nltk.
	parsed_example_sentences = load_parsed_example_sentences()

	# To inspect the sentences, print each one under a separator line.
	# The loop body must be indented; single-argument print(...) behaves the
	# same on Python 2 and Python 3.
	for parsed_sentence in parsed_example_sentences:
	    print("--- Sentence ---")
	    print(parsed_sentence)

language-engineering / gist:7356872

Last active December 27, 2015 16:39

	Input:
	query_token = The token we're interested in. It is part of a dependency tree of tokens.

	Output:
	opinions = An array of token forms (which constitute the extracted opinions related to the query token)

	opinions = []
	for each dependent token of query_token:
	    append the dependent's form to the opinions array if the dependent's dependency relation is "det"

language-engineering / gist:7354649

Last active December 27, 2015 16:19

	def opinion_extractor(aspect_token, parsed_sentence):

	# Your function will have 3 steps:

	# i. Initialise a list of opinions
	opinions = []

	# ii. Find opinions (as an example we get all the dependants of the aspect token that have the relation "det")
	opinions += [dependant.form for dependant in parsed_sentence.get_dependants(aspect_token) if dependant.deprel == "det"]
	# You can continue to add to "opinions". Remember you can get the head of a token, and filter by PoS tag or Deprel too!

language-engineering / gist:7354040

Last active December 27, 2015 16:19

	from sussex_nltk.parse import load_parsed_dvd_sentences, load_parsed_example_sentences

	aspect = "plot" # Set this to the aspect token you're interested in
	save_file_path = r"/path/to/savefile.txt" # Set this to the location of the file you wish to create/overwrite with the saved output.

	# Tracking these numbers will allow us to see what proportion of sentences we discovered features in
	sentences_with_discovered_features = 0 # Number of sentences we discovered features in
	total_sentences = 0 # Total number of sentences

	# This is a "with statement": it invokes a context manager, which handles the opening and closing of resources (like files)

language-engineering / gist:7321434

Last active December 27, 2015 11:49

	# Say for example we acquire a list of BasicToken objects by getting all the dependants of a token:
	dependants = parsed_sentence.get_dependants(aspect_token)

	# NOTE: the filters below are presented as alternatives. Each one reassigns
	# `dependants`, so running them one after another applies every filter to
	# the result of the previous one rather than to the original list.

	# We could filter that list, keeping only those tokens whose dependency relations with the aspect token are "dobj", by doing the following:
	dependants = [token for token in dependants if token.deprel == "dobj"]

	# Or we could filter that list, keeping only those tokens whose PoS tags are "RB" (for adverb)
	dependants = [token for token in dependants if token.pos == "RB"]

	# Or we could filter that list, keeping only those tokens whose form is NOT "main" or "special"

language-engineering / gist:7321248

Last active December 27, 2015 11:49

	# Given a ParsedSentence object, and an aspect token acquired from it (as in the previous section),
	# get the head of the aspect token and print it for inspection.
	head_token = parsed_sentence.get_head(aspect_token)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(head_token)

language-engineering / gist:7321059

Last active December 27, 2015 11:49

	# Given a ParsedSentence object, and an aspect token acquired from it (as in the previous section),
	# get all of the dependants of that aspect token.
	dependants = parsed_sentence.get_dependants(aspect_token)

	# Print each dependant for inspection. The loop body must be indented;
	# single-argument print(...) behaves the same on Python 2 and Python 3.
	for dependant in dependants:
	    print(dependant)

language-engineering / gist:7320665

Last active December 27, 2015 11:39

	aspect = "dialogue"

	# If you have a ParsedSentence object, you can get all the tokens whose form matches the aspect as shown below.
	# So instead of just printing the parsed_sentence as in the previous section, get its aspect tokens and print them.
	aspect_tokens = parsed_sentence.get_query_tokens(aspect)

	# Iterate over the matching tokens and print each one for inspection.
	# The loop body must be indented; single-argument print(...) behaves the
	# same on Python 2 and Python 3.
	for aspect_token in aspect_tokens:
	    print(aspect_token)

language-engineering / gist:7320227

Last active November 18, 2016 18:11

This shows you how to search the Amazon DVD reviews for a particular query token.

	from sussex_nltk.parse import load_parsed_dvd_sentences

	aspect = "dialogue" # Our aspect word
	# Load the parsed DVD sentences for this aspect word.
	parsed_sentences = load_parsed_dvd_sentences(aspect)

	# To inspect the sentences, print each one under a separator line.
	# The loop body must be indented; single-argument print(...) behaves the
	# same on Python 2 and Python 3.
	for parsed_sentence in parsed_sentences:
	    print("--- Sentence ---")
	    print(parsed_sentence)