significant relationship = word weight > 500
If the word "que" has a "significant relationship" with the word "la", then it is negative.
-
NO QUIERO QUE SEA LA ULTIMA PELICULA DE JOHNNY DEEP EN PIRATAS DEL CARIBE
-
yo creo y lo digo mui en lo personal que la 4 fue la mas aburrida pues faltaron personajes que hacen de estas peliculas mas interesantes para mi estubo mejor la 3 y eso que soy bien fan de piratas del caribe
Test Model Negative Positive:
{
word: "que"
relationships: [
{word: "la", weight: 1},
{word: "hacen", weight: 1},
{word: "soy", weight: 1},
]
}
Test Model Positive:
{
word: "que"
relationships: [
{word: "la", weight: 3221},
{word: "hacen", weight: 23},
{word: "soy", weight: 2655},
]
# }  <- closing brace of the "Test Model Positive" example in the notes above
def add_word(big_data_relation, word, previous_word, next_word):
    """Record one occurrence of ``word`` and its neighbours in ``big_data_relation``.

    ``big_data_relation`` is a list of dicts shaped like::

        {'parent_word': 'que',
         'relationships': [{'word': 'la', 'weight': 3}, ...]}

    The list is modified in place.  If no entry has ``parent_word == word`` a
    new entry is appended.  Then, for each non-empty neighbour, the matching
    relationship's ``weight`` is incremented by one, or a new relationship
    with ``weight`` 1 is appended when the neighbour has not been seen yet.

    Args:
        big_data_relation: list of word entries, updated in place.
        word: the word whose entry is created or updated.
        previous_word: the word before ``word``; falsy (``None``/``''``) if none.
        next_word: the word after ``word``; falsy (``None``/``''``) if none.

    Returns:
        None.
    """
    # Falsy neighbours (None or '') are skipped; the order previous -> next is kept.
    neighbours = [w for w in (previous_word, next_word) if w]

    # Every entry for this word is updated (normally there is exactly one).
    entries = [e for e in big_data_relation if e['parent_word'] == word]
    if not entries:
        new_entry = {'parent_word': word, 'relationships': []}
        big_data_relation.append(new_entry)
        entries = [new_entry]

    for entry in entries:
        relationships = entry['relationships']
        for neighbour in neighbours:
            # The flag must be reset for each neighbour: sharing one flag
            # across both neighbours dropped a new next_word whenever the
            # previous_word was already known.
            seen = False
            for relationship in relationships:
                if relationship['word'] == neighbour:
                    relationship['weight'] += 1
                    seen = True
            if not seen:
                relationships.append({'word': neighbour, 'weight': 1})
def format_text(text):
    """Return ``text`` lower-cased with every '.', ',', '?' and '!' removed."""
    cleaned = text.lower()
    for mark in ('.', ',', '?', '!'):
        cleaned = cleaned.replace(mark, '')
    return cleaned
# Train this with texts of a single polarity (all positive or all negative).
# Once big_data_relation has been built it holds the words and relationships
# for that polarity; for example, after feeding in many negative texts,
# big_data_relation lists the words and relationships seen in negative texts.
big_data_relation = []

# raw_input only exists on Python 2; on Python 3 the equivalent is input().
try:
    _read_line = raw_input
except NameError:
    _read_line = input

while True:
    try:
        text = _read_line('Write a text: ')
    except EOFError:
        # stdin was closed: stop the loop instead of dying with a traceback.
        break
    # split() with no argument collapses runs of whitespace, so repeated
    # spaces (or an empty line) never produce '' as a word.
    list_of_words = format_text(text).split()
    n = len(list_of_words)
    for i, current_word in enumerate(list_of_words):
        previous_word = list_of_words[i - 1] if i > 0 else None
        next_word = list_of_words[i + 1] if i < n - 1 else None
        add_word(big_data_relation, current_word, previous_word, next_word)
    # Single parenthesised argument: prints the same on Python 2 and 3.
    print("----------\n%s\n---------" % big_data_relation)