rohitr360 · May 8, 2016 16:27
diff --git a/logistic_regression_model (1).py b/logistic_regression_model (1).py
 import pandas as pd
 import numpy as np
 import re, os, time, csv, random, pymongo, math, html, string, html2text
 from pymongo import MongoClient
 import xml.etree.ElementTree as ET
 import pymongo
 import sklearn



 #Currently we have a pandas DataFrame with a row for each article classification
 #The fields assumed when writing this code are 'grade', 'text', 'title', 'date', etc.

 def get_lm_words(path='../../pluribus_labs/earnings_call/data/dictionary_data/LoughranMcDonald_MasterDictionary_2014.csv'):
    """Load the 'Word' column of a dictionary CSV and publish it as ``lm_words``.

    Args:
        path: Location of the CSV file. Defaults to the Loughran-McDonald
            master dictionary path this module has always used, so existing
            zero-argument calls behave as before.

    Returns:
        The pandas Series held in the CSV's 'Word' column.

    Side effects:
        Rebinds the module-level name ``lm_words`` to the loaded Series.
    """
    global lm_words
    lm_words = pd.read_csv(path)['Word']
    return lm_words

 def get_tf_vector(text):
    """Count how often each word of ``lm_words`` occurs in ``text``.

    The text is upper-cased and split on whitespace; a token counts for a
    dictionary word only when the two are exactly equal.

    Args:
        text: The document to score, as a string.

    Returns:
        A list of integer counts, one per distinct entry of the module-level
        ``lm_words``, in the order those entries first appear there.

    Raises:
        NameError: if ``lm_words`` has not been assigned yet.
    """
    from collections import Counter

    # Tally the tokens once instead of scanning them again for every
    # dictionary word.
    token_counts = Counter(text.upper().split())
    # dict.fromkeys collapses repeated dictionary words and keeps first-seen
    # order, which is the layout the dict-keyed vector always had.
    return [token_counts[word] for word in dict.fromkeys(lm_words)]

 def get_grade_tf_df(df):
    """Add a 'tf_vector' column to ``df`` and return the grade/vector columns.

    Each value of the ``text`` column is passed through ``get_tf_vector``.
    The new column is written onto the DataFrame that was passed in, so the
    caller's object is modified as well.

    Returns:
        A DataFrame holding only the 'grade' and 'tf_vector' columns.
    """
    df['tf_vector'] = df.text.map(get_tf_vector)
    wanted_columns = ['grade', 'tf_vector']
    return df[wanted_columns]


 def df_to_csv(df, path):
    """Serialise ``df`` as CSV to ``path`` and pass back ``to_csv``'s result."""
    outcome = df.to_csv(path)
    return outcome


 def logistic_regression_fit(grade_tf_df):
    """Fit a logistic regression that predicts 'grade' from 'tf_vector'.

    Args:
        grade_tf_df: DataFrame with a 'grade' column (the labels) and a
            'tf_vector' column. Assumes every 'tf_vector' cell is a sequence
            of counts of the same length -- TODO confirm against the caller.

    Returns:
        The fitted ``LogisticRegression`` estimator. Previously the model was
        created but never trained or returned.
    """
    # ``import sklearn`` alone does not load the linear_model submodule, so
    # import the estimator explicitly.
    from sklearn.linear_model import LogisticRegression

    grade = grade_tf_df.grade
    # Stack the per-row vectors into a 2-D (rows x words) array for fitting.
    tf_matrix = np.array([list(vector) for vector in grade_tf_df.tf_vector])
    LR_model = LogisticRegression()
    return LR_model.fit(tf_matrix, grade)
    
    
 def logistic_regression_predict(LR_fit, tf_matrix):
    """Return a classification (grade) for each tf_vector in ``tf_matrix``.

    Args:
        LR_fit: A fitted model exposing ``predict``.
        tf_matrix: The list of tf_vectors to grade.

    Returns:
        Whatever ``LR_fit.predict`` returns for ``tf_matrix``.
    """
    # Pass the argument that was given; the old code read an undefined
    # ``tf_vector`` name here and failed with NameError.
    LR_predict = LR_fit.predict(tf_matrix)
    return LR_predict

 def logistic_regression_summary(LR_fit):
    """Summarise a fitted model.

    Args:
        LR_fit: A fitted model object.

    Returns:
        ``LR_fit.summary()`` when the model has a callable ``summary``.
        Otherwise a dict with the model's ``classes_``, ``coef_`` and
        ``intercept_`` under the keys 'classes', 'coef' and 'intercept'.

    Raises:
        AttributeError: if the model has neither ``summary`` nor those
            fitted attributes.
    """
    summarise = getattr(LR_fit, 'summary', None)
    if callable(summarise):
        return summarise()
    # scikit-learn's LogisticRegression has no summary(); report its fitted
    # parameters instead of failing.
    return {
        'classes': LR_fit.classes_,
        'coef': LR_fit.coef_,
        'intercept': LR_fit.intercept_,
    }
	import pandas as pd
	import numpy as np
	import re, os, time, csv, random, pymongo, math, html, string, html2text
	from pymongo import MongoClient
	import xml.etree.ElementTree as ET
	import pymongo
	import sklearn



	#Currently we have a pandas DataFrame with a row for each article classification
	#The fields assumed when writing this code are 'grade', 'text', 'title', 'date', etc.

	def get_lm_words(path='../../pluribus_labs/earnings_call/data/dictionary_data/LoughranMcDonald_MasterDictionary_2014.csv'):
		"""Read a dictionary CSV and store its 'Word' column in ``lm_words``.

		Args:
			path: CSV file to read. The default is the Loughran-McDonald
				master dictionary location previously hard-coded here.

		Returns:
			The 'Word' column as a pandas Series.

		Side effects:
			Assigns the Series to the module-level name ``lm_words``.
		"""
		global lm_words
		lm_words = pd.read_csv(path)['Word']
		return lm_words

	def get_tf_vector(text):
		"""Build the term-frequency vector of ``text`` over ``lm_words``.

		``text`` is upper-cased and split on whitespace. Each distinct entry
		of the module-level ``lm_words`` gets the number of tokens that equal
		it exactly.

		Args:
			text: The document to score, as a string.

		Returns:
			A list of integer counts ordered by first appearance of each
			word in ``lm_words``.

		Raises:
			NameError: if ``lm_words`` has not been assigned yet.
		"""
		from collections import Counter

		# A single pass over the tokens replaces one pandas scan per word.
		token_counts = Counter(text.upper().split())
		# dict.fromkeys drops repeated dictionary words while keeping order,
		# matching the dict-keyed layout used before.
		return [token_counts[word] for word in dict.fromkeys(lm_words)]

	def get_grade_tf_df(df):
		"""Compute a 'tf_vector' column for ``df`` and return it with 'grade'.

		``get_tf_vector`` is applied to every value of the ``text`` column.
		The result is stored on ``df`` itself, so the caller's DataFrame
		gains the 'tf_vector' column too.

		Returns:
			A DataFrame containing only the 'grade' and 'tf_vector' columns.
		"""
		text_col = df.text
		df['tf_vector'] = text_col.map(get_tf_vector)
		return df[['grade', 'tf_vector']]


	def df_to_csv(df, path):
		"""Write ``df`` to ``path`` as CSV and return the result of ``to_csv``."""
		return df.to_csv(path)


	def logistic_regression_fit(grade_tf_df):
		"""Train a logistic regression on the 'tf_vector' and 'grade' columns.

		Args:
			grade_tf_df: DataFrame with 'grade' labels and a 'tf_vector'
				column. Assumes each 'tf_vector' cell is an equal-length
				sequence of counts -- TODO confirm against the caller.

		Returns:
			The fitted ``LogisticRegression`` estimator; the earlier code
			built the model without training or returning it.
		"""
		# The linear_model submodule is not loaded by ``import sklearn``.
		from sklearn.linear_model import LogisticRegression

		grade = grade_tf_df.grade
		# One row per document, one column per dictionary word.
		tf_matrix = np.array([list(vector) for vector in grade_tf_df.tf_vector])
		LR_model = LogisticRegression()
		return LR_model.fit(tf_matrix, grade)


	def logistic_regression_predict(LR_fit, tf_matrix):
		"""Classify every tf_vector in ``tf_matrix`` with the fitted model.

		Args:
			LR_fit: A fitted model exposing ``predict``.
			tf_matrix: The list of tf_vectors you want a grade for.

		Returns:
			The output of ``LR_fit.predict(tf_matrix)``: one classification
			(grade) per input tf_vector.
		"""
		# Use the ``tf_matrix`` parameter; ``tf_vector`` was never defined in
		# this scope and raised NameError.
		LR_predict = LR_fit.predict(tf_matrix)
		return LR_predict

	def logistic_regression_summary(LR_fit):
		"""Describe a fitted model.

		Args:
			LR_fit: A fitted model object.

		Returns:
			The result of ``LR_fit.summary()`` if the model provides a
			callable ``summary``. Otherwise a dict mapping 'classes', 'coef'
			and 'intercept' to the model's ``classes_``, ``coef_`` and
			``intercept_`` attributes.

		Raises:
			AttributeError: if the model offers neither ``summary`` nor
				those fitted attributes.
		"""
		summarise = getattr(LR_fit, 'summary', None)
		if callable(summarise):
			return summarise()
		# scikit-learn estimators have no summary(); fall back to the
		# fitted parameters.
		return {
			'classes': LR_fit.classes_,
			'coef': LR_fit.coef_,
			'intercept': LR_fit.intercept_,
		}