Created
May 8, 2016 16:27
-
-
Save rohitr360/9a386689e3819d9f071faac59d9b98fa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import html
import math
import os
import random
import re
import string
import time
import xml.etree.ElementTree as ET
from collections import Counter

import html2text
import numpy as np
import pandas as pd
import pymongo
import sklearn
from pymongo import MongoClient
from sklearn.linear_model import LogisticRegression
# Input: a pandas DataFrame with one row per article classification.
# The columns assumed when writing this code are 'grade', 'text', 'title', 'date', etc.
def get_lm_words():
    """Load the 'Word' column of the Loughran-McDonald master dictionary CSV.

    The loaded column is also stored in the module-level ``lm_words`` global,
    which ``get_tf_vector`` reads.

    Returns:
        The 'Word' column of the dictionary CSV as a pandas Series.
    """
    global lm_words
    dictionary_path = '../../pluribus_labs/earnings_call/data/dictionary_data/LoughranMcDonald_MasterDictionary_2014.csv'
    master_dictionary = pd.read_csv(dictionary_path)
    lm_words = master_dictionary['Word']
    return lm_words
def get_tf_vector(text, words=None):
    """Count how often each dictionary word occurs in ``text``.

    The text is upper-cased and split on whitespace; a token only counts when
    it is exactly equal to a dictionary word (punctuation is not stripped).

    Args:
        text: The document to score.
        words: Optional iterable of dictionary words. When omitted, the
            module-level ``lm_words`` set by ``get_lm_words()`` is used, so
            that function must have been called beforehand.

    Returns:
        A list of integer counts, one per distinct dictionary word, in the
        order the words first appear in the dictionary.
    """
    vocabulary = lm_words if words is None else words
    # Tally the tokens once instead of rescanning the text for every word.
    token_counts = Counter(text.upper().split())
    # dict.fromkeys drops repeated dictionary entries while keeping their
    # order, so the result lines up with the keys of a word -> count dict.
    return [token_counts[word] for word in dict.fromkeys(vocabulary)]
def get_grade_tf_df(df):
    """Attach a term-frequency vector to every row and return grade + vector.

    A 'tf_vector' column is added to ``df`` in place, computed by applying
    ``get_tf_vector`` to each value of the 'text' column.

    Args:
        df: DataFrame with at least 'grade' and 'text' columns.

    Returns:
        The 'grade' and 'tf_vector' columns of ``df`` as a DataFrame.
    """
    df['tf_vector'] = df['text'].map(get_tf_vector)
    wanted_columns = ['grade', 'tf_vector']
    return df[wanted_columns]
def df_to_csv(df, path):
    """Serialize ``df`` as CSV to ``path``.

    Args:
        df: Object exposing a ``to_csv`` method (e.g. a pandas DataFrame).
        path: Destination passed straight through to ``to_csv``.

    Returns:
        Whatever ``df.to_csv(path)`` returns.
    """
    outcome = df.to_csv(path)
    return outcome
def logistic_regression_fit(grade_tf_df):
    """Fit a logistic-regression classifier on term-frequency vectors.

    Args:
        grade_tf_df: DataFrame with a 'grade' column (the labels) and a
            'tf_vector' column holding one sequence of counts per row, as
            returned by ``get_grade_tf_df``.

    Returns:
        The fitted ``LogisticRegression`` model.
    """
    labels = grade_tf_df.grade
    # Each cell of 'tf_vector' holds a whole vector; stack them into the
    # (n_samples, n_features) array that scikit-learn expects.
    features = np.array([list(vector) for vector in grade_tf_df.tf_vector])
    LR_model = LogisticRegression()
    LR_model.fit(features, labels)
    return LR_model
def logistic_regression_predict(LR_fit, tf_matrix):
    """Classify each term-frequency vector with a fitted model.

    Args:
        LR_fit: Fitted model exposing a ``predict`` method.
        tf_matrix: The tf vectors you want a grade for, one per row.

    Returns:
        The result of ``LR_fit.predict(tf_matrix)``: one classification
        (grade) per input tf vector.
    """
    # Pass the tf_matrix argument itself; the name tf_vector does not exist
    # in this scope.
    LR_predict = LR_fit.predict(tf_matrix)
    return LR_predict
def logistic_regression_summary(LR_fit):
    """Summarize a fitted model.

    Models that provide a ``summary()`` method are summarized by calling it.
    scikit-learn estimators have no such method, so for them the fitted
    parameters are returned instead of raising ``AttributeError``.

    Args:
        LR_fit: A fitted model.

    Returns:
        ``LR_fit.summary()`` when that method exists; otherwise a dict with
        the keys 'classes', 'coefficients' and 'intercept', taken from the
        model's ``classes_``, ``coef_`` and ``intercept_`` attributes (None
        for any attribute the model does not have).
    """
    summary = getattr(LR_fit, "summary", None)
    if callable(summary):
        return summary()
    return {
        "classes": getattr(LR_fit, "classes_", None),
        "coefficients": getattr(LR_fit, "coef_", None),
        "intercept": getattr(LR_fit, "intercept_", None),
    }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment