Skip to content

Instantly share code, notes, and snippets.

@prrao87
Created August 30, 2019 20:55
Show Gist options
  • Save prrao87/73a4507be6feba47fbc4ccb7702fc6e7 to your computer and use it in GitHub Desktop.
Save prrao87/73a4507be6feba47fbc4ccb7702fc6e7 to your computer and use it in GitHub Desktop.
class SVMSentiment(Base):
    """Fine-grained sentiment classifier built on a scikit-learn pipeline.

    The pipeline turns raw text into token counts, re-weights them with
    TF-IDF, and feeds the result to an ``SGDClassifier`` configured with
    hinge loss and an L2 penalty (i.e. a linear SVM fitted by stochastic
    gradient descent).
    """

    def __init__(self, model_file: str = None) -> None:
        """Assemble the unfitted vectorizer -> TF-IDF -> classifier pipeline.

        Args:
            model_file: Accepted but never read in this class; presumably
                kept so the constructor signature matches sibling
                classifiers -- TODO confirm against callers.
        """
        super().__init__()
        # Imported inside the constructor, so scikit-learn is only loaded
        # when this classifier is actually instantiated.
        from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
        from sklearn.linear_model import SGDClassifier
        from sklearn.pipeline import Pipeline

        # Hinge loss + L2 penalty makes the SGD classifier a linear SVM.
        # With tol=None there is no tolerance-based early stop, so training
        # is bounded by max_iter; random_state pins the shuffling seed.
        svm_settings = dict(
            loss='hinge',
            penalty='l2',
            alpha=1e-3,
            random_state=42,
            max_iter=100,
            learning_rate='optimal',
            tol=None,
        )
        steps = [
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', SGDClassifier(**svm_settings)),
        ]
        self.pipeline = Pipeline(steps)

    def predict(self, train_file: str, test_file: str, lower_case: bool = False) -> pd.DataFrame:
        """Fit the pipeline on the training file, then label the test file.

        The pipeline is (re)fitted on every call; nothing is cached between
        calls by this method.

        Args:
            train_file: Passed to ``self.read_data``; the resulting frame
                must expose ``'text'`` and ``'truth'`` columns.
            test_file: Passed to ``self.read_data``; the resulting frame
                must expose a ``'text'`` column.
            lower_case: Forwarded unchanged to ``self.read_data`` for both
                files (presumably toggles lower-casing of the text --
                ``read_data`` is defined outside this class, verify there).

        Returns:
            The test frame produced by ``self.read_data`` with an added
            ``'pred'`` column holding the predicted class labels.
        """
        training_frame = self.read_data(train_file, lower_case)
        fitted = self.pipeline.fit(training_frame['text'], training_frame['truth'])

        # The test set is read only after fitting, and the predictions are
        # attached to that same frame before it is handed back.
        evaluation_frame = self.read_data(test_file, lower_case)
        predicted_labels = fitted.predict(evaluation_frame['text'])
        evaluation_frame['pred'] = predicted_labels
        return evaluation_frame
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment