Skip to content

Instantly share code, notes, and snippets.

View FeryET's full-sized avatar

Farhood FeryET

View GitHub Profile
# Split `docs` and `y` into train/test parts, holding out 10% for testing and
# shuffling before the split.
# NOTE(review): `docs` and `y` are not assigned anywhere before this statement in
# the visible text, and the imports it needs appear further down. This looks like
# several independent snippets pasted together -- confirm the intended order.
docs_train, docs_test, y_train, y_test = train_test_split(docs, y, test_size=0.1, shuffle=True)
# The four newsgroup categories passed to the dataset fetch below.
labels = ["rec.autos", "rec.motorcycles", "rec.sport.baseball", "rec.sport.hockey"]
# Fetch only the categories in `labels`, asking for the 'all' subset; with
# return_X_y=True the call result is unpacked into the raw texts and their targets.
raw_docs, y = fetch_20newsgroups(subset='all', return_X_y=True, categories=labels)
import logging
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import (RepeatedStratifiedKFold, cross_val_score, )
from sklearn.pipeline import Pipeline
@FeryET
FeryET / clean_text.py
Created August 26, 2020 10:06
Gists for Medium Article: Text Classification using LDA
# Build the cleaner object used to preprocess the raw documents.
# NOTE(review): `SpacyCleaner` and `workers` are defined outside the visible text;
# presumably `chunksize` is a batch size and `workers` a parallelism level -- confirm.
processor = SpacyCleaner(chunksize=1000, workers=workers)
# Run the raw texts through the cleaner and keep the result as `docs`.
docs = processor.transform(raw_docs)
def foo(func, *args, **kwargs):
    """Call *func* and return the nested ``response`` entry of its result.

    Args:
        func: Callable invoked as ``func(*args, **kwargs)``.
        *args: Positional arguments forwarded unchanged to *func*.
        **kwargs: Keyword arguments forwarded unchanged to *func*.

    Returns:
        The value found at ``result["a key"]["in json"]["response"]``, where
        ``result`` is whatever *func* returned.

    Raises:
        KeyError: If the result is a dict-like object that lacks one of the
            three keys; the subscript lookups are not guarded.
    """
    res = func(*args, **kwargs)
    # Three chained lookups: any missing level propagates the lookup error
    # to the caller rather than being masked with a default.
    return res["a key"]["in json"]["response"]
class A:
    """Object that keeps a reference to an ``api`` handle."""

    def __init__(self, api):
        """Initialise the parent class, then store *api* on the instance.

        Args:
            api: Object kept as ``self.api``. Its type is not shown in this
                file -- confirm against the callers.
        """
        super().__init__()
        self.api = api
def get_x(self):