Last active
November 22, 2021 15:28
-
-
Save andrea-dagostino/f40eea6a731a1ad1376fc70c5dcf3aab to your computer and use it in GitHub Desktop.
posts/raggruppamento-testuale-con-tf-idf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the required pieces of scikit-learn
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Import the other required libraries
import pandas as pd
import numpy as np

# Libraries for text manipulation
import re
import string
import nltk
from nltk.corpus import stopwords

# Import the visualization library
import matplotlib.pyplot as plt

# Newsgroups to load from the 20 Newsgroups corpus.
categories = [
    'comp.graphics',
    'comp.os.ms-windows.misc',
    'rec.sport.baseball',
    'rec.sport.hockey',
    'alt.atheism',
    'soc.religion.christian',
]

# Fetch the shuffled training subset restricted to `categories`.
# `remove` asks scikit-learn to drop headers, footers and quoted text from
# each post before returning it.
# NOTE: this call downloads the corpus if it is not already cached locally.
dataset = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,
    remove=('headers', 'footers', 'quotes'),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment