Created
July 31, 2019 05:00
-
-
Save jaradc/64c2f58ce3370321eee60c6385be56df to your computer and use it in GitHub Desktop.
Basic Keyword Clustering Example in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from nltk.stem import PorterStemmer, WordNetLemmatizer | |
from nltk.corpus import stopwords | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn import cluster | |
# Shared Porter stemmer used to reduce each keyword token to its stem.
stemmer = PorterStemmer()
# NLTK's English stop-word list, handed to the vectorizer to drop filler words.
sw = stopwords.words('english')
def tokenizer(keyword):
    """Split a keyword phrase on whitespace and stem each word.

    Args:
        keyword: The keyword phrase (a string) to tokenize.

    Returns:
        A list holding the stemmed form of every whitespace-separated
        word, in the order the words appear in ``keyword``.
    """
    return [stemmer.stem(w) for w in keyword.split()]
# Sample keyword phrases to cluster.
keywords = [
    'campaign building',
    'ppc campaign generator',
    'how to build ppc campaigns',
    'how do you group keywords',
    'how to group keywords',
    'keyword grouper',
    'keyword grouping software',
    'ppc campaign builder',
    'best software to group keywords'
]

# Vectorize with the stemming `tokenizer` defined in this file (the name
# `lemmatize` is not defined anywhere here and would raise NameError).
tfidf = TfidfVectorizer(tokenizer=tokenizer, stop_words=sw)
tfidf_matrix = tfidf.fit_transform(keywords)

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back only on older releases that lack it.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

# One row per keyword phrase, one column per stemmed term, TF-IDF weights as values.
X = pd.DataFrame(tfidf_matrix.toarray(),
                 index=keywords, columns=feature_names)

# Cluster the keyword vectors; `pred` holds the cluster label of each row of X.
c = cluster.AffinityPropagation()
pred = c.fit_predict(X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Sir. I just want to ask whether this also works for large sets of keywords, for example 12k+ keywords?