Skip to content

Instantly share code, notes, and snippets.

def lemmatizer(doc):
    """
    Rebuild a doc from the lemmas of its tokens.

    Each token contributes its ``lemma_``; tokens whose lemma is '-PRON-'
    (the value pronouns such as "I" and "you" get lemmatized to) are
    dropped. The surviving lemmas are joined with single spaces and the
    resulting string is passed to ``nlp.make_doc``, whose result is returned.
    """
    kept_lemmas = []
    for token in doc:
        lemma = token.lemma_
        if lemma == '-PRON-':
            # Pronoun placeholder: leave it out of the rebuilt text.
            continue
        kept_lemmas.append(lemma)
    return nlp.make_doc(u' '.join(kept_lemmas))
def remove_stopwords(doc):
# Extra words and symbols to treat as stop words, on top of the defaults.
CUSTOM_STOP_WORDS = {'commit', 'github', 'pdf', 'download', 'desktop', '$', '|', '\\', '/', '#'}
# Merge the custom entries into the stop-word collection on nlp.Defaults (in-place |=).
nlp.Defaults.stop_words |= CUSTOM_STOP_WORDS
# Register lemmatizer as a pipeline component named 'lemmatizer', positioned after 'ner'.
# NOTE(review): passing a bare function to add_pipe looks like the spaCy v2 API - confirm the installed version.
nlp.add_pipe(lemmatizer,name='lemmatizer',after='ner')
# Register remove_stopwords as the last pipeline component, named "stopwords".
nlp.add_pipe(remove_stopwords, name="stopwords", last=True)
docs = []
emoji_pattern = re.compile("["
u"\U0001F600-\U0001F64F" # emoticons
u"\U0001F300-\U0001F5FF" # symbols & pictographs
u"\U0001F680-\U0001F6FF" # transport & map symbols
u"\U0001F1E0-\U0001F1FF" # flags (iOS)
u"\U00002500-\U00002BEF" # chinese char
u"\U00002702-\U000027B0"
u"\U00002702-\U000027B0"
u"\U000024C2-\U0001F251"
import gensim.corpora as corpora
# Build a gensim Dictionary from docs (presumably a list of token lists - confirm against the code that fills docs).
words = corpora.Dictionary(docs)
# Convert every document with the dictionary's doc2bow, giving one entry per document.
corpus = [words.doc2bow(doc) for doc in docs]
# Empty accumulators; NOTE(review): the code that appends to them is not visible in this excerpt.
perplexities = []
coherence = []
# Topic counts to try, in this order: 3, 4, 10, 20, then 5, 15, 25, ..., 65 (the list is not sorted).
num_topics = [3,4, 10, 20] + list(range(5, 75, 10))
for nt in tqdm(num_topics):
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
id2word=words,
num_topics=nt,
random_state=2,
update_every=1,
passes=10,
apiVersion: v1
kind: Service
metadata:
annotations:
load-balancer.hetzner.cloud/health-check-port: "<YOUR-INGRESS-HEALTH-PORT: e.g. 31902>"
load-balancer.hetzner.cloud/name: "<YOUR-LB-NAME>"
spec:
clusterIP: <Internal-IP>
externalTrafficPolicy: Local
healthCheckNodePort: 30787
...
rancher_kubernetes_engine_config:
...
addons: |-
---
apiVersion: v1
stringData:
token: <YOUR-HETZNER-API-TOKEN>
kind: Secret
metadata:
@jmrobles
jmrobles / proxy.conf.js
Last active October 25, 2020 17:48
proxy.conf base
// Base proxy settings for the '/api' path, exported for the tool that loads
// this file (presumably a dev-server proxy - confirm against the consumer).
const PROXY_CONFIG = {
  '/api': {
    target: 'http://my-backend.com',
    changeOrigin: true,
    secure: false,
    logLevel: 'debug',
  },
};

module.exports = PROXY_CONFIG;
const PROXY_CONFIG = {
"/api": {
"target": "http://my-backend.com",
"changeOrigin": true,
"secure": false,
"logLevel": "debug",
"pathRewrite": {
"^/api": ""
}
}