...
rancher_kubernetes_engine_config:
  ...
  addons: |-
    ---
    apiVersion: v1
    kind: Secret
    metadata:
      ...
    stringData:
      token: <YOUR-HETZNER-API-TOKEN>
    ---
    apiVersion: v1
    kind: Service
    metadata:
      annotations:
        load-balancer.hetzner.cloud/health-check-port: "<YOUR-INGRESS-HEALTH-PORT, e.g. 31902>"
        load-balancer.hetzner.cloud/name: "<YOUR-LB-NAME>"
    spec:
      clusterIP: <Internal-IP>
      externalTrafficPolicy: Local
      healthCheckNodePort: 30787
import gensim
import gensim.corpora as corpora
from gensim.models import CoherenceModel
from tqdm import tqdm

# `docs` is the list of tokenized documents built in the preprocessing snippets below.
words = corpora.Dictionary(docs)
corpus = [words.doc2bow(doc) for doc in docs]

# Sweep candidate topic counts, recording perplexity and coherence for model selection.
perplexities = []
coherence = []
num_topics = [3, 4, 10, 20] + list(range(5, 75, 10))
for nt in tqdm(num_topics):
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                                id2word=words,
                                                num_topics=nt,
                                                random_state=2,
                                                update_every=1,
                                                passes=10)
    # Minimal completion: the source cuts off inside the loop; these appends
    # fill the two metric lists declared above.
    perplexities.append(lda_model.log_perplexity(corpus))
    coherence.append(CoherenceModel(model=lda_model, texts=docs, dictionary=words,
                                    coherence='c_v').get_coherence())
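Once the sweep finishes, a short sketch (not in the original gist) can pick the topic count whose model scored the highest c_v coherence:

best_nt = max(zip(coherence, num_topics))[1]  # highest coherence wins
print(f"best number of topics by coherence: {best_nt}")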
import re

docs = []
emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002500-\U00002BEF"  # box drawing & misc symbols
                           u"\U00002702-\U000027B0"  # dingbats
                           u"\U000024C2-\U0001F251"  # enclosed characters
                           "]+", flags=re.UNICODE)
import spacy

nlp = spacy.load('en_core_web_sm')  # assumed model; the gist does not show which one is loaded

# Extend spaCy's default stop word list with project-specific noise tokens.
CUSTOM_STOP_WORDS = {'commit', 'github', 'pdf', 'download', 'desktop', '$', '|', '\\', '/', '#'}
nlp.Defaults.stop_words |= CUSTOM_STOP_WORDS

def lemmatizer(doc):
    """
    This takes in a doc of tokens from the NER and lemmatizes them.
    Pronouns (like "I" and "you") get lemmatized to '-PRON-', so I'm removing those.
    """
    doc = [token.lemma_ for token in doc if token.lemma_ != '-PRON-']
    doc = u' '.join(doc)
    return nlp.make_doc(doc)

def remove_stopwords(doc):
    # Minimal completion (body truncated in the source): drop stop-word tokens, mirroring lemmatizer.
    doc = [token.text for token in doc if not token.is_stop]
    return nlp.make_doc(u' '.join(doc))

# spaCy v2-style pipeline: custom components are registered as plain functions.
nlp.add_pipe(lemmatizer, name='lemmatizer', after='ner')
nlp.add_pipe(remove_stopwords, name="stopwords", last=True)
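A quick check of the assembled pipeline (the sentence is illustrative):

doc = nlp(u"The cats were running around the repository")
print([token.text for token in doc])  # lemmatized tokens with stop words removed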
from bs4 import BeautifulSoup

def html2ScrapedWeb(url: str, html: str) -> ScrapedWebVitamined:
    """
    Parse HTML with BS4's html5lib parser and get the <body> content without
    <nav>, <script>, <footer>.
    It focuses on the content.
    """
    dom = BeautifulSoup(html, 'html5lib')
    # 1. Get title
    title = dom.title.string if dom.title else None
    # 2. Get description (minimal completion; the source truncates here)
    description_tag = dom.find('meta', attrs={'name': 'description'})
    description = description_tag.get('content') if description_tag else None
from typing import List

class ScrapedWeb(object):
    """
    Scraped web page (POJO-style value object).
    """
    def __init__(self, url: str, title: str, description: str, headings: List[str], contents: List[str], dom: BeautifulSoup):
        self.url = url
        self.title = title
        self.description = description
        self.headings = headings
        self.contents = contents  # completion: remaining fields implied by the signature
        self.dom = dom
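An end-to-end usage sketch tying the scraper to the topic-modeling pipeline above (the URL is a placeholder, requests is my choice of HTTP client, and it presumes the truncated html2ScrapedWeb returns the populated object):

import requests

resp = requests.get("https://example.com")  # placeholder URL
page = html2ScrapedWeb("https://example.com", resp.text)
# Feed scraped text through the emoji filter and spaCy pipeline into the LDA corpus.
for text in page.contents:
    cleaned = emoji_pattern.sub("", text)
    docs.append([token.text for token in nlp(cleaned)])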