JM Robles jmrobles

jmrobles / ingress-docker-registry.yaml

Created May 23, 2020 10:38

	apiVersion: extensions/v1beta1
	kind: Ingress
	metadata:
	name: docker-registry-ingress
	namespace: default
	annotations:
	kubernetes.io/ingress.class: "nginx"
	cert-manager.io/issuer: "letsencrypt-prod"

	spec:

jmrobles / simple-go-http-server.go

Last active June 14, 2020 11:16

	package main

	import (
	"fmt"
	"net/http"
	)

	func hello(w http.ResponseWriter, req *http.Request) {

	fmt.Fprintf(w, "hello\n")

jmrobles / do-backup.sh

Created July 6, 2020 17:08

Backup Postgres DB Script

	#!/bin/bash
	# Dump a Postgres database, compress it with bzip2, encrypt it with mcrypt
	# and upload the result to S3.
	#
	# Environment read by this script:
	#   POSTGRES_DB, POSTGRES_USER, POSTGRES_HOST  - passed to pg_dump
	#   POSTGRES_PASSWORD                          - exported to pg_dump as PGPASSWORD (may be empty)
	#   DB_BACKUP_PASSWORD                         - key handed to mcrypt
	#   S3_BACKUP_PATH                             - destination given to `aws s3 cp`

	# Stop at the first failing step (or unset variable) so that a failed dump
	# is never compressed, encrypted and uploaded as if it were a good backup.
	set -euo pipefail

	export DUMP_FILE="/backup_$(date +%Y%m%d_%H%M%S).pgdump"

	# All expansions are quoted so values containing spaces or glob characters
	# reach the tools as a single argument.
	PGPASSWORD="${POSTGRES_PASSWORD:-}" pg_dump -d "$POSTGRES_DB" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -f "$DUMP_FILE"

	# bzip2 replaces the dump with ${DUMP_FILE}.bz2
	bzip2 "$DUMP_FILE"

	# The encrypted output is the .bz2.nc file uploaded below.
	# NOTE(review): the key is passed on the command line and is therefore
	# visible in the process list while mcrypt runs.
	mcrypt "${DUMP_FILE}.bz2" -k "$DB_BACKUP_PASSWORD"

	aws s3 cp "${DUMP_FILE}.bz2.nc" "$S3_BACKUP_PATH"

jmrobles / backup-postgres-cronjob.yaml

Created July 6, 2020 17:38

jmrobles / Dockerfile

Created July 6, 2020 17:55

Dockerfile for Postgres Backup Container

	# Image used to run the Postgres backup script: the PostGIS base image
	# plus the AWS CLI and mcrypt.
	FROM mdillon/postgis:9.6

	# Update and install in a single layer so a cached, stale package index is
	# never reused when the package list changes; apt-get is used because apt's
	# command-line interface is not intended for scripts. The downloaded
	# package lists are removed afterwards to keep the layer small.
	RUN apt-get update \
	 && apt-get install -y awscli mcrypt \
	 && rm -rf /var/lib/apt/lists/*

	# The build context must contain the backup script under this exact name.
	COPY do_backup.sh /
	RUN chmod +x /do_backup.sh

jmrobles / hn-upvoted-scrap.py

Created August 13, 2020 14:46

Hacker News upvoted-stories scraper

	import requests
	from bs4 import BeautifulSoup
	creds = {'acct': 'YOUR_ACCOUNT', 'pw': 'YOUR_PASSWORD'}
	sess = requests.Session()
	resp = sess.post('https://news.ycombinator.com/login', creds)
	assert resp.status_code == 200
	cnt = 0
	items = []
	nextUrl = f"https://news.ycombinator.com/upvoted?id={creds['acct']}"
	while True:

jmrobles / selenium-chrome-scrapper.py

Created August 13, 2020 15:30

Selenium with Chrome webdriver scraper class

	class ScraperWebJS:

	prepared = False

	def prepare(self):
	"""
	Prepare the headless browser for a scrap session
	"""
	if self.prepared:
	return

jmrobles / selenium-scrap-loop.py

Created August 13, 2020 15:33

Main loop to scrape the links with Selenium

	scrapper = ScraperWebJS()
	scraped_urls = []
	for url in tqdm(urls):
	try:
	content = scrapper.scrape(url)
	except Exception as err:
	print(f"Error can't scrape {url} => {err}")
	continue
	if content is None:
	continue

jmrobles / scrapeweb-class.py

Created August 13, 2020 15:49

	class ScrapedWeb(object):
	"""
	Scraped web (POJO)
	"""
	def __init__(self, url: str, title: str, description: str, headings: List[str], contents: List[str], dom: BeautifulSoup):

	self.url = url
	self.title = title
	self.description = description
	self.headings = headings

jmrobles / html2ScrapWeb.py

Created August 13, 2020 15:51

	def html2ScrapedWeb(url: str, html: str) -> ScrapedWebVitamined:
	"""
	Parse HTML using BS4 HTML5Lib parser and get <body> content without
	<nav>, <script>, <footer>
	It's focus on content
	"""
	dom = BeautifulSoup(html, 'html5lib')
	# 1. Get title
	title = dom.title.string if dom.title else None
	# 2. Get description