apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: docker-registry-ingress
  namespace: default
  annotations:
    kubernetes.io/ingress.class: "nginx"
    cert-manager.io/issuer: "letsencrypt-prod"
spec:
  # Truncated in the preview; an illustrative completion (host, Service
  # and secret names are assumptions):
  tls:
    - hosts:
        - registry.example.com
      secretName: docker-registry-tls
  rules:
    - host: registry.example.com
      http:
        paths:
          - backend:
              serviceName: docker-registry
              servicePort: 5000
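Once cert-manager has issued the certificate, the registry should answer over HTTPS; a quick smoke test, using the hypothetical host from the spec above:

docker login registry.example.com
docker tag alpine registry.example.com/test/alpine
docker push registry.example.com/test/alpine
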
package main

import (
    "fmt"
    "net/http"
)

// hello replies to every request with a plain-text greeting.
func hello(w http.ResponseWriter, req *http.Request) {
    fmt.Fprintf(w, "hello\n")
}

// The preview cuts off here; a minimal assumed entry point:
func main() {
    http.HandleFunc("/hello", hello)
    http.ListenAndServe(":8080", nil)
}

jmrobles / do-backup.sh
Created July 6, 2020 17:08
Backup Postgres DB Script
#!/bin/bash
# Dump the database, compress it, encrypt it, and upload it to S3.
export DUMP_FILE="/backup_$(date +%Y%m%d_%H%M%S).pgdump"
PGPASSWORD="$POSTGRES_PASSWORD" pg_dump -d "$POSTGRES_DB" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -f "$DUMP_FILE"
bzip2 "$DUMP_FILE"
# mcrypt writes the encrypted copy with a .nc suffix
mcrypt "${DUMP_FILE}.bz2" -k "$DB_BACKUP_PASSWORD"
aws s3 cp "${DUMP_FILE}.bz2.nc" "$S3_BACKUP_PATH"
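
Restoring is the same pipeline in reverse. A sketch assuming the same environment variables and that S3_BACKUP_PATH is a key prefix; since pg_dump ran without -F, the dump is plain SQL and is replayed with psql rather than pg_restore (the file name below is illustrative):

aws s3 cp "${S3_BACKUP_PATH}/backup_20200706_170800.pgdump.bz2.nc" .
mcrypt -d backup_20200706_170800.pgdump.bz2.nc -k "$DB_BACKUP_PASSWORD"
bunzip2 backup_20200706_170800.pgdump.bz2
PGPASSWORD="$POSTGRES_PASSWORD" psql -d "$POSTGRES_DB" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -f backup_20200706_170800.pgdump
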
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: ds-pre-db-backup
  namespace: ds
spec:
  schedule: "0 2 * * *"   # every day at 02:00
  jobTemplate:
    spec:
      template:
        # Truncated in the preview; an illustrative pod template that runs
        # the backup image built from the Dockerfile below (image is a guess):
        spec:
          containers:
            - name: db-backup
              image: registry.example.com/pg-backup:latest
              command: ["/do_backup.sh"]
          restartPolicy: OnFailure
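
To test the job without waiting for the 02:00 run, a one-off Job can be created from the CronJob (the manifest file name is illustrative):

kubectl apply -f ds-pre-db-backup.yaml
kubectl create job --from=cronjob/ds-pre-db-backup db-backup-test -n ds
kubectl logs -n ds job/db-backup-test
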
jmrobles / Dockerfile
Created July 6, 2020 17:55
Dockerfile for Postgres Backup Container
FROM mdillon/postgis:9.6
# awscli handles the S3 upload, mcrypt the encryption (see do-backup.sh)
RUN apt-get update && apt-get install -y awscli mcrypt
# NB: the gist file is named do-backup.sh; the name here must match the build context
COPY do_backup.sh /
RUN chmod +x /do_backup.sh
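
Building and pushing the image is the usual two-step; the registry path is an assumption and should match whatever image the CronJob above references:

docker build -t registry.example.com/pg-backup:latest .
docker push registry.example.com/pg-backup:latest
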
jmrobles / hn-upvoted-scrap.py
Created August 13, 2020 14:46
Hacker News upvoted scraper
import requests
from bs4 import BeautifulSoup

creds = {'acct': 'YOUR_ACCOUNT', 'pw': 'YOUR_PASSWORD'}
sess = requests.Session()
resp = sess.post('https://news.ycombinator.com/login', creds)
assert resp.status_code == 200

cnt = 0
items = []
nextUrl = f"https://news.ycombinator.com/upvoted?id={creds['acct']}"
while True:
    # Preview truncated; an assumed continuation that pages through the
    # upvoted list (the CSS selectors are guesses at HN's markup):
    resp = sess.get(nextUrl)
    dom = BeautifulSoup(resp.text, 'html.parser')
    items += [a['href'] for a in dom.select('a.storylink')]
    more = dom.select_one('a.morelink')
    if more is None:
        break
    nextUrl = 'https://news.ycombinator.com/' + more['href']

jmrobles / selenium-chrome-scrapper.py
Created August 13, 2020 15:30
Selenium with Chrome webdriver scraper class
from selenium import webdriver

class ScraperWebJS:
    prepared = False

    def prepare(self):
        """
        Prepare the headless browser for a scrape session
        """
        if self.prepared:
            return
        # Preview truncated; assumed continuation starting headless Chrome:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        self.driver = webdriver.Chrome(options=options)
        self.prepared = True

jmrobles / selenium-scrap-loop.py
Created August 13, 2020 15:33
Main loop to scrape the links with Selenium
from tqdm import tqdm

# urls is assumed to come from the HN scraper above; the preview starts mid-script.
scrapper = ScraperWebJS()
scraped_urls = []
for url in tqdm(urls):
    try:
        content = scrapper.scrape(url)
    except Exception as err:
        print(f"Error: can't scrape {url} => {err}")
        continue
    if content is None:
        continue
    # Preview truncated; presumably the successful result is collected:
    scraped_urls.append(url)

from typing import List
from bs4 import BeautifulSoup

class ScrapedWeb(object):
    """
    Scraped web page (plain data object)
    """
    def __init__(self, url: str, title: str, description: str,
                 headings: List[str], contents: List[str], dom: BeautifulSoup):
        self.url = url
        self.title = title
        self.description = description
        self.headings = headings
        # Preview cut off; the remaining fields follow from the signature:
        self.contents = contents
        self.dom = dom

def html2ScrapedWeb(url: str, html: str) -> ScrapedWebVitamined:
    """
    Parse the HTML with the BS4 html5lib parser and take the <body> content
    without <nav>, <script>, or <footer>.
    It focuses on the main content.
    """
    dom = BeautifulSoup(html, 'html5lib')
    # 1. Get title
    title = dom.title.string if dom.title else None
    # 2. Get description (preview truncated; assumed continuation reading
    # the <meta name="description"> tag):
    meta = dom.find('meta', attrs={'name': 'description'})
    description = meta['content'] if meta and meta.has_attr('content') else None