This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
apiVersion: extensions/v1beta1 | |
kind: Ingress | |
metadata: | |
name: docker-registry-ingress | |
namespace: default | |
annotations: | |
kubernetes.io/ingress.class: "nginx" | |
cert-manager.io/issuer: "letsencrypt-prod" | |
spec: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"net/http" | |
) | |
func hello(w http.ResponseWriter, req *http.Request) { | |
fmt.Fprintf(w, "hello\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Dump a PostgreSQL database, compress and encrypt the dump, then upload it to S3.
#
# Environment variables read by this script:
#   POSTGRES_PASSWORD, POSTGRES_DB, POSTGRES_USER, POSTGRES_HOST
#       connection settings handed to pg_dump
#   DB_BACKUP_PASSWORD
#       key handed to mcrypt
#   S3_BACKUP_PATH
#       destination handed to `aws s3 cp`

# Stop at the first failing step so a broken or partial dump is never
# compressed, encrypted and uploaded as if it were a good backup.
set -eo pipefail

DUMP_FILE="/backup_$(date +%Y%m%d_%H%M%S).pgdump"
export DUMP_FILE

# Quote every expansion so values containing spaces or glob characters
# reach the tools as a single argument.
PGPASSWORD="$POSTGRES_PASSWORD" pg_dump -d "$POSTGRES_DB" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -f "$DUMP_FILE"

# Produces ${DUMP_FILE}.bz2, which the next step consumes.
bzip2 "$DUMP_FILE"

# Produces ${DUMP_FILE}.bz2.nc, which is the file uploaded below.
# NOTE(review): passing the key with -k puts it on the command line, where it
# may be visible to other processes; consider a key file if mcrypt supports it here.
mcrypt "${DUMP_FILE}.bz2" -k "$DB_BACKUP_PASSWORD"

aws s3 cp "${DUMP_FILE}.bz2.nc" "$S3_BACKUP_PATH"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
apiVersion: batch/v1beta1 | |
kind: CronJob | |
metadata: | |
name: ds-pre-db-backup | |
namespace: ds | |
spec: | |
schedule: "0 2 * * *" | |
jobTemplate: | |
spec: | |
template: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image that adds the AWS CLI and mcrypt on top of PostGIS 9.6 and installs
# /do_backup.sh as an executable script.
# NOTE(review): no CMD/ENTRYPOINT is set here; presumably the caller supplies
# the command to run — confirm against the job definition.
FROM mdillon/postgis:9.6

# Refresh the package index and install in the SAME layer: with separate RUN
# steps a cached, stale `update` layer can be reused with a changed install
# line. apt-get is used because its command-line interface is stable for
# scripts. The package lists are removed afterwards to keep the layer small.
RUN apt-get update -y \
 && apt-get install -y awscli mcrypt \
 && rm -rf /var/lib/apt/lists/*

COPY do_backup.sh /
RUN chmod +x /do_backup.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
creds = {'acct': 'YOUR_ACCOUNT', 'pw': 'YOUR_PASSWORD'} | |
sess = requests.Session() | |
resp = sess.post('https://news.ycombinator.com/login', creds) | |
assert resp.status_code == 200 | |
cnt = 0 | |
items = [] | |
nextUrl = f"https://news.ycombinator.com/upvoted?id={creds['acct']}" | |
while True: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ScraperWebJS: | |
prepared = False | |
def prepare(self): | |
""" | |
Prepare the headless browser for a scrap session | |
""" | |
if self.prepared: | |
return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scrapper = ScraperWebJS() | |
scraped_urls = [] | |
for url in tqdm(urls): | |
try: | |
content = scrapper.scrape(url) | |
except Exception as err: | |
print(f"Error can't scrape {url} => {err}") | |
continue | |
if content is None: | |
continue |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ScrapedWeb(object): | |
""" | |
Scraped web (POJO) | |
""" | |
def __init__(self, url: str, title: str, description: str, headings: List[str], contents: List[str], dom: BeautifulSoup): | |
self.url = url | |
self.title = title | |
self.description = description | |
self.headings = headings |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def html2ScrapedWeb(url: str, html: str) -> ScrapedWebVitamined: | |
""" | |
Parse HTML using BS4 HTML5Lib parser and get <body> content without | |
<nav>, <script>, <footer> | |
It's focus on content | |
""" | |
dom = BeautifulSoup(html, 'html5lib') | |
# 1. Get title | |
title = dom.title.string if dom.title else None | |
# 2. Get description |