Skip to content

Instantly share code, notes, and snippets.

View taufiqibrahim's full-sized avatar

taufiqibrahim

View GitHub Profile
{
"version": 8,
"name": "Dark Matter without labels",
"metadata": {"maputnik:renderer": "mbgljs"},
"sources": {
"carto": {
"type": "vector",
"url": "https://tiles.basemaps.cartocdn.com/vector/carto.streets/v1/tiles.json"
}
},
@taufiqibrahim
taufiqibrahim / mysql.sql
Last active September 12, 2022 06:02
MySQL and Postgres Assessment Scripts
SELECT t.table_catalog, t.table_schema, t.table_name,
CASE WHEN primary_key_columns IS NOT NULL THEN 'true' ELSE 'false' END AS has_primary_key, primary_key_columns,
CASE WHEN unique_key_columns IS NOT NULL THEN 'true' ELSE 'false' END AS has_unique_key, unique_key_columns,
t.TABLE_ROWS as estimate_rows,
ROUND(((data_length + index_length) / 1024 / 1024), 2) as table_size_mb
FROM information_schema.TABLES t
LEFT JOIN (
SELECT TABLE_CATALOG,TABLE_SCHEMA, TABLE_NAME, GROUP_CONCAT(COLUMN_NAME SEPARATOR ',') AS primary_key_columns
FROM information_schema.`COLUMNS` c WHERE COLUMN_KEY IN ('PRI')
GROUP BY TABLE_CATALOG,TABLE_SCHEMA, TABLE_NAME, COLUMN_KEY
# Create Pub/Sub schema
gcloud pubsub schemas create mysql.inventory.addresses-schema \
--type=AVRO \
--definition='{"type":"record","name":"MysqlInventoryAddressesSchema","fields":[{"type":"int","optional":false,"name":"id"},{"type":"int","optional":false,"name":"customer_id"},{"type":"string","optional":false,"name":"street"},{"type":"string","optional":false,"name":"city"},{"type":"string","optional":false,"name":"state"},{"type":"string","optional":false,"name":"zip"},{"type":"string","optional":false,"name":"type"},{"type":"string","optional":true,"name":"__op"},{"type":"string","optional":true,"name":"__table"},{"type":"long","optional":true,"name":"__source_ts_ms"},{"type":"string","optional":true,"name":"__deleted"}]}'
# Describe schema
gcloud pubsub schemas describe mysql.inventory.addresses-schema
# Create Pub/Sub topic with schema
gcloud pubsub topics create mysql.inventory.addresses --message-encoding=json --schema=mysql.inventory.addresses-schema
# Delete subscription
gcloud pubsub subscriptions delete mysql.inventory.products-bq-sub
# Delete topic
gcloud pubsub topics delete mysql.inventory.products
# Recreate BigQuery table (Using SQL UI)
CREATE OR REPLACE TABLE
<project_id>.<dataset>.mysql_inventory_products_no_schema (
data STRING
export PROJECT_ID=<your_gcp_project_id_here>
export BQ_DATASET=<your_bigquery_dataset_name_here>
gcloud pubsub schemas create mysql.inventory.products-schema \
--type=AVRO \
--definition='
{
"type" : "record",
"name" : "MysqlInventoryProductsSchema",
"fields" : [
{
"type": "int",
"optional": false,
debezium.sink.type=pubsub
debezium.sink.pubsub.project.id=<GOOGLE_CLOUD_PROJECT_ID>
debezium.source.connector.class=io.debezium.connector.mysql.MySqlConnector
debezium.source.database.hostname=mysql
debezium.source.database.port=3306
debezium.source.database.user=root
debezium.source.database.password=debezium
debezium.source.database.server.id=1001
debezium.source.database.server.name=mysql
debezium.source.database.include.list=inventory
version: '3.9'
services:
mysql:
image: quay.io/debezium/example-mysql
container_name: mysql
ports:
- 3306:3306
environment:
- MYSQL_ROOT_PASSWORD=debezium
- MYSQL_USER=mysqluser
@taufiqibrahim
taufiqibrahim / read_pdf_files_transpose_compile_to_csv.py
Created May 3, 2022 18:21
Read all *.pdf files inside PATH_DIR, transpose and compile into single CSV file
"""
Requirements:
pip install tabula-py
pip install tabulate
Read all *.pdf files inside PATH_DIR, transpose and compile into single CSV file
"""
import pandas as pd
import os
# Install gcsfuse
export GCSFUSE_REPO=gcsfuse-`lsb_release -c -s`
echo "deb http://packages.cloud.google.com/apt $GCSFUSE_REPO main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo apt-get update
sudo apt-get install gcsfuse
# Add this to /etc/fstab
# Create a file /ftpdata/key.json which contains service account JSON key