Last active
May 19, 2025 17:57
-
-
Save mwmahlberg/560d668f3d8e621af6eb4f2a6cb768a7 to your computer and use it in GitHub Desktop.
cassandra 2 mysql dump via scriptella
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
configs:
  scriptella_etl.properties:
    # Scriptella ETL Configuration Properties
    # https://scriptella.org/reference/index.html#%3Cproperties%3E
    # Basically, we define variables that are used in the script.
    # This variant loads them from an external file (etl.properties) that
    # sits next to this compose file.
    file: etl.properties
  scriptella_etl.xml:
    # Scriptella ETL Configuration XML
    # Our actual conversion script.
    # For each row in the CSV file, we insert a new row into the MySQL database.
    # In this demo, the CSV file is created by the Cassandra export script.
    # The CSV file is created in the shared volume, which is mounted to /mnt/data
    # in both the cassandra and loader containers.
    #
    # Note that the double dollar signs are only here to escape the dollar sign
    # within the docker-compose.yaml file.
    #
    # ALSO note that you will definitely want to adjust the
    # ops property in the property file to your needs.
    # More info on batching in scriptella:
    # http://scriptella.org/docs/api/scriptella/jdbc/package-summary.html#batching
    content: |
      <!DOCTYPE etl SYSTEM "http://scriptella.org/dtd/etl.dtd">
      <etl>
        <description>Import of arbitrary length CSV file into MySQL</description>
        <properties>
          <!-- Load from external properties file -->
          <include href="etl.properties"/>
        </properties>
        <connection id="in" driver="csv" url="/mnt/data/export.csv">
          statement.fetchSize=$$ops
        </connection>
        <connection driver="$$driver" id="out" url="$$url" user="$$user"
                    password="$$password" classpath="$$classpath">
          statement.batchSize=$$ops
        </connection>
        <query connection-id="in">
          <!-- Empty query means select all -->
          <script connection-id="out">
            INSERT INTO users (firstname,lastname) VALUES (?fname,?lname);
          </script>
        </query>
      </etl>
  mariadb_init.sql:
    # Just a simple SQL script to create the table.
    # You will need to adjust that to your data model(s).
    content: |
      CREATE TABLE IF NOT EXISTS users (
        id INT AUTO_INCREMENT PRIMARY KEY,
        firstname VARCHAR(255) NOT NULL,
        lastname VARCHAR(255) NOT NULL
      );

services:
  db:
    # The db container is the MySQL database that we want to import the data into.
    # However, it is only temporary: we just want to load the data into it and
    # then dump it to a file.
    # Note that we do not use a volume for this container.
    # This allows you to simply down the stack and start over with a clean database.
    image: mariadb:11-ubi
    environment:
      MARIADB_DATABASE: test
      MARIADB_USER: username
      MARIADB_PASSWORD: password
      MARIADB_ROOT_PASSWORD: t0Ps3cr3t
    configs:
      - source: mariadb_init.sql
        target: /docker-entrypoint-initdb.d/01_init.sql
    healthcheck:
      # We want to ensure that the db container is healthy before we start
      # the loader container.
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3

  loader:
    image: mwmahlberg/scriptella:8-jre-1.2-rel0
    build:
      # Scriptella is a wee bit old and pretty much abandoned.
      # So we need to build our own image with the MySQL connector included.
      # This is a multi-stage build: we first download scriptella and the
      # mysql connector and then copy them into the final image.
      context: .
      dockerfile_inline: |
        FROM alpine:latest AS downloader
        ARG SCRIPTELLA_VERSION=1.2
        ARG MYSQL_CONNECTOR_VERSION=9.3.0
        WORKDIR /usr/local/src/
        RUN apk add --no-cache curl unzip && \
            curl -LO https://github.com/scriptella/scriptella-etl/releases/download/scriptella-parent-$${SCRIPTELLA_VERSION}/scriptella-$${SCRIPTELLA_VERSION}.zip && \
            unzip scriptella-$${SCRIPTELLA_VERSION}.zip && \
            curl -qsL https://cdn.mysql.com/Downloads/Connector-J/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.tar.gz \
            | tar -xzv --strip-components 1 -C scriptella-$${SCRIPTELLA_VERSION}/lib/ mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.jar
        FROM eclipse-temurin:11-jre-alpine-3.21
        ARG SCRIPTELLA_VERSION=1.2
        # $$PATH keeps compose from substituting the host's PATH; the literal
        # $PATH must survive into the generated Dockerfile.
        ENV PATH="$$PATH:/usr/local/lib/scriptella/bin/"
        COPY --from=downloader /usr/local/src/scriptella-$${SCRIPTELLA_VERSION} /usr/local/lib/scriptella
        WORKDIR /opt/export
        # NOTE(review): exec-form ENTRYPOINT does not expand $JAVA_HOME; the
        # compose service below overrides the entrypoint anyway — confirm if
        # the image is ever run standalone.
        ENTRYPOINT [ "$$JAVA_HOME/bin/java","-jar","/usr/local/lib/scriptella/scriptella.jar"]
    depends_on:
      # We want to ensure that the db container is healthy.
      db:
        condition: service_healthy
    configs:
      - source: scriptella_etl.properties
        target: /opt/export/etl.properties
      - source: scriptella_etl.xml
        target: /opt/export/etl.xml
    volumes:
      - type: bind
        source: ./export.csv
        target: /mnt/data/export.csv
    # The entrypoint is where the magic happens.
    # We wait for the cassandra export to become available and then run the
    # scriptella ETL job. The scriptella job will read the CSV file and
    # insert the data into the MySQL database line by line.
    entrypoint:
      - /bin/ash
      - -c
      - |
        until [ -f /mnt/data/export.csv ]
        do
          echo "Waiting for Cassandra to finish the export..."
          sleep 5
        done
        $$JAVA_HOME/bin/java -jar /usr/local/lib/scriptella/scriptella.jar

  dumper:
    # The final step is to dump the MySQL database to a file.
    image: alpine:latest
    depends_on:
      # We want to ensure that the db container is healthy...
      db:
        condition: service_healthy
      # ...and that the loader container has finished before we start the
      # dumper container.
      loader:
        condition: service_completed_successfully
    volumes:
      # We bind-mount the local directory ./dump to /mnt/data in the container.
      # This is where the MySQL dump will be stored.
      - type: bind
        source: ./dump
        target: /mnt/data
    entrypoint:
      - /bin/sh
      - -c
      - |
        echo "Installing MySQL client..."
        apk add --no-cache mysql-client
        echo "Dumping MySQL data..."
        mysqldump --skip-ssl-verify-server-cert -h db -u username -ppassword test | bzip2 -v9 > /mnt/data/dump.sql.bz2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
configs:
  cassandra_keyspace.cql:
    # Setup script for our demo.
    content: |
      CREATE KEYSPACE IF NOT EXISTS sample WITH REPLICATION =
      { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
      AND DURABLE_WRITES = true;
  cassandra_users.cql:
    # We insert some demo users into the users table.
    content: |
      CREATE TABLE IF NOT EXISTS sample.users (
        id UUID PRIMARY KEY,
        lname text,
        fname text );
      INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'John', 'Doe');
      INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jane', 'Doe');
      INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jim', 'Beam');
      INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jack', 'Daniels');
      INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Johnny', 'Walker');
  cassandra_export.cql:
    # This is the first interesting part of the demo.
    # We export the data from the Cassandra database to a CSV file.
    # See https://docs.datastax.com/en/cql/hcd/reference/cqlsh-commands/copy.html
    # for more information.
    # The CSV file is created in the shared volume, which is mounted to /mnt/data
    # in both the cassandra and loader containers.
    content: |
      COPY sample.users (id, fname, lname) TO '/mnt/data/export.csv' WITH HEADER = true;
  scriptella_etl.properties:
    # Scriptella ETL Configuration Properties
    # https://scriptella.org/reference/index.html#%3Cproperties%3E
    # Basically, we define variables that are used in the script.
    content: |
      #Scriptella ETL Configuration Properties
      driver=mysql
      url=jdbc:mysql://db:3306/test
      user=username
      password=password
      classpath=/usr/local/lib/scriptella/lib/mysql-connector-j-9.3.0.jar
      ops=3
  scriptella_etl.xml:
    # Scriptella ETL Configuration XML
    # Our actual conversion script.
    # For each row in the CSV file, we insert a new row into the MySQL database.
    # In this demo, the CSV file is created by the Cassandra export script.
    # The CSV file is created in the shared volume, which is mounted to /mnt/data
    # in both the cassandra and loader containers.
    #
    # Note that the double dollar signs are only here to escape the dollar sign
    # within the docker-compose.yaml file.
    #
    # ALSO note that you will definitely want to adjust the
    # ops property in the property file to your needs.
    # More info on batching in scriptella:
    # http://scriptella.org/docs/api/scriptella/jdbc/package-summary.html#batching
    content: |
      <!DOCTYPE etl SYSTEM "http://scriptella.org/dtd/etl.dtd">
      <etl>
        <description>Import of arbitrary length CSV file into MySQL</description>
        <properties>
          <!-- Load from external properties file -->
          <include href="etl.properties"/>
        </properties>
        <connection id="in" driver="csv" url="/mnt/data/export.csv">
          statement.fetchSize=$$ops
        </connection>
        <connection driver="$$driver" id="out" url="$$url" user="$$user"
                    password="$$password" classpath="$$classpath">
          statement.batchSize=$$ops
        </connection>
        <query connection-id="in">
          <!-- Empty query means select all -->
          <script connection-id="out">
            INSERT INTO users (firstname,lastname) VALUES (?fname,?lname);
          </script>
        </query>
      </etl>
  mariadb_init.sql:
    # Just a simple SQL script to create the table.
    # You will need to adjust that to your data model(s).
    content: |
      CREATE TABLE IF NOT EXISTS users (
        id INT AUTO_INCREMENT PRIMARY KEY,
        firstname VARCHAR(255) NOT NULL,
        lastname VARCHAR(255) NOT NULL
      );

volumes:
  shared:
    # This is a shared volume that is used by the cassandra and loader containers.
    # It is used to store the CSV file that is created by the cassandra export script.
    # The loader container uses this file to import the data into the MySQL database.
    # This is only needed for this demo.
    # For actual usage, you would create the csv file on your cassandra cluster,
    # copy it to your local machine, and then bind-mount it to the loader container.
    # driver: local
    # driver_opts:
    #   o: "uid=1001"

services:
  volume-permissions:
    # This is a simple container that is used to set the permissions on the
    # shared volume. It is needed because the cassandra container runs as root
    # and the loader container runs as a non-root user.
    image: alpine:latest
    volumes:
      - type: volume
        source: shared
        target: /mnt/data
    entrypoint:
      - /bin/sh
      - -c
      - |
        echo "Setting permissions on shared volume..."
        chown -R 1001:1001 /mnt/data

  cassandra:
    # The cassandra container is only used in this demo.
    # For actual usage, you can remove this container and instead
    # bind-mount the CSV file you created on your cassandra cluster
    # into the loader container.
    image: bitnami/cassandra:5.0
    depends_on:
      # We want to ensure that the volume-permissions container has finished
      # before we start the cassandra container.
      volume-permissions:
        condition: service_completed_successfully
    configs:
      # Setup the keyspace and users table and force the export.
      - source: cassandra_keyspace.cql
        target: /docker-entrypoint-initdb.d/01_keyspace.cql
      - source: cassandra_users.cql
        target: /docker-entrypoint-initdb.d/02_users.cql
      - source: cassandra_export.cql
        target: /docker-entrypoint-initdb.d/03_export.cql
    volumes:
      # The shared volume is used to store the CSV file that is created by
      # the cassandra export script.
      - type: volume
        source: shared
        target: /mnt/data
    healthcheck:
      # We want to ensure that the cassandra container is healthy before we
      # start the loader container.
      test: ["CMD-SHELL", "nodetool status"]
      interval: 15s
      start_period: 2m
      timeout: 10s
      retries: 3

  db:
    # The db container is the MySQL database that we want to import the data into.
    # However, it is only temporary: we just want to load the data into it and
    # then dump it to a file.
    # Note that we do not use a volume for this container.
    # This allows you to simply down the stack and start over with a clean database.
    image: mariadb:11-ubi
    environment:
      MARIADB_DATABASE: test
      MARIADB_USER: username
      MARIADB_PASSWORD: password
      MARIADB_ROOT_PASSWORD: t0Ps3cr3t
    configs:
      - source: mariadb_init.sql
        target: /docker-entrypoint-initdb.d/01_init.sql
    healthcheck:
      # We want to ensure that the db container is healthy before we start
      # the loader container.
      test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 3

  loader:
    image: mwmahlberg/scriptella:8-jre-1.2-rel0
    build:
      # Scriptella is a wee bit old and pretty much abandoned.
      # So we need to build our own image with the MySQL connector included.
      # This is a multi-stage build: we first download scriptella and the
      # mysql connector and then copy them into the final image.
      context: .
      dockerfile_inline: |
        FROM alpine:latest AS downloader
        ARG SCRIPTELLA_VERSION=1.2
        ARG MYSQL_CONNECTOR_VERSION=9.3.0
        WORKDIR /usr/local/src/
        RUN apk add --no-cache curl unzip && \
            curl -LO https://github.com/scriptella/scriptella-etl/releases/download/scriptella-parent-$${SCRIPTELLA_VERSION}/scriptella-$${SCRIPTELLA_VERSION}.zip && \
            unzip scriptella-$${SCRIPTELLA_VERSION}.zip && \
            curl -qsL https://cdn.mysql.com/Downloads/Connector-J/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.tar.gz \
            | tar -xzv --strip-components 1 -C scriptella-$${SCRIPTELLA_VERSION}/lib/ mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.jar
        FROM eclipse-temurin:11-jre-alpine-3.21
        ARG SCRIPTELLA_VERSION=1.2
        # $$PATH keeps compose from substituting the host's PATH; the literal
        # $PATH must survive into the generated Dockerfile.
        ENV PATH="$$PATH:/usr/local/lib/scriptella/bin/"
        COPY --from=downloader /usr/local/src/scriptella-$${SCRIPTELLA_VERSION} /usr/local/lib/scriptella
        WORKDIR /opt/export
        # NOTE(review): exec-form ENTRYPOINT does not expand $JAVA_HOME; the
        # compose service below overrides the entrypoint anyway — confirm if
        # the image is ever run standalone.
        ENTRYPOINT [ "$$JAVA_HOME/bin/java","-jar","/usr/local/lib/scriptella/scriptella.jar"]
    depends_on:
      # We want to ensure that both our source and target
      # databases are healthy before we start the loader container.
      # For actual usage, you can remove the depends_on section for cassandra.
      cassandra:
        condition: service_healthy
      db:
        condition: service_healthy
    configs:
      - source: scriptella_etl.properties
        target: /opt/export/etl.properties
      - source: scriptella_etl.xml
        target: /opt/export/etl.xml
    volumes:
      # The shared volume is used to store the CSV file that is created by
      # the cassandra export script.
      # The loader container uses this file to import the data into the MySQL
      # database. This is only needed for this demo.
      # For actual usage, you would create the csv file on your cassandra
      # cluster, copy it to your local machine, and then bind-mount it here
      # into the loader container, e.g.:
      # - type: bind
      #   source: path/on/host/to/your-csv-file.csv
      #   target: /mnt/data/export.csv
      - type: volume
        source: shared
        target: /mnt/data
    # The entrypoint is where the magic happens.
    # We wait for the cassandra export to become available and then run the
    # scriptella ETL job. The scriptella job will read the CSV file and
    # insert the data into the MySQL database line by line.
    entrypoint:
      - /bin/sh
      - -c
      - |
        until [ -f /mnt/data/export.csv ]
        do
          echo "Waiting for Cassandra to finish the export..."
          sleep 5
        done
        $$JAVA_HOME/bin/java -jar /usr/local/lib/scriptella/scriptella.jar

  dumper:
    # The final step is to dump the MySQL database to a file.
    image: alpine:latest
    depends_on:
      # We want to ensure that the db container is healthy...
      db:
        condition: service_healthy
      # ...and that the loader container has finished before we start the
      # dumper container.
      loader:
        condition: service_completed_successfully
    volumes:
      # We bind-mount the local directory ./dump to /mnt/data in the container.
      # This is where the MySQL dump will be stored.
      - type: bind
        source: ./dump
        target: /mnt/data
    entrypoint:
      - /bin/sh
      - -c
      - |
        echo "Installing MySQL client..."
        apk add --no-cache mysql-client
        echo "Dumping MySQL data..."
        mysqldump --skip-ssl-verify-server-cert -h db -u username -ppassword test | bzip2 -v9 > /mnt/data/dump.sql.bz2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment