cassandra 2 mysql dump via scriptella
configs:
scriptella_etl.properties:
# Scriptella ETL Configuration Properties
# https://scriptella.org/reference/index.html#%3Cproperties%3E
# Basically, we define variables that are used in the script.
file: etl.properties
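# The properties file is expected to live next to this compose file.
# A minimal sketch of its contents, assuming it mirrors the inline
# variant used in the full demo below:
#   driver=mysql
#   url=jdbc:mysql://db:3306/test
#   user=username
#   password=password
#   classpath=/usr/local/lib/scriptella/lib/mysql-connector-j-9.3.0.jar
#   ops=3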
scriptella_etl.xml:
# Scriptella ETL Configuration XML
# Our actual conversion script.
# For each row in the CSV file, we insert a new row into the MySQL database.
# In this variant, the CSV file is expected to already exist on the host
# and is bind-mounted into the loader container at /mnt/data/export.csv.
#
# Note that the double dollar signs are only here to escape the dollar sign
# within the docker-compose.yaml file.
#
# ALSO note that you will definitely want to adjust the
# ops property in the properties file to your needs.
# More info on batching in scriptella:
# http://scriptella.org/docs/api/scriptella/jdbc/package-summary.html#batching
content: |
<!DOCTYPE etl SYSTEM "http://scriptella.org/dtd/etl.dtd">
<etl>
<description>Import of arbitrary length CSV file into MySQL</description>
<properties>
<include href="etl.properties"/> <!--Load from external properties file-->
</properties>
<connection id="in" driver="csv" url="/mnt/data/export.csv">
statement.fetchSize=$$ops
</connection>
<connection driver="$$driver" id="out" url="$$url" user="$$user"
password="$$password" classpath="$$classpath">
statement.batchSize=$$ops
</connection>
<query connection-id="in">
<!-- An empty query means: select all rows -->
<script connection-id="out">
INSERT INTO users (firstname,lastname) VALUES (?fname,?lname);
</script>
</query>
</etl>
mariadb_init.sql:
# Just a simple SQL script to create the table.
# You will need to adjust that to your data model(s).
content: |
CREATE TABLE IF NOT EXISTS users (
id INT AUTO_INCREMENT PRIMARY KEY,
firstname VARCHAR(255) NOT NULL,
lastname VARCHAR(255) NOT NULL
);
services:
db:
# The db container is the MySQL database that we want to import the data into.
# However, it is only temporary: we just want to load the data into it and then dump it to a file.
# Note that we do not use a volume for this container.
# This allows you to simply down the stack and start over with a clean database.
image: mariadb:11-ubi
environment:
MARIADB_DATABASE: test
MARIADB_USER: username
MARIADB_PASSWORD: password
MARIADB_ROOT_PASSWORD: t0Ps3cr3t
configs:
- source: mariadb_init.sql
target: /docker-entrypoint-initdb.d/01_init.sql
healthcheck:
# We want to ensure that the db container is healthy before we start the loader container.
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 3
loader:
image: mwmahlberg/scriptella:8-jre-1.2-rel0
build:
# Scriptella is a wee bit old and pretty much abandoned,
# so we need to build our own image with the MySQL connector included.
# This is a multi-stage build: we first download Scriptella and the
# MySQL connector, then copy them into the final image.
context: .
dockerfile_inline: |
FROM alpine:latest AS downloader
ARG SCRIPTELLA_VERSION=1.2
ARG MYSQL_CONNECTOR_VERSION=9.3.0
WORKDIR /usr/local/src/
RUN apk add curl unzip && \
curl -LO https://github.com/scriptella/scriptella-etl/releases/download/scriptella-parent-$${SCRIPTELLA_VERSION}/scriptella-$${SCRIPTELLA_VERSION}.zip && \
unzip scriptella-$${SCRIPTELLA_VERSION}.zip && \
curl -qsL https://cdn.mysql.com//Downloads/Connector-J/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.tar.gz \
| tar -xzv --strip-components 1 -C scriptella-$${SCRIPTELLA_VERSION}/lib/ mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.jar
FROM eclipse-temurin:11-jre-alpine-3.21
ARG SCRIPTELLA_VERSION=1.2
# $$ keeps compose from substituting the host's PATH at parse time.
ENV PATH="$$PATH:/usr/local/lib/scriptella/bin/"
COPY --from=downloader /usr/local/src/scriptella-$${SCRIPTELLA_VERSION} /usr/local/lib/scriptella
WORKDIR /opt/export
# Exec-form ENTRYPOINT does not expand variables, so call java from the PATH.
ENTRYPOINT [ "java", "-jar", "/usr/local/lib/scriptella/scriptella.jar" ]
depends_on:
# We want to ensure that the db container is healthy.
db:
condition: service_healthy
configs:
- source: scriptella_etl.properties
target: /opt/export/etl.properties
- source: scriptella_etl.xml
target: /opt/export/etl.xml
volumes:
- type: bind
source: ./export.csv
target: /mnt/data/export.csv
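# A hedged sketch of how you might produce that CSV on your own cluster
# with cqlsh (adjust host, keyspace, table, and columns to your schema):
#   cqlsh your-cassandra-host -e "COPY sample.users (id, fname, lname) TO 'export.csv' WITH HEADER = true;"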
# The entrypoint is where the magic happens.
# We wait for the cassandra export to become available and then run the scriptella ETL job.
# The scriptella job will read the CSV file and insert the data into the MySQL database
# line by line.
entrypoint:
- /bin/ash
- -c
- |
until [ -f /mnt/data/export.csv ]
do
echo "Waiting for Cassandra to finish the export..."
sleep 5
done
$$JAVA_HOME/bin/java -jar /usr/local/lib/scriptella/scriptella.jar
dumper:
# The final step is to dump the MySQL database to a file.
image: alpine:latest
depends_on:
# We want to ensure that the db container is healthy...
db:
condition: service_healthy
# ...and that the loader container has finished before we start the dumper container.
loader:
condition: service_completed_successfully
volumes:
# We bind-mount the local directory ./dump to /mnt/data in the container.
# This is where the MySQL dump will be stored.
- type: bind
source: ./dump
target: /mnt/data
entrypoint:
- /bin/sh
- -c
- |
echo "Installing MySQL client..."
apk add --no-cache mysql-client
echo "Dumping MySQL data..."
mysqldump --skip-ssl-verify-server-cert -h db -u username -ppassword test | bzip2 -v9 > /mnt/data/dump.sql.bz2
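# To restore the dump later (generic client invocation; adjust host,
# credentials, and database name):
#   bzcat dump.sql.bz2 | mysql -h your-host -u username -p test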
configs:
cassandra_keyspace.cql:
# Setup script for our demo: it creates the sample keyspace.
content: |
CREATE KEYSPACE IF NOT EXISTS sample WITH REPLICATION =
{ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
AND DURABLE_WRITES = true;
cassandra_users.cql:
# We create the users table and insert some demo users.
content: |
CREATE TABLE IF NOT EXISTS sample.users (
id UUID PRIMARY KEY,
lname text,
fname text );
INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'John', 'Doe');
INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jane', 'Doe');
INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jim', 'Beam');
INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Jack', 'Daniels');
INSERT INTO sample.users (id, fname, lname) VALUES (uuid(), 'Johnny', 'Walker');
cassandra_export.cql:
# This is the first interesting part of the demo.
# We export the data from the Cassandra database to a CSV file.
# See https://docs.datastax.com/en/cql/hcd/reference/cqlsh-commands/copy.html
# for more information.
# The CSV file is created in the shared volume, which is mounted to /mnt/data
# in both the cassandra and loader containers.
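# COPY TO accepts further options if you need more control over the
# export; an illustrative (untested) example:
#   COPY sample.users (id, fname, lname) TO '/mnt/data/export.csv'
#   WITH HEADER = true AND DELIMITER = ',' AND PAGESIZE = 1000;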
content: |
COPY sample.users (id, fname, lname) TO '/mnt/data/export.csv' WITH HEADER = true;
scriptella_etl.properties:
# Scriptella ETL Configuration Properties
# https://scriptella.org/reference/index.html#%3Cproperties%3E
# Basically, we define variables that are used in the script.
content: |
#Scriptella ETL Configuration Properties
driver=mysql
url=jdbc:mysql://db:3306/test
user=username
password=password
classpath=/usr/local/lib/scriptella/lib/mysql-connector-j-9.3.0.jar
ops=3
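# The value above is deliberately tiny so you can watch the batching in
# this demo; for a real import you would raise it considerably, e.g.:
# ops=1000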
scriptella_etl.xml:
# Scriptella ETL Configuration XML
# Our actual conversion script.
# For each row in the CSV file, we insert a new row into the MySQL database.
# In this demo, the CSV file is created by the Cassandra export script.
# The CSV file is created in the shared volume, which is mounted to /mnt/data
# in both the cassandra and loader containers.
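# The ?fname and ?lname bind variables in the INSERT below refer to the
# matching header columns (fname, lname) of the exported CSV.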
#
# Note that the double dollar signs are only here to escape the dollar sign
# within the docker-compose.yaml file.
#
# ALSO note that you will definitely want to adjust the
# ops property in the properties file to your needs.
# More info on batching in scriptella:
# http://scriptella.org/docs/api/scriptella/jdbc/package-summary.html#batching
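# In this script a single property, ops, drives both statement.fetchSize
# on the CSV reader and statement.batchSize on the JDBC writer. If you
# prefer to tune them independently, you could introduce separate
# (hypothetical) properties in etl.properties:
#   fetchSize=1000
#   batchSize=500
# and reference them as $$fetchSize / $$batchSize below.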
content: |
<!DOCTYPE etl SYSTEM "http://scriptella.org/dtd/etl.dtd">
<etl>
<description>Import of arbitrary length CSV file into MySQL</description>
<properties>
<include href="etl.properties"/> <!--Load from external properties file-->
</properties>
<connection id="in" driver="csv" url="/mnt/data/export.csv">
statement.fetchSize=$$ops
</connection>
<connection driver="$$driver" id="out" url="$$url" user="$$user"
password="$$password" classpath="$$classpath">
statement.batchSize=$$ops
</connection>
<query connection-id="in">
<!-- An empty query means: select all rows -->
<script connection-id="out">
INSERT INTO users (firstname,lastname) VALUES (?fname,?lname);
</script>
</query>
</etl>
mariadb_init.sql:
# Just a simple SQL script to create the table.
# You will need to adjust that to your data model(s).
content: |
CREATE TABLE IF NOT EXISTS users (
id INT AUTO_INCREMENT PRIMARY KEY,
firstname VARCHAR(255) NOT NULL,
lastname VARCHAR(255) NOT NULL
);
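# If you also want to keep the Cassandra UUID, a sketch (this assumes you
# extend the INSERT in etl.xml to bind ?id as well):
#   CREATE TABLE IF NOT EXISTS users (
#     id CHAR(36) PRIMARY KEY,
#     firstname VARCHAR(255) NOT NULL,
#     lastname VARCHAR(255) NOT NULL
#   );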
volumes:
shared:
# This is a shared volume that is used by the cassandra and loader containers.
# It is used to store the CSV file that is created by the cassandra export script.
# The loader container uses this file to import the data into the MySQL database.
# This is only needed for this demo.
# For actual usage, you would create the CSV file on your Cassandra cluster,
# copy it to your local machine, and then bind-mount it to the loader container.
# driver: local
# driver_opts:
# o: "uid=1001"
services:
volume-permissions:
# This is a simple container that is used to set the permissions on the shared volume.
# It is needed because the named volume is initially owned by root, while the
# Bitnami Cassandra container runs as a non-root user (UID 1001) that must be
# able to write the export to the volume.
image: alpine:latest
volumes:
- type: volume
source: shared
target: /mnt/data
entrypoint:
- /bin/sh
- -c
- |
echo "Setting permissions on shared volume..."
chown -R 1001:1001 /mnt/data
cassandra:
# The cassandra container is only used in this demo.
# For actual usage, you can remove this container and instead
# bind-mount the CSV file you created on your Cassandra cluster
# into the loader container.
image: bitnami/cassandra:5.0
depends_on:
# We want to ensure that the volume-permissions container has finished
# before we start the cassandra container.
volume-permissions:
condition: service_completed_successfully
configs:
# Set up the keyspace and the users table, and trigger the export.
- source: cassandra_keyspace.cql
target: /docker-entrypoint-initdb.d/01_keyspace.cql
- source: cassandra_users.cql
target: /docker-entrypoint-initdb.d/02_users.cql
- source: cassandra_export.cql
target: /docker-entrypoint-initdb.d/03_export.cql
volumes:
# The shared volume is used to store the CSV file that is created by the cassandra export script.
- type: volume
source: shared
target: /mnt/data
healthcheck:
# We want to ensure that the cassandra container is healthy before we start the loader container.
test: ["CMD-SHELL", "nodetool status"]
interval: 15s
start_period: 2m
timeout: 10s
retries: 3
db:
# The db container is the MySQL database that we want to import the data into.
# However, it is only temporary: we just want to load the data into it and then dump it to a file.
# Note that we do not use a volume for this container.
# This allows you to simply down the stack and start over with a clean database.
image: mariadb:11-ubi
environment:
MARIADB_DATABASE: test
MARIADB_USER: username
MARIADB_PASSWORD: password
MARIADB_ROOT_PASSWORD: t0Ps3cr3t
configs:
- source: mariadb_init.sql
target: /docker-entrypoint-initdb.d/01_init.sql
healthcheck:
# We want to ensure that the db container is healthy before we start the loader container.
test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
start_period: 10s
interval: 10s
timeout: 5s
retries: 3
loader:
image: mwmahlberg/scriptella:8-jre-1.2-rel0
build:
# Scriptella is a wee bit old and pretty much abandoned,
# so we need to build our own image with the MySQL connector included.
# This is a multi-stage build: we first download Scriptella and the
# MySQL connector, then copy them into the final image.
context: .
dockerfile_inline: |
FROM alpine:latest AS downloader
ARG SCRIPTELLA_VERSION=1.2
ARG MYSQL_CONNECTOR_VERSION=9.3.0
WORKDIR /usr/local/src/
RUN apk add curl unzip && \
curl -LO https://github.com/scriptella/scriptella-etl/releases/download/scriptella-parent-$${SCRIPTELLA_VERSION}/scriptella-$${SCRIPTELLA_VERSION}.zip && \
unzip scriptella-$${SCRIPTELLA_VERSION}.zip && \
curl -qsL https://cdn.mysql.com//Downloads/Connector-J/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.tar.gz \
| tar -xzv --strip-components 1 -C scriptella-$${SCRIPTELLA_VERSION}/lib/ mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}/mysql-connector-j-$${MYSQL_CONNECTOR_VERSION}.jar
FROM eclipse-temurin:11-jre-alpine-3.21
ARG SCRIPTELLA_VERSION=1.2
# $$ keeps compose from substituting the host's PATH at parse time.
ENV PATH="$$PATH:/usr/local/lib/scriptella/bin/"
COPY --from=downloader /usr/local/src/scriptella-$${SCRIPTELLA_VERSION} /usr/local/lib/scriptella
WORKDIR /opt/export
# Exec-form ENTRYPOINT does not expand variables, so call java from the PATH.
ENTRYPOINT [ "java", "-jar", "/usr/local/lib/scriptella/scriptella.jar" ]
depends_on:
# We want to ensure that both our source and target
# databases are healthy before we start the loader container.
# For actual usage, you can remove the depends_on entry for cassandra.
cassandra:
condition: service_healthy
db:
condition: service_healthy
configs:
- source: scriptella_etl.properties
target: /opt/export/etl.properties
- source: scriptella_etl.xml
target: /opt/export/etl.xml
volumes:
# The shared volume is used to store the CSV file that is created by the cassandra export script.
# The loader container uses this file to import the data into the MySQL database.
# This is only needed for this demo.
# For actual usage, you would create the CSV file on your Cassandra cluster,
# copy it to your local machine, and then bind-mount it here into the loader
# container, e.g.:
# - type: bind
# source: path/on/host/to/your-csv-file.csv
# target: /mnt/data/export.csv
- type: volume
source: shared
target: /mnt/data
# The entrypoint is where the magic happens.
# We wait for the cassandra export to become available and then run the scriptella ETL job.
# The scriptella job will read the CSV file and insert the data into the MySQL database
# line by line.
entrypoint:
- /bin/sh
- -c
- |
until [ -f /mnt/data/export.csv ]
do
echo "Waiting for Cassandra to finish the export..."
sleep 5
done
$$JAVA_HOME/bin/java -jar /usr/local/lib/scriptella/scriptella.jar
dumper:
# The final step is to dump the MySQL database to a file.
image: alpine:latest
depends_on:
# We want to ensure that the db container is healthy...
db:
condition: service_healthy
# ...and that the loader container has finished before we start the dumper container.
loader:
condition: service_completed_successfully
volumes:
# We bind-mount the local directory ./dump to /mnt/data in the container.
# This is where the MySQL dump will be stored.
- type: bind
source: ./dump
target: /mnt/data
entrypoint:
- /bin/sh
- -c
- |
echo "Installing MySQL client..."
apk add --no-cache mysql-client
echo "Dumping MySQL data..."
mysqldump --skip-ssl-verify-server-cert -h db -u username -ppassword test | bzip2 -v9 > /mnt/data/dump.sql.bz2
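# To run the whole demo (requires a reasonably recent Docker Compose;
# the inline `content:` configs are a newer feature):
#   docker compose up --build
# The compressed dump then appears as ./dump/dump.sql.bz2.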