-
-
Save jonico/d3e9e3959e532432d11bbd207c0e3fe9 to your computer and use it in GitHub Desktop.
import sys | |
from awsglue.transforms import * | |
from awsglue.utils import getResolvedOptions | |
from pyspark.context import SparkContext, SparkConf | |
from awsglue.context import GlueContext | |
from awsglue.job import Job | |
import time | |
from pyspark.sql.types import StructType, StructField, IntegerType, StringType | |
sc = SparkContext() | |
glueContext = GlueContext(sc) | |
spark = glueContext.spark_session | |
connection_mysql8_options_source = { | |
"url": "jdbc:mysql://5b9jyx2o6pbr.us-east-1.psdb.cloud/matrix-demos-jonico?workload=olap", | |
"dbtable": "pixel_matrix", | |
"user": "l4klb0hnyw7o", | |
"password": "redacted", | |
"customJdbcDriverS3Path": "s3://your-s3-bucket/mysql-connector-java-8.0.28.jar", | |
"customJdbcDriverClassName": "com.mysql.cj.jdbc.Driver"} | |
connection_mysql8_options_target = { | |
"url": "jdbc:mysql://5b9jyx2o6pbr.us-east-1.psdb.cloud/matrix-demos-jonico?workload=olap", | |
"dbtable": "target_matrix", | |
"user": "l4klb0hnyw7o", | |
"password": "redacted", | |
"customJdbcDriverS3Path": "s3://your-s3-bucket/mysql-connector-java-8.0.28.jar", | |
"customJdbcDriverClassName": "com.mysql.cj.jdbc.Driver"} | |
print ("Starting to connect to database") | |
# Read from JDBC databases with custom driver | |
df_source = glueContext.create_dynamic_frame.from_options(connection_type="mysql", connection_options=connection_mysql8_options_source, transformation_ctx = "df_source") | |
df_source = ApplyMapping.apply(frame = df_source, mappings = [("id", "integer", "id", "integer"), ("cell", "string", "cell", "string"), ("pixel_data", "string", "pixel_data", "string"), ("operation", "string", "operation", "string")]) | |
print ("Applied mapping to the Glue DynamicFrame") | |
df_source = df_source.drop_fields(['operation']) | |
df_source.printSchema() | |
glueContext.write_from_options(frame_or_dfc=df_source, connection_type="mysql", connection_options=connection_mysql8_options_target, transformation_ctx = "df_target") |
Instructions on how to download/upload MySQL 8.0 JDBC driver taken from https://aws.amazon.com/blogs/big-data/building-aws-glue-spark-etl-jobs-by-bringing-your-own-jdbc-drivers-for-amazon-rds/
If you need to connect to MySQL 8.x, then be aware that the test connection feature in the UI works only for MySQL 5.x versions. MySQL version 8 is not supported with the built-in AWS Glue JDBC driver. If you test the connection against a MySQL version newer than version 5.x, then you might get a connection timeout error. However, you can still use your AWS Glue connection to connect to MySQL version 8 with a workaround. Use the connection on an extract, load, and transform (ETL) job by manually providing the compatible driver JAR for MySQL version 8 and later. Then, load this JAR file into your job similar to how you load any JDBC driver on a Spark job. For more information, see Connection types and options for ETL in AWS Glue.
trick was to not specify any connection in the advanced settings of the job: