Last active
August 24, 2022 13:07
-
-
Save ilyesAj/fd7cf8d72a5f79cb128cb510a01d4634 to your computer and use it in GitHub Desktop.
a buffered version of s3 copy using kubernetes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Dedicated namespace that holds every resource of the S3 copy job
# (Job, PersistentVolumeClaim and credentials Secret).
apiVersion: v1
kind: Namespace
metadata:
  name: s3-copy
---
# Buffered bucket-to-bucket copy.
#
# Two containers share the "data" volume mounted at /data:
#   * s3-sync-local  downloads the source bucket into /data/tempdir
#   * s3-sync-remote moves whatever is in /data/tempdir to the destination
#     bucket, freeing the buffer as it goes.
#
# The containers coordinate through marker files in /data (outside
# /data/tempdir, so they are never uploaded):
#   /data/.download-complete  written when the download finished successfully
#   /data/.download-failed    written when the download exited with an error
#
# The env blocks of both containers must be kept identical; in particular
# SUBPATH has to match, otherwise the uploader watches the wrong directory.
apiVersion: batch/v1
kind: Job
metadata:
  name: s3-copy-bucket-a
  namespace: s3-copy
spec:
  # Never retry automatically: a failed copy should be inspected by hand.
  backoffLimit: 0
  template:
    metadata:
      name: s3-copy
    spec:
      containers:
        - image: amazon/aws-cli
          name: s3-sync-local
          command: ["/bin/sh"]
          args:
            - -c
            - |
              set -u
              # Clear markers left on the volume by a previous run.
              rm -f /data/.download-complete /data/.download-failed
              mkdir -p /data/tempdir
              # For an S3 -> local transfer the bucket region is selected with
              # --region (--source-region only applies to S3 -> S3 transfers).
              if aws s3 sync --region "$SOURCE_REGION" --no-progress "s3://$SOURCE_BUCKET/$SUBPATH" "/data/tempdir/$SUBPATH"; then
                touch /data/.download-complete
              else
                touch /data/.download-failed
                exit 1
              fi
          env:
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: aws-s3-sync-credentials
                  key: AWS_SECRET_ACCESS_KEY
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: aws-s3-sync-credentials
                  key: AWS_ACCESS_KEY_ID
            - name: SOURCE_BUCKET
              value: bucket-a  # source bucket
            - name: DESTINATION_BUCKET
              value: bucket-b  # destination bucket
            - name: SOURCE_REGION
              value: eu-west-1  # the region of source bucket
            - name: DESTINATION_REGION
              value: eu-west-3  # the region of destination bucket
            - name: SUBPATH
              # Optional prefix inside the source bucket; restricts the copy to
              # that path (works like a filter). Empty copies the whole bucket.
              value: ""
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: "1"
              memory: 2Gi
            limits:
              cpu: "2"
              memory: 4Gi
        - image: amazon/aws-cli
          name: s3-sync-remote
          command: ["/bin/sh"]
          args:
            - -c
            - |
              echo starting
              # Give the downloader time to clear stale markers and start.
              sleep 5
              # NOTE(review): mv may pick up files the downloader is still
              # writing - confirm this is acceptable for your objects.
              while true; do
                count=$(ls "/data/tempdir/${SUBPATH}/" 2>/dev/null | wc -l)
                if [ "$count" -gt 0 ]; then
                  aws s3 mv --recursive /data/tempdir/ "s3://$DESTINATION_BUCKET" --region "$DESTINATION_REGION" --no-progress
                elif [ -f /data/.download-failed ]; then
                  echo "download failed; buffer drained, aborting"
                  exit 1
                elif [ -f /data/.download-complete ]; then
                  # Buffer is empty AND nothing more will arrive: done.
                  break
                fi
                sleep 5
              done
              echo stopped
          env:
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: aws-s3-sync-credentials
                  key: AWS_SECRET_ACCESS_KEY
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: aws-s3-sync-credentials
                  key: AWS_ACCESS_KEY_ID
            - name: SOURCE_BUCKET
              value: bucket-a
            - name: DESTINATION_BUCKET
              value: bucket-b
            - name: SOURCE_REGION
              value: eu-west-1
            - name: DESTINATION_REGION
              value: eu-west-3
            - name: SUBPATH
              # Must be identical to SUBPATH of s3-sync-local.
              value: ""
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: "1"
              memory: 2Gi
            limits:
              cpu: "2"
              memory: 4Gi
      restartPolicy: Never
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: s3-copy-bucket-a
---
# Shared buffer between the download and upload containers of the Job.
# ReadWriteOnce is sufficient because both containers run in the same pod.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: s3-copy-bucket-a
  namespace: s3-copy
  labels:
    app: s3-copy-bucket-a
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      # Size of the shared folder/buffer; adjust the storage capacity
      # according to the size of the bucket.
      storage: 250Gi
---
# AWS credentials consumed by both containers of the Job via secretKeyRef.
# The values below are placeholders: replace them before applying, and do
# not commit real keys to version control.
apiVersion: v1
kind: Secret
metadata:
  name: aws-s3-sync-credentials
  namespace: s3-copy
type: Opaque
stringData:
  AWS_ACCESS_KEY_ID: "XX"
  AWS_SECRET_ACCESS_KEY: "XX"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
full implementation of this code here