Skip to content

Instantly share code, notes, and snippets.

@nicholasdunbar
Last active August 29, 2015 14:17
Show Gist options
  • Save nicholasdunbar/0f9bc264fe8eaa95cd7f to your computer and use it in GitHub Desktop.
Save nicholasdunbar/0f9bc264fe8eaa95cd7f to your computer and use it in GitHub Desktop.
php based command line utility for restoring files from a back-up bucket to a target bucket.
#This is a properties file that is used by awsUtil.php and
#can also be used by https://github.com/MoriTanosuke/glacieruploader
#should you want to use it somewhere in your backup solution.
#You will want to use user credentials that are limited by a policy like the one found here:
#https://gist.github.com/nicholasdunbar/70772946be646966dec1
accessKey=EXAMPLEOFAKEY
secretKey=ex3a2m9plkeof8a9s8e9c7ret2ke8y
<?php
/*
* Commands for S3 restoring files from a backup bucket with versioning:
*
* Usage:
*
* php awsUtil.php sync <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>
*
* Used to restore <target-bucket-name> to the last version saved in <source-bucket-name>
* within the timestamp range (<start-timestamp>-<end-timestamp>).
* If a file has no version saved within <start-timestamp>-<end-timestamp>, it is
* reverted to its most recent version from before the specified range. Files that have
* no version in or before the range are deleted from the target bucket.
*
* php awsUtil.php copy-versions <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>
*
* Used to restore files from a backup bucket in S3. Allows you to specify the
* timestamp range of the version you want to use. If there is only one version
* available for a file, it is used regardless of its timestamp. This won't let
* you sync delete markers.
*
* php awsUtil.php copy-delete-markers <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>
*
* Finds all the delete markers in the <source-bucket-name> between the specified
* timestamps and then, for each file where a delete marker is found, deletes the
* corresponding file in the <target-bucket-name>. This makes sure the deletions
* are in sync.
*
* php awsUtil.php delete-versions <target-bucket-name> <start-timestamp>-<end-timestamp>
*
* Used to delete versions that were created between two unix timestamps. Useful for
* removing bad versions and thus restoring the last good version. It is also used to
* empty a bucket that has versioning enabled on it so that the bucket can be deleted,
* since AWS will not let you delete a bucket until all versions and delete markers
* are removed from the bucket.
*
* php awsUtil.php list-timestamps <target-bucket-name> <grep-pattern>
*
* Used so you know what timestamp range to use. The grep-pattern uses the standard form of
* grep -E <grep-pattern>
* so <grep-pattern> would be something like
* [a-z]+
* The grep-pattern allows you to get the timestamps for certain files so you know what
* the range is that you want to use in order to correctly set timestamps in
* restore or delete-versions Example output found here: https://gist.github.com/nicholasdunbar/d8b9a95749fd25f0205e
*
*/
//CONFIGURE:
//Print the usage first so that --help works even when aws.properties is missing.
if (!isset($argv[1]) || $argv[1] === '--help' || $argv[1] === '-h') {
    echo "Available commands:\n";
    echo "copy-versions <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>\n";
    //the dispatcher knows this command as copy-delete-markers
    echo "copy-delete-markers <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>\n";
    echo "delete-versions <target-bucket-name> <start-timestamp>-<end-timestamp>\n";
    echo "list-timestamps <target-bucket-name> <grep-pattern>\n";
    echo "sync <source-bucket-name> <target-bucket-name> <start-timestamp>-<end-timestamp>\n";
    exit;
}
$cmd = $argv[1];
//get credentials
$creds = is_readable('aws.properties') ? parse_ini_file('aws.properties') : false;
if ($creds === false || !isset($creds['accessKey'], $creds['secretKey'])) {
    //without both keys every aws call would fail later with a far less obvious message
    echo "Error: could not read accessKey and secretKey from aws.properties\n";
    exit(1);
}
$AWS_ACCESS_KEY_ID = $creds['accessKey'];
$AWS_SECRET_ACCESS_KEY = $creds['secretKey'];
//$initCmds is prepended to every shell command by myPassthru(); the values are
//quoted so that characters the shell treats specially cannot break the command line
$initCmds = 'export AWS_ACCESS_KEY_ID=' . escapeshellarg($AWS_ACCESS_KEY_ID)
    . ' && export AWS_SECRET_ACCESS_KEY=' . escapeshellarg($AWS_SECRET_ACCESS_KEY) . ' && ';
//strtotime()/date() need a default timezone; fall back to UTC when php.ini sets none
if (!ini_get('date.timezone')) {
    date_default_timezone_set('UTC');
}
$dateStr = date('l jS \of F Y h:i:s A');
/**
 * Runs a shell command with the global $initCmds prefix (the AWS credential
 * exports) prepended and returns what the command printed.
 *
 * stderr is folded into stdout with a trailing "2>&1"; as with any shell
 * redirection this applies to the last command of a pipeline only.
 *
 * The output is captured directly instead of being redirected into a fixed
 * "cmds.txt" in the working directory, so concurrent runs no longer overwrite
 * each other's output and a read-only working directory is not a problem.
 *
 * @param string $cmd        shell command to run (without the credential prefix)
 * @param bool   $isPrintCmd when true, echo $cmd followed by a newline after it ran
 * @return string captured output, or '' when the command printed nothing or could not be run
 */
function myPassthru($cmd, $isPrintCmd = true){
    global $initCmds;
    $output = shell_exec($initCmds.$cmd.' 2>&1');
    if ($isPrintCmd){
        echo $cmd;
        echo "\n";
    }
    //shell_exec() yields a non-string for "no output" or failure; callers expect a string
    return is_string($output) ? $output : '';
}
//COMMAND DISPATCH
//For every command: the message to print for each required positional argument
//(starting at $argv[2]) when that argument is missing.
$bucketPairMessages = [
    "missing source bucket name\n",
    "missing target bucket name\n",
    "missing unix timestamp range <start-timestamp>-<end-timestamp>\n",
];
$requiredArgMessages = [
    'copy-versions' => $bucketPairMessages,
    'copy-delete-markers' => $bucketPairMessages,
    //alias: the usage text has advertised this name for copy-delete-markers
    'sync-delete-markers' => $bucketPairMessages,
    'delete-versions' => [
        "missing target bucket name\n",
        "missing unix timestamp range <start-timestamp>-<end-timestamp>\n",
    ],
    'list-timestamps' => [
        "missing target bucket name\n",
    ],
    'sync' => $bucketPairMessages,
];
if (!isset($requiredArgMessages[$cmd])) {
    echo "unrecognized command: $cmd\n";
    //non-zero status so that calling scripts can detect the usage error
    exit(1);
}
//collect the required arguments in order, stopping at the first one that is missing
$cmdArgs = [];
foreach ($requiredArgMessages[$cmd] as $position => $missingMessage) {
    if (!isset($argv[$position + 2])) {
        echo $missingMessage;
        exit(1);
    }
    $cmdArgs[] = trim($argv[$position + 2]);
}
switch ($cmd) {
    case 'copy-versions':
        copyVersions($cmdArgs[0], $cmdArgs[1], $cmdArgs[2]);
        break;
    case 'copy-delete-markers':
    case 'sync-delete-markers':
        copyDeleteMarkers($cmdArgs[0], $cmdArgs[1], $cmdArgs[2]);
        break;
    case 'delete-versions':
        deleteVersions($cmdArgs[0], $cmdArgs[1]);
        break;
    case 'list-timestamps':
        //the grep pattern is optional; an empty pattern means "do not filter"
        $grepPattern = isset($argv[3]) ? trim($argv[3]) : '';
        listTimestamps($cmdArgs[0], $grepPattern);
        break;
    case 'sync':
        sync($cmdArgs[0], $cmdArgs[1], $cmdArgs[2]);
        break;
}
/**
 * Copies one version of each object from the source bucket into the target bucket.
 *
 * Only VERSIONS rows of the listing are considered (delete markers are ignored).
 * For an object with several versions, the newest version whose timestamp lies
 * inside the range (inclusive) is copied; if none lies inside, the object is skipped.
 * An object with exactly one version is copied regardless of its timestamp.
 *
 * @param string $sourceBucket   bucket whose versions are read
 * @param string $targetBucket   bucket the chosen versions are uploaded to
 * @param string $timeStampRange "<start-timestamp>-<end-timestamp>" in unix seconds
 */
function copyVersions($sourceBucket, $targetBucket, $timeStampRange){
    $timeStampSplit = explode('-', $timeStampRange);
    if (count($timeStampSplit) !== 2){
        echo "Error: timestamp range must look like <start-timestamp>-<end-timestamp>\n";
        exit(1);
    }
    $fromTS = (int)$timeStampSplit[0];
    $toTS = (int)$timeStampSplit[1];
    if ($fromTS > $toTS){
        echo "Error: timestamp range is in wrong order\n";
        exit(1);
    }
    //Quote everything that ends up on the shell command line.
    //NOTE: escapeshellarg() is locale-sensitive; use a UTF-8 LC_CTYPE if keys are not plain ASCII.
    $sourceArg = escapeshellarg($sourceBucket);
    $targetArg = escapeshellarg($targetBucket);
    $listing = trim(myPassthru("aws s3api --output text list-object-versions --bucket {$sourceArg}"));
    //key => [timestamp => version id]
    $filesInfo = [];
    foreach (explode("\n", $listing) as $line){
        $columns = preg_split('/\s+/', $line);
        //columns read from a VERSIONS row: [3] = key, [4] = last-modified date, [7] = version id
        if ($columns[0] !== 'VERSIONS' || count($columns) < 8){
            continue;
        }
        $timestamp = strtotime($columns[4]);
        if ($timestamp === false){
            continue; //unparseable date, cannot be compared against the range
        }
        $filesInfo[$columns[3]][$timestamp] = $columns[7];
    }
    $versionList = [];
    foreach ($filesInfo as $fileName => $versions){
        krsort($versions, SORT_NUMERIC); //newest first, sorted once per object
        $isOnlyVersion = count($versions) === 1;
        foreach ($versions as $date => $id){
            if ($isOnlyVersion || ($date >= $fromTS && $date <= $toTS)){
                $versionList[] = ['key'=>(string)$fileName, 'version-id'=>$id];
                break;
            }
        }
    }
    $filesInfo = null;
    $tempSync = './restoreTmp';
    if (!is_dir($tempSync)){
        mkdir($tempSync);
    }
    //Keys may contain "/", which would point into sub-directories that do not exist
    //locally, so every object is downloaded to the same scratch file instead.
    $scratchFile = $tempSync.'/object.tmp';
    $scratchArg = escapeshellarg($scratchFile);
    foreach ($versionList as $versionInfo){
        $keyArg = escapeshellarg($versionInfo['key']);
        $idArg = escapeshellarg($versionInfo['version-id']);
        echo myPassthru("aws s3api get-object --bucket {$sourceArg} --key {$keyArg} --version-id {$idArg} {$scratchArg}");
        if (!file_exists($scratchFile)){
            //the download failed (its output was echoed above), so there is nothing to upload
            continue;
        }
        echo myPassthru("aws s3api put-object --bucket {$targetArg} --key {$keyArg} --body {$scratchArg}");
        unlink($scratchFile);
    }
    rmdir($tempSync);
    if (count($versionList) == 0){
        echo 'No files found in the specified unix timestamp range '.$timeStampRange."\n";
    }
}
/**
 * Replays deletions recorded in the source bucket onto the target bucket.
 *
 * Reads the DELETEMARKERS rows of the source listing and issues a delete-object
 * on the target bucket for each selected key. A key with several delete markers
 * is selected when its newest marker inside the range (inclusive) exists; a key
 * with exactly one delete marker is selected regardless of its timestamp.
 * NOTE(review): the single-marker rule mirrors copyVersions, but the usage text
 * only speaks of markers between the timestamps - confirm this is intended.
 *
 * @param string $sourceBucket   bucket whose delete markers are read
 * @param string $targetBucket   bucket the deletions are applied to
 * @param string $timeStampRange "<start-timestamp>-<end-timestamp>" in unix seconds
 */
function copyDeleteMarkers($sourceBucket, $targetBucket, $timeStampRange){
    $timeStampSplit = explode('-', $timeStampRange);
    if (count($timeStampSplit) !== 2){
        echo "Error: timestamp range must look like <start-timestamp>-<end-timestamp>\n";
        exit(1);
    }
    $fromTS = (int)$timeStampSplit[0];
    $toTS = (int)$timeStampSplit[1];
    if ($fromTS > $toTS){
        echo "Error: timestamp range is in wrong order\n";
        exit(1);
    }
    //quote everything that ends up on the shell command line
    $sourceArg = escapeshellarg($sourceBucket);
    $targetArg = escapeshellarg($targetBucket);
    $listing = trim(myPassthru("aws s3api --output text list-object-versions --bucket {$sourceArg}"));
    //key => [timestamp => version id]
    $filesInfo = [];
    foreach (explode("\n", $listing) as $line){
        $columns = preg_split('/\s+/', $line);
        //DELETEMARKERS rows are shorter than VERSIONS rows: [2] = key, [3] = date,
        //[4] = version id (the same layout listTimestamps and sync rely on).
        //Using the VERSIONS indices here would treat the date column as the key.
        if ($columns[0] !== 'DELETEMARKERS' || count($columns) < 5){
            continue;
        }
        $timestamp = strtotime($columns[3]);
        if ($timestamp === false){
            continue; //unparseable date, cannot be compared against the range
        }
        $filesInfo[$columns[2]][$timestamp] = $columns[4];
    }
    $versionList = [];
    foreach ($filesInfo as $fileName => $markers){
        krsort($markers, SORT_NUMERIC); //newest first, sorted once per object
        $isOnlyMarker = count($markers) === 1;
        foreach ($markers as $date => $id){
            if ($isOnlyMarker || ($date >= $fromTS && $date <= $toTS)){
                $versionList[] = ['key'=>(string)$fileName];
                break;
            }
        }
    }
    $filesInfo = null;
    foreach ($versionList as $versionInfo){
        $keyArg = escapeshellarg($versionInfo['key']);
        echo myPassthru("aws s3api delete-object --bucket {$targetArg} --key {$keyArg}");
    }
    if (count($versionList) == 0){
        echo 'No files found in the specified unix timestamp range '.$timeStampRange."\n";
    }
}
/**
 * Deletes, per object, the newest entry (version or delete marker) of the
 * target bucket whose timestamp lies inside the range (inclusive).
 *
 * Objects that have only a single entry are never touched, and at most one
 * entry per object is deleted per call.
 * NOTE(review): the usage text says this can empty a versioned bucket, which
 * these two rules prevent in a single run - confirm which behaviour is wanted
 * before loosening them, since the operation is destructive.
 *
 * @param string $targetBucket   bucket to delete entries from
 * @param string $timeStampRange "<start-timestamp>-<end-timestamp>" in unix seconds
 */
function deleteVersions($targetBucket, $timeStampRange){
    $timeStampSplit = explode('-', $timeStampRange);
    if (count($timeStampSplit) !== 2){
        echo "Error: timestamp range must look like <start-timestamp>-<end-timestamp>\n";
        exit(1);
    }
    $fromTS = (int)$timeStampSplit[0];
    $toTS = (int)$timeStampSplit[1];
    if ($fromTS > $toTS){
        echo "Error: timestamp range is in wrong order\n";
        exit(1);
    }
    //quote everything that ends up on the shell command line
    $targetArg = escapeshellarg($targetBucket);
    $listing = trim(myPassthru("aws s3api --output text list-object-versions --bucket {$targetArg}"));
    //Column positions per row type. DELETEMARKERS rows are shorter than VERSIONS
    //rows (the same layout listTimestamps and sync rely on); reading them with the
    //VERSIONS positions would treat the date column as the key.
    $layouts = [
        'VERSIONS' => ['key'=>3, 'date'=>4, 'id'=>7],
        'DELETEMARKERS' => ['key'=>2, 'date'=>3, 'id'=>4],
    ];
    //key => [timestamp => version id]
    $filesInfo = [];
    foreach (explode("\n", $listing) as $line){
        $columns = preg_split('/\s+/', $line);
        if (!isset($layouts[$columns[0]])){
            continue;
        }
        $layout = $layouts[$columns[0]];
        if (count($columns) <= $layout['id']){
            continue; //truncated row
        }
        $timestamp = strtotime($columns[$layout['date']]);
        if ($timestamp === false){
            continue; //unparseable date, cannot be compared against the range
        }
        $filesInfo[$columns[$layout['key']]][$timestamp] = $columns[$layout['id']];
    }
    $versionList = [];
    foreach ($filesInfo as $fileName => $entries){
        if (count($entries) <= 1){
            continue; //never remove the only remaining entry of an object
        }
        krsort($entries, SORT_NUMERIC); //newest first, sorted once per object
        foreach ($entries as $date => $id){
            if ($date >= $fromTS && $date <= $toTS){
                $versionList[] = ['key'=>(string)$fileName, 'version-id'=>$id];
                break;
            }
        }
    }
    $filesInfo = null;
    foreach ($versionList as $versionInfo){
        $keyArg = escapeshellarg($versionInfo['key']);
        $idArg = escapeshellarg($versionInfo['version-id']);
        echo myPassthru("aws s3api delete-object --bucket {$targetArg} --key {$keyArg} --version-id {$idArg}");
    }
    if (count($versionList) == 0){
        echo 'No files found in the specified unix timestamp range '.$timeStampRange."\n";
    }
}
/**
 * Prints, as pretty-printed JSON, every version and delete marker of the bucket
 * grouped by key and ordered newest first, including the unix timestamp needed
 * for the range arguments of the other commands.
 *
 * @param string $targetBucket bucket to list
 * @param string $grepPattern  optional extended regular expression; when given,
 *                             the raw listing is filtered through grep -E before parsing
 */
function listTimestamps($targetBucket, $grepPattern = ''){
    //quote everything that ends up on the shell command line
    $listCmd = 'aws s3api --output text list-object-versions --bucket '.escapeshellarg($targetBucket);
    if ($grepPattern != ''){
        //-e keeps a pattern that starts with "-" from being read as a grep option
        $listCmd .= ' | grep -E -e '.escapeshellarg($grepPattern);
    }
    $listing = trim(myPassthru($listCmd, false));
    //column positions per row type; DELETEMARKERS rows are shorter than VERSIONS rows
    $layouts = [
        'VERSIONS' => ['key'=>3, 'date'=>4, 'id'=>7],
        'DELETEMARKERS' => ['key'=>2, 'date'=>3, 'id'=>4],
    ];
    $filesInfo = [];
    foreach (explode("\n", $listing) as $line){
        $columns = preg_split('/\s+/', $line);
        $versionType = $columns[0];
        if (!isset($layouts[$versionType])){
            continue;
        }
        $layout = $layouts[$versionType];
        if (count($columns) <= $layout['id']){
            continue; //truncated row
        }
        $fullDate = $columns[$layout['date']];
        $timestamp = strtotime($fullDate);
        if ($timestamp === false){
            continue; //unparseable date, no usable timestamp to report
        }
        $filesInfo[$columns[$layout['key']]][$timestamp] = [
            'full-date'=>$fullDate,
            'unix-timestamp'=>$timestamp,
            'version-type'=>$versionType,
            'version-id'=>$columns[$layout['id']],
        ];
    }
    //newest first, sorted once per object instead of after every row
    foreach (array_keys($filesInfo) as $fileName){
        krsort($filesInfo[$fileName], SORT_NUMERIC);
    }
    if (count($filesInfo) == 0){
        echo "No files found\n";
    } else {
        echo json_encode($filesInfo, JSON_PRETTY_PRINT);
    }
}
/**
 * Brings the target bucket to the state the source bucket had at the end of
 * the timestamp range.
 *
 * For every key in the source listing the newest entry (version or delete
 * marker) dated at or before the end of the range is chosen - that is the
 * newest entry inside the range or, if there is none, the newest one before it:
 *   - a version is downloaded and uploaded to the target bucket,
 *   - a delete marker causes the key to be deleted from the target bucket,
 *   - a key without any such entry is deleted from the target bucket.
 * Keys that exist in the target bucket but are absent from the source listing
 * are deleted as well. Deletions are only issued for keys the target listing contains.
 *
 * @param string $sourceBucket   backup bucket holding the versions
 * @param string $targetBucket   bucket to restore into
 * @param string $timeStampRange "<start-timestamp>-<end-timestamp>" in unix seconds
 */
function sync($sourceBucket, $targetBucket, $timeStampRange){
    //PROCESS PARAMETERS
    $timeStampSplit = explode('-', $timeStampRange);
    if (count($timeStampSplit) !== 2){
        echo "Error: timestamp range must look like <start-timestamp>-<end-timestamp>\n";
        exit(1);
    }
    $fromTS = (int)$timeStampSplit[0];
    $toTS = (int)$timeStampSplit[1];
    if ($fromTS > $toTS){
        echo "Error: timestamp range is in wrong order\n";
        exit(1);
    }
    //Quote everything that ends up on the shell command line.
    //NOTE: escapeshellarg() is locale-sensitive; use a UTF-8 LC_CTYPE if keys are not plain ASCII.
    $sourceArg = escapeshellarg($sourceBucket);
    $targetArg = escapeshellarg($targetBucket);
    //GET SOURCE VERSIONS LIST
    $listing = trim(myPassthru("aws s3api --output text list-object-versions --bucket {$sourceArg}", false));
    //STRUCTURE VERSIONS LIST
    //organize the versions and delete markers by key: key => [timestamp => info]
    //column positions per row type; DELETEMARKERS rows are shorter than VERSIONS rows
    $layouts = [
        'VERSIONS' => ['key'=>3, 'date'=>4, 'id'=>7],
        'DELETEMARKERS' => ['key'=>2, 'date'=>3, 'id'=>4],
    ];
    $filesInfo = [];
    foreach (explode("\n", $listing) as $line){
        $columns = preg_split('/\s+/', $line);
        $versionType = $columns[0];
        if (!isset($layouts[$versionType])){
            continue;
        }
        $layout = $layouts[$versionType];
        if (count($columns) <= $layout['id']){
            continue; //truncated row
        }
        $timestamp = strtotime($columns[$layout['date']]);
        if ($timestamp === false){
            continue; //unparseable date, cannot be compared against the range
        }
        $filesInfo[$columns[$layout['key']]][$timestamp] = [
            'version-type'=>$versionType,
            'version-id'=>$columns[$layout['id']],
        ];
    }
    //BUILD SYNC LIST
    $versionList = [];
    foreach ($filesInfo as $fileName => $entries){
        krsort($entries, SORT_NUMERIC); //newest first, sorted once per object
        //nothing at or before the end of the range means the key did not exist yet: delete it
        $chosen = ['version-type'=>'DELETE', 'version-id'=>0];
        foreach ($entries as $date => $info){
            //Walking newest to oldest, the first entry not newer than the range end is
            //the newest in-range entry if one exists, else the newest entry before the range.
            if ($date <= $toTS){
                $chosen = $info;
                break;
            }
        }
        $versionList[] = [
            'key'=>(string)$fileName,
            'version-type'=>$chosen['version-type'],
            'version-id'=>$chosen['version-id'],
        ];
    }
    //ADD MISSING FILES TO SYNC LIST
    //when the target bucket has files that the source bucket does not, those
    //files are deleted from the target bucket to bring it in sync with the source
    $targetListing = trim(myPassthru("aws s3api --output text list-objects --bucket {$targetArg}", false));
    //set of keys present in the target bucket (key => true) for constant-time lookups
    $targetKeys = [];
    foreach (explode("\n", $targetListing) as $line){
        $columns = preg_split('/\s+/', $line);
        //column read from a CONTENTS row: [2] = key
        if ($columns[0] === 'CONTENTS' && count($columns) >= 3){
            $targetKeys[$columns[2]] = true;
        }
    }
    foreach ($targetKeys as $targetKey => $unused){
        if (!isset($filesInfo[$targetKey])){
            $versionList[] = [
                'key'=>(string)$targetKey,
                'version-type'=>'DELETEMISSING',
                'version-id'=>0,
            ];
        }
    }
    //SYNC ALL THE FILES ON THE SYNC LIST
    $filesInfo = null;
    $tempSync = './awsUtilSyncTmp';
    if (!is_dir($tempSync)){
        mkdir($tempSync);
    }
    //Keys may contain "/", which would point into sub-directories that do not exist
    //locally, so every object is downloaded to the same scratch file instead.
    $scratchFile = $tempSync.'/object.tmp';
    $scratchArg = escapeshellarg($scratchFile);
    foreach ($versionList as $versionInfo){
        $keyArg = escapeshellarg($versionInfo['key']);
        switch ($versionInfo['version-type']){
            case 'VERSIONS':
                $idArg = escapeshellarg($versionInfo['version-id']);
                echo myPassthru("aws s3api get-object --bucket {$sourceArg} --key {$keyArg} --version-id {$idArg} {$scratchArg}");
                if (file_exists($scratchFile)){
                    //only upload when the download actually produced a file
                    echo myPassthru("aws s3api put-object --bucket {$targetArg} --key {$keyArg} --body {$scratchArg}");
                    unlink($scratchFile);
                }
                break;
            case 'DELETEMARKERS':
            case 'DELETEMISSING':
            case 'DELETE':
                //only delete keys the target bucket actually has
                if (isset($targetKeys[$versionInfo['key']])){
                    echo myPassthru("aws s3api delete-object --bucket {$targetArg} --key {$keyArg}");
                }
                break;
            default:
                break;
        }
    }
    rmdir($tempSync);
}
#!/bin/bash
###
# Used to back up an S3 bucket, customize the script as you need
# to restore files from certain snap shots use the commands in
# the awsUtil.php file
###
#CONFIGURATION
export bucket_to_backup='example-of-a-bucket-name';
export backup_bucket='example-of-a-backup-bucket-name';
#GET CREDENTIALS
#aws.properties is expected in the current directory; the explicit ./ keeps
#`source` from looking the name up in PATH first
if [ ! -r ./aws.properties ]; then
    echo "aws.properties not found or not readable in $(pwd)" >&2;
    exit 1;
fi
source ./aws.properties;
#stop early instead of calling s3cmd with empty credentials
if [ -z "$accessKey" ] || [ -z "$secretKey" ]; then
    echo "aws.properties must define accessKey and secretKey" >&2;
    exit 1;
fi
export AWS_ACCESS_KEY_ID="$accessKey";
export AWS_SECRET_ACCESS_KEY="$secretKey";
#you will need to download and install the s3cmd tool at the following url:
#http://s3tools.org/
s3cmd --access_key="$AWS_ACCESS_KEY_ID" --secret_key="$AWS_SECRET_ACCESS_KEY" --recursive sync "s3://$bucket_to_backup" "s3://$backup_bucket";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment