Last active
August 29, 2015 14:05
-
-
Save meyju/3a5ddc9c77e3c6bbd563 to your computer and use it in GitHub Desktop.
Script repairs damaged S3 filenames from AWS ELB logfiles where the IP is missing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Info:    Script repairs damaged S3 object names of AWS ELB logfiles where
#          the IP is missing and an HTML "404 - Not Found" page was written
#          into the object name instead.
# Author:  Julian Meyer
# Version: 0.1
#
# USE AT YOUR OWN RISK!
#
# How it works:
#   1. List all objects below s3://$bucket/$prefix as JSON and keep the keys
#      that contain an embedded XML/HTML error page.
#   2. For every such key, build the repaired key by replacing the embedded
#      page with $replacement_ip and append an "aws s3 mv" command to
#      $taskfile.
#   3. Run $taskfile with bash, then remove both temporary files.
#
# Requires: bash, awk and the AWS command line client ("aws") on PATH.

set -euo pipefail

# Options:
bucket="bucketname_with-elb-logs"
prefix="some-prefix"
object_list="object_list.txt"
taskfile="tasks.sh"
# Placeholder written where the IP should have been in the object name.
replacement_ip="1.2.3.4"

# Example damaged filename (with line endings in it):
#####################################################
# mybucket/AWSLogs/1234567890/elasticloadbalancing/eu-west-1/2014/08/22/1234567890_elasticloadbalancing_eu-west-1_elb-name_20140822T0000Z_<?xml version="1.0" encoding="iso-8859-1"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
# <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
# <head>
# <title>404 - Not Found</title>
# </head>
# <body>
# <h1>404 - Not Found</h1>
# </body>
# </html>_1rt2csbi.log
#####################################################

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Remove the temporary files; installed as EXIT trap so it runs on every
# exit path, not only after a successful rename.
cleanup() {
  rm -f -- "$object_list" "$taskfile"
}

##############################
# Main "Prog":

command -v aws >/dev/null || die "the 'aws' command line client was not found"

trap cleanup EXIT

# clear files to be safe if rerun
: > "$taskfile"
: > "$object_list"

# get damaged logfiles
#
# --output json is forced because the '"Key": "..."' lines parsed below only
# exist in JSON output; the user's configured default may be text or table.
# Inside the JSON document every key sits on one line, with the line endings
# and double quotes of the embedded page escaped as \n and \".
# The trailing '",' is stripped with optional whitespace after it, so both
# CLI builds that emit a space after the comma and those that do not work.
# awk (unlike grep) exits 0 when nothing matches, so with pipefail only a
# failing aws call aborts the script.
aws s3api list-objects --bucket "$bucket" --prefix "$prefix" --output json \
  | awk '
      /^[ \t]*"Key": "/ && /xml version/ {
        sub(/^[ \t]*"Key": "/, "")
        sub(/",?[ \t\r]*$/, "")
        print
      }' \
  > "$object_list" \
  || die "could not list objects below s3://$bucket/$prefix"

# Used to escape single quotes for the $'...' quoting in the task file.
single_quote="'"
escaped_single_quote="\\'"

task_count=0
while IFS= read -r name; do
  # Only keys that contain a complete embedded page can be repaired.
  [[ "$name" == *'<?xml'*'</html>'* ]] || continue

  # Replace everything from the first "<?xml" up to the last "</html>".
  before=${name%%'<?xml'*}
  after=${name##*'</html>'}
  new_name="${before}${replacement_ip}${after}"

  printf 'Object wrong: %s\n\n' "$name"
  printf 'Object right: %s\n' "$new_name"
  printf '%s\n' "-----"

  # The names are still JSON-escaped (\n, \", \\). Bash's $'...' quoting
  # understands the same escapes, so the task file turns them back into the
  # real characters. A literal single quote is the only character that needs
  # extra escaping, otherwise it would end the quoted string early.
  # NOTE(review): a JSON "\/" or a \uXXXX surrogate pair would not be decoded
  # correctly by $'...' - not expected in ELB log keys, confirm if in doubt.
  src=${name//"$single_quote"/"$escaped_single_quote"}
  dst=${new_name//"$single_quote"/"$escaped_single_quote"}
  printf "aws s3 mv \$'s3://%s/%s' \$'s3://%s/%s'\n" \
    "$bucket" "$src" "$bucket" "$dst" >> "$taskfile"

  task_count=$((task_count + 1))
done < "$object_list"

if (( task_count == 0 )); then
  printf 'No damaged objects found below s3://%s/%s\n' "$bucket" "$prefix"
  exit 0
fi

# Run the fix
printf '\n\nRenaming the damaged files...\n'
# The task file runs every command even if an earlier one failed; the exit
# status seen here is the one of the last "aws s3 mv".
bash "$taskfile" || die "renaming did not complete successfully"

# Remove Temp Files: handled by the EXIT trap (cleanup).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@meyju Standard output format does not seem to return a 'Key' line. Perhaps add '--output json' to the list-objects cmd?