Created
September 22, 2016 17:54
-
-
Save LouiseMcMahon/7ffb27c7d9b4244f87fa2821b593588c to your computer and use it in GitHub Desktop.
Script that runs a Scrapy crawl, uploads the output and log to S3, and emails a link to the results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Run a Scrapy crawl, archive its output to S3, email a link, then power off.

Steps, in order:

1. Sleep for a fixed start-up delay.
2. Run ``scrapy crawl`` writing a dated CSV item file and a dated log file.
3. Upload whichever of those files exist to S3.
4. Email the S3 URL of the item file via SES.
5. Delete the local files.
6. Halt the machine -- always, even if an earlier step failed, so a broken
   run cannot leave the instance powered on indefinitely.
"""
import datetime
import os
import subprocess
import sys
import time

SPIDER_NAME = 'spiderName'
BUCKET_NAME = 'bucketName'
AWS_REGION = 'eu-west-1'

# Delay before the crawl starts.
# NOTE(review): presumably a window to log in and stop the script before it
# crawls and powers the machine off -- confirm.
STARTUP_DELAY_SECONDS = 60 * 5

# NOTE(review): these look like redacted placeholders; set real,
# SES-verified addresses before use.
SENDER_ADDRESS = '[email protected]'
RECIPIENT_ADDRESS = '[email protected]'
REPLY_TO_ADDRESS = '[email protected]'


def result_file_names(day):
    """Return ``(item_file, log_file)`` names for a crawl run on *day*.

    *day* is any object with ``strftime`` (e.g. ``datetime.date``). Both
    names share one date stamp so they cannot disagree across midnight.
    """
    stem = 'Crawl-Results-' + day.strftime("%d%m%y")
    return stem + '.csv', stem + '.log'


def s3_object_url(bucket, key):
    """Return the region-specific HTTPS URL for *key* in *bucket*."""
    return 'https://s3-' + AWS_REGION + '.amazonaws.com/' + bucket + '/' + key


def upload_if_present(s3_client, path, key):
    """Upload local file *path* to ``BUCKET_NAME`` under *key*.

    Returns True when the file was uploaded and False when *path* does not
    exist (for example when the crawl produced no output), so a missing
    file does not abort the rest of the run.
    """
    if not os.path.isfile(path):
        return False
    with open(path, 'rb') as data:
        s3_client.put_object(
            Bucket=BUCKET_NAME,
            Key=key,
            Body=data,
            ContentType='text/plain',
            StorageClass='REDUCED_REDUNDANCY',
        )
    return True


def run_crawl_and_report():
    """Crawl, upload the results to S3, email the link, and clean up."""
    # Imported here so the pure helpers above stay importable on machines
    # that do not have the AWS SDK installed.
    import boto3

    # Read the date once so both file names carry the same stamp.
    item_file, log_file = result_file_names(datetime.date.today())

    # Argument list instead of a shell string: no shell parsing of names.
    exit_code = subprocess.call(
        ['scrapy', 'crawl', SPIDER_NAME, '-o', item_file, '--logfile', log_file]
    )
    if exit_code != 0:
        sys.stderr.write('scrapy exited with status %d\n' % exit_code)

    # Upload item file and log file.
    s3_client = boto3.client('s3', region_name=AWS_REGION)
    item_key = 'item-csv/' + item_file
    log_key = 'crawler-logs/' + log_file
    item_uploaded = upload_if_present(s3_client, item_file, item_key)
    log_uploaded = upload_if_present(s3_client, log_file, log_key)

    if item_uploaded:
        body_text = s3_object_url(BUCKET_NAME, item_key)
    else:
        # Still send an email so a run that produced nothing is noticed.
        body_text = (
            'No item file was produced by the crawl '
            '(scrapy exit status {0}).'.format(exit_code)
        )
        if log_uploaded:
            body_text += ' Log: ' + s3_object_url(BUCKET_NAME, log_key)

    # Send email.
    ses_client = boto3.client('ses', region_name=AWS_REGION)
    ses_client.send_email(
        Source=SENDER_ADDRESS,
        Destination={
            'ToAddresses': [
                RECIPIENT_ADDRESS
            ]
        },
        Message={
            'Subject': {
                'Data': 'Crawl Output'
            },
            'Body': {
                'Text': {
                    'Data': body_text
                }
            }
        },
        ReplyToAddresses=[
            REPLY_TO_ADDRESS,
        ]
    )

    # Delete local files only after everything above succeeded, so they
    # remain on disk for inspection when an upload or the email fails.
    for path in (item_file, log_file):
        if os.path.exists(path):
            os.remove(path)


def main():
    """Wait, run the crawl pipeline, and always halt the machine afterwards."""
    # The wait stays outside the try block: interrupting the script during
    # the delay must not trigger the shutdown.
    time.sleep(STARTUP_DELAY_SECONDS)
    try:
        run_crawl_and_report()
    finally:
        # Shut down the instance whether or not the run succeeded.
        subprocess.call(['shutdown', '-h', 'now'])


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment