Last active
December 11, 2016 04:18
-
-
Save okiriza/b200cd64359aa872354ff43b074d7eaa to your computer and use it in GitHub Desktop.
Lambda function for scraping Transjakarta CCTV screenshots (https://tentangdata.wordpress.com/2016/12/10/managed-database-semurah-tahu-tempe-dengan-amazon-dynamodb/)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta | |
import urllib2 | |
from urllib2 import URLError | |
import boto3 | |
# JPEG start-of-image marker; the first bytes of every frame in the stream.
START_BYTE = b'\xff\xd8'
# JPEG end-of-image marker; the last bytes of every frame in the stream.
END_BYTE = b'\xff\xd9'
# Upper bound on the number of chunk reads attempted per stream before
# giving up on finding a complete frame.
ITER_LIMIT = 10000
def capture_img(url):
    """Grab one complete JPEG frame from the MJPEG stream at ``url``.

    The stream is read in 1024-byte chunks until a start marker
    (``START_BYTE``) followed by an end marker (``END_BYTE``) has been
    received.

    Args:
        url: Address of the MJPEG stream to open.

    Returns:
        The raw bytes of the first complete frame, markers included, or
        ``None`` if the stream ended or ``ITER_LIMIT`` reads were performed
        without seeing a complete frame.

    Raises:
        urllib2.URLError: If the stream cannot be opened.
        NOTE(review): a timeout while reading appears to surface as
        ``socket.timeout`` rather than ``URLError`` -- confirm with callers.
    """
    stream = urllib2.urlopen(url, timeout=15)
    try:
        buf = b''
        start = -1
        for _ in range(ITER_LIMIT):
            chunk = stream.read(1024)
            if not chunk:
                # Server closed the stream; no further data will arrive.
                return None
            buf += chunk

            if start == -1:
                start = buf.find(START_BYTE)
                if start == -1:
                    continue

            # Only accept an end marker located after the start marker. When
            # we join the stream mid-frame, the tail of the previous frame
            # (including its end marker) can precede the first start marker.
            end = buf.find(END_BYTE, start + len(START_BYTE))
            if end != -1:
                return buf[start:end + len(END_BYTE)]
        return None
    finally:
        # Always release the HTTP connection, even on error or early return.
        stream.close()
def write_to_dynamo(item):
    """Put ``item`` into the 'cctv-jak' DynamoDB table.

    Args:
        item: Mapping passed unchanged as the ``Item`` argument of
            ``put_item``.
    """
    # A new resource handle is created on each call, targeting the
    # ap-southeast-1 region.
    target_table = boto3.resource(
        'dynamodb',
        region_name='ap-southeast-1',
    ).Table('cctv-jak')
    target_table.put_item(Item=item)
def lambda_handler(event, context):
    """Capture one frame from each configured CCTV stream and store it.

    For every location, a frame is fetched with ``capture_img``, converted
    to text, stamped with the current WIB (UTC+7) time and written to
    DynamoDB via ``write_to_dynamo``. Cameras that cannot be reached, time
    out, or do not yield a complete frame are skipped.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).

    Returns:
        Dict mapping each successfully captured location name to the record
        that was written (keys ``loc``, ``timestamp``, ``img_bytes``).
    """
    # Local import keeps this block self-contained: timeouts raised while
    # reading the stream body surface as socket.timeout, not URLError.
    import socket

    loc_urls = {
        "Pondok Indah 1 N": "http://202.51.112.91:727/image2",
        "Pondok Indah 1 S": "http://202.51.112.91:728/image2",
        # other locations to scrape
    }

    records = {}
    for loc, url in loc_urls.items():
        # Keep the try body to the single call that performs network reads,
        # so unrelated errors (e.g. from the DynamoDB write) are not masked.
        try:
            img_byte_str = capture_img(url)
        except (URLError, socket.timeout):
            # Camera unreachable or too slow: skip it, continue with the rest.
            continue

        if not img_byte_str:
            # Stream format not as expected, continue to next CCTV
            continue

        # ISO-8859-1 maps every byte value to exactly one code point, so the
        # image can be stored as a string and recovered losslessly.
        img_byte_str = img_byte_str.decode("ISO-8859-1")  # for storage in DynamoDB

        # WIB is UTC+7. Start from UTC explicitly so the result does not
        # depend on the local timezone of the host.
        wib_now = datetime.utcnow() + timedelta(hours=7)
        timestamp = wib_now.strftime('%Y-%m-%d %H:%M:%S')

        record = {
            'loc': loc,
            'timestamp': timestamp,
            'img_bytes': img_byte_str,
        }
        records[loc] = record
        write_to_dynamo(record)

    return records
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment