Created
March 30, 2021 01:39
-
-
Save warabanshi/6cb7945fbea50d851b76d7cde29034b6 to your computer and use it in GitHub Desktop.
Download a zip file and retrieve the Office file whose name starts with "source"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io, requests, sys, zipfile | |
from wsgiref import simple_server | |
# Location of the zip archive that the WSGI app downloads on every request.
# `url_base` is the bucket URL and `s3_link` is the object key within it.
url_base = 'http://s3.amazonaws.com/xxxxxxxx'
s3_link = 'xxxxx.zip'

# Full download URL: bucket URL and object key joined by a slash.
url = f"{url_base}/{s3_link}"
def get_content_type(filename):
    """Return the Content-Type value to serve for *filename*.

    The type is chosen from the file extension. The Office Open XML
    formats ``.xlsx``, ``.docx`` and ``.pptx`` are recognized; the match is
    case-insensitive so a name such as ``SOURCE.XLSX`` is handled too.

    Args:
        filename: Name of the file (only its suffix is inspected).

    Returns:
        The matching MIME type string, or ``'text/plain'`` when the
        extension is not one of the recognized Office formats.
    """
    types_by_suffix = {
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    }

    # Compare on the lower-cased name so upper/mixed-case extensions match.
    lowered = filename.lower()
    for suffix, content_type in types_by_suffix.items():
        if lowered.endswith(suffix):
            return content_type
    return 'text/plain'
def application(env, start_response):
    """WSGI app: download the zip at ``url`` and serve its "source" member.

    On every request the archive is fetched over HTTP, the member whose
    name starts with ``'source'`` is extracted (the last one in archive
    order when several match), and its bytes are returned with a
    Content-Type derived from the member's file name.

    Args:
        env: WSGI environ dict (not inspected).
        start_response: WSGI ``start_response`` callable.

    Returns:
        A list holding a single ``bytes`` body, as WSGI requires. The
        status is 200 on success, 502 when the archive cannot be fetched
        or is not a valid zip, and 500 when no readable "source" member
        exists.
    """
    plain_text = [('Content-type', 'text/plain')]

    print(f'get {url}')
    try:
        # A timeout keeps a stalled upstream from hanging the single-threaded
        # server; raise_for_status stops an error page being parsed as a zip.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        zfile = zipfile.ZipFile(io.BytesIO(r.content))
    except (requests.RequestException, zipfile.BadZipFile) as e:
        print(f'failed to fetch archive: {e}')
        start_response('502 Bad Gateway', plain_text)
        return [b"source archive couldn't be downloaded"]

    # The context manager closes the archive on every exit path.
    with zfile:
        candidates = [name for name in zfile.namelist() if name.startswith('source')]
        if not candidates:
            start_response('500 Internal Server Error', plain_text)
            # WSGI bodies must be an iterable of bytes, not a str.
            return [b"source content wasn't found"]

        # Keep the original selection rule: the last matching member wins.
        src_fn = candidates[-1]
        try:
            body = zfile.read(src_fn)
        except (zipfile.BadZipFile, RuntimeError, NotImplementedError) as e:
            # Corrupt, encrypted, or unsupported-compression member.
            print(f'failed to read {src_fn}: {e}')
            start_response('500 Internal Server Error', plain_text)
            return [b"source content couldn't be read"]

    start_response('200 OK', [('Content-type', get_content_type(src_fn))])
    return [body]
# Script entry point: serve `application` with the wsgiref reference server
# on port 8000, bound to all interfaces (empty host string), until interrupted.
if __name__ == '__main__':
    httpd = simple_server.make_server('', 8000, application)
    httpd.serve_forever()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment