Created
January 18, 2021 20:06
-
-
Save dat-boris/efa302f2ef433ca74a43b6073d77381f to your computer and use it in GitHub Desktop.
Quick snippet for reading a file from Google Cloud Storage (GCS)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import tempfile | |
import google.cloud.storage as storage | |
def analyse_one_logfile(gcs_full_path):
    """Download one log file from the dev log bucket and analyse it.

    The object is copied into a temporary local file, the path of that file
    is handed to ``analyse_event_log``, and the result is passed through
    ``sanitize_data`` (with a placeholder boot id) before being returned.

    Args:
        gcs_full_path: Full object URL of the form
            ``gs://<DEV_LOGFILE_BUCKET_NAME>/<object path>``.

    Returns:
        Whatever ``sanitize_data`` returns for the analysed events.

    Raises:
        AssertionError: If ``gcs_full_path`` does not point into
            ``DEV_LOGFILE_BUCKET_NAME``.
        FileNotFoundError: If the bucket has no object at that path.
    """
    # Validate the path before touching the network. A literal prefix
    # comparison is used instead of a regex so that characters such as '.'
    # in the bucket name are not treated as pattern metacharacters.
    prefix = 'gs://{}/'.format(DEV_LOGFILE_BUCKET_NAME)
    if not gcs_full_path.startswith(prefix):
        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O` while callers that catch AssertionError keep working.
        raise AssertionError(
            "GCS path {} must be a valid log name".format(gcs_full_path))
    gcs_path = gcs_full_path[len(prefix):]

    client = storage.Client()
    bucket = client.get_bucket(DEV_LOGFILE_BUCKET_NAME)

    # get_blob() returns None instead of raising when the object is missing.
    blob = bucket.get_blob(gcs_path)
    if blob is None:
        raise FileNotFoundError(
            "GCS object {} does not exist".format(gcs_full_path))

    with tempfile.NamedTemporaryFile() as tmpfile:
        blob.download_to_file(tmpfile)
        # Flush so the analyser, which reopens the file by name, sees all
        # downloaded bytes. The file is deleted when the `with` block exits,
        # so the analysis must happen inside it.
        tmpfile.flush()
        events = analyse_event_log(tmpfile.name)

    return sanitize_data(gcs_full_path, '<bogus_boot_id>', events)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment