Last active
August 29, 2015 14:02
-
-
Save MattFaus/d16c6baab88a018fda97 to your computer and use it in GitHub Desktop.
Some helper functions to build a SELECT statement for defining a view.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_table_schema(dataset, table): | |
"""If the table exists, returns its schema. Otherwise, returns None.""" | |
table_service = BigQueryService.get_service().tables() | |
try: | |
get_result = table_service.get( | |
projectId=BQ_PROJECT_ID, | |
datasetId=dataset, | |
tableId=table | |
).execute() | |
return get_result['schema'] | |
except apiclient_errors.HttpError as e: | |
# Return None if the table doesn't exist. | |
if json.loads(e.content)['error']['code'] == 404: | |
return None | |
else: | |
raise | |
def get_leaf_column_selectors(dataset, table): | |
"""Parses the table's schema to generate a list of column selectors. | |
BigQuery tables may have record fields, which have a hierarchical schema. | |
Each subfield is delimited with a dot, but BigQuery views cannot have dots | |
in their names. So, instead of defining the view like: | |
SELECT | |
__key__.namespace, | |
__key__.path | |
FROM | |
[MyTable] | |
You have to define it like: | |
SELECT | |
__key__.namespace as __key___namespace, | |
__key__.path as __key___path | |
FROM | |
[MyTable] | |
For more information, see http://stackoverflow.com/questions/23840038 | |
""" | |
schema = get_table_schema(dataset, table) | |
if not schema: | |
raise BigQueryError('Expected table %s.%s to exist.' % ( | |
dataset, table)) | |
return ",\n".join([ | |
_get_leaf_selectors("", top_field) | |
for top_field in schema["fields"] | |
]) | |
def _get_leaf_selectors(prefix, field): | |
"""Recursive helper for get_leaf_column_selectors()""" | |
field_name = field["name"] | |
if prefix: | |
field_name = prefix + "." + field_name | |
if 'fields' not in field: | |
if "." in field_name: | |
# If we translate user.email to user_email, the user_email field | |
# may already exist as a top-level field, so prepend an underscore | |
# to signify this is a record-turned-regular field. There shouldn't | |
# be any top-level actual fields that start with an underscore. | |
safe_name = field_name.replace(".", "_") | |
return "%s as _%s" % (field_name, safe_name) | |
else: | |
return field_name | |
else: | |
# Recursive case | |
return ",\n".join([ | |
_get_leaf_selectors(field_name, sub_field) | |
for sub_field in field["fields"] | |
]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment