Created
May 19, 2023 15:43
-
-
Save ncalm/6ac297957cfc3820403533c7f540e174 to your computer and use it in GitHub Desktop.
Power Query Code for working with CKAN (example is Houston City Council openfinance)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| let | |
| /* | |
| An example of how to retrieve CKAN resources from openfinance.houstontx.gov | |
| */ | |
| // URL to get a list of available packages in this CKAN instance | |
| package_list_url = "https://openfinance.houstontx.gov/api/3/action/package_list", | |
| // URL to get the package metadata e.g. id=budget-2019 | |
| package_show_url = "https://openfinance.houstontx.gov/api/3/action/package_show?", | |
| // URL to run a query against a table e.g. sql=SELECT blah | |
| sql_query_url = "https://openfinance.houstontx.gov/api/3/action/datastore_search_sql?", | |
| // URL to perform a simple datastore search | |
| datastore_search_url = "https://openfinance.houstontx.gov/api/3/action/datastore_search?", | |
| // generic function to submit a query to a CKAN API and return the result | |
| fn_api_call = (base_url, optional query_record as nullable record) => | |
| Json.Document(Web.Contents(base_url, [Query=query_record]))[result], | |
| // get the list of available packages | |
| package_list = fn_api_call(package_list_url), | |
| package_list_table = Table.FromList(package_list, Splitter.SplitByNothing(), {"package_name"}), | |
| // Add a column called package_definition which contains a record with all the metadata for that package | |
| add_package = Table.AddColumn( | |
| package_list_table, | |
| "package_definition", | |
| each fn_api_call(package_show_url, [id=[package_name]]) | |
| ), | |
| /* | |
| Within the package definition record is a list of resource for that package | |
| The assumption here is that for this CKAN instance, there is only one resource per package | |
| Each resource has a resource_id. | |
| The resource_id is the name of the table in the PostgreSQL database that contains the data for that resource | |
| */ | |
| extract_resource_id = Table.AddColumn( | |
| add_package, | |
| "resource_id", | |
| each [package_definition][resources]{0}[resource_id] | |
| ), | |
| /* | |
| Using the resource ID, retrieve the data for the resource | |
| This method uses the sql approach, which is handy if we want to add a where clause or do aggregation in-database | |
| */ | |
| get_data = Table.AddColumn( | |
| extract_resource_id, | |
| "data", | |
| each fn_api_call(sql_query_url, [sql="SELECT * FROM """ & [resource_id] & """"]) | |
| ), | |
| /* | |
| Alternative, simple approach using datastore_search instead of sql_query | |
| */ | |
| get_data_alt = Table.AddColumn( | |
| extract_resource_id, | |
| "data", | |
| each fn_api_call(datastore_search_url, [resource_id=[resource_id]]) | |
| ), | |
| // Extract the column names from the query result | |
| extract_fields = Table.AddColumn( | |
| get_data, | |
| "query_fields", | |
| each | |
| let | |
| list_of_records = [data][fields], | |
| table_of_records = Table.FromList(list_of_records, Splitter.SplitByNothing()), | |
| expanded = Table.ExpandRecordColumn(table_of_records, "Column1", {"id", "type"}, {"id", "type"})[id] | |
| in | |
| List.Select(expanded, each _ <> "_full_text") | |
| ), | |
| // Extract the records from the query result | |
| extract_query_records = Table.AddColumn( | |
| extract_fields, | |
| "query_records", | |
| each | |
| let | |
| list_of_records = [data][records], | |
| table_of_records = Table.FromList(list_of_records, Splitter.SplitByNothing()), | |
| expanded = Table.ExpandRecordColumn(table_of_records, "Column1", [query_fields]) | |
| in | |
| expanded | |
| ), | |
| result = extract_query_records | |
| in | |
| result |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Filters data returned by GetCKANResources for a particular package name prefix (budget, checkbook or payroll) | |
| */ | |
| (package as text) as table => | |
| let | |
| Source = Table.SelectRows(GetCKANResources, each Text.StartsWith([package_name],package))[query_records], | |
| result = Table.Combine(Source) | |
| in | |
| result |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Example of how to use GetPackage to get only the budget data | |
| let | |
| Source = GetPackage("budget") | |
| in | |
| Source |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| let | |
| // Function to simplify the extract_table step below | |
| tablechild = (child) => "TABLE.wikitable:nth-child(13) > * > TR > :nth-child(" & Text.From(child) & ")", | |
| // retrieve the content of the wikipedia page | |
| Source = Web.BrowserContents("https://en.wikipedia.org/wiki/List_of_open_government_data_sites"), | |
| // extract the table of governments and which platform they're using | |
| extract_table = Html.Table( | |
| Source, | |
| { | |
| {"Government", tablechild(1)}, | |
| {"Website", tablechild(2)}, | |
| {"Portal Solution", tablechild(3)} | |
| }, | |
| [ | |
| RowSelector = "TABLE.wikitable:nth-child(13) > * > TR" | |
| ] | |
| ), | |
| // select only the CKAN rows | |
| ckan_only = Table.SelectRows(extract_table, each ([Portal Solution] = "CKAN")) | |
| in | |
| ckan_only |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment