Skip to content

Instantly share code, notes, and snippets.

@ncalm
Created May 19, 2023 15:43
Show Gist options
  • Select an option

  • Save ncalm/6ac297957cfc3820403533c7f540e174 to your computer and use it in GitHub Desktop.

Select an option

Save ncalm/6ac297957cfc3820403533c7f540e174 to your computer and use it in GitHub Desktop.
Power Query Code for working with CKAN (example is Houston City Council openfinance)
let
/*
An example of how to retrieve CKAN resources from openfinance.houstontx.gov
*/
// URL to get a list of available packages in this CKAN instance
package_list_url = "https://openfinance.houstontx.gov/api/3/action/package_list",
// URL to get the package metadata e.g. id=budget-2019
package_show_url = "https://openfinance.houstontx.gov/api/3/action/package_show?",
// URL to run a query against a table e.g. sql=SELECT blah
sql_query_url = "https://openfinance.houstontx.gov/api/3/action/datastore_search_sql?",
// URL to perform a simple datastore search
datastore_search_url = "https://openfinance.houstontx.gov/api/3/action/datastore_search?",
// generic function to submit a query to a CKAN API and return the result
fn_api_call = (base_url, optional query_record as nullable record) =>
Json.Document(Web.Contents(base_url, [Query=query_record]))[result],
// get the list of available packages
package_list = fn_api_call(package_list_url),
package_list_table = Table.FromList(package_list, Splitter.SplitByNothing(), {"package_name"}),
// Add a column called package_definition which contains a record with all the metadata for that package
add_package = Table.AddColumn(
package_list_table,
"package_definition",
each fn_api_call(package_show_url, [id=[package_name]])
),
/*
Within the package definition record is a list of resource for that package
The assumption here is that for this CKAN instance, there is only one resource per package
Each resource has a resource_id.
The resource_id is the name of the table in the PostgreSQL database that contains the data for that resource
*/
extract_resource_id = Table.AddColumn(
add_package,
"resource_id",
each [package_definition][resources]{0}[resource_id]
),
/*
Using the resource ID, retrieve the data for the resource
This method uses the sql approach, which is handy if we want to add a where clause or do aggregation in-database
*/
get_data = Table.AddColumn(
extract_resource_id,
"data",
each fn_api_call(sql_query_url, [sql="SELECT * FROM """ & [resource_id] & """"])
),
/*
Alternative, simple approach using datastore_search instead of sql_query
*/
get_data_alt = Table.AddColumn(
extract_resource_id,
"data",
each fn_api_call(datastore_search_url, [resource_id=[resource_id]])
),
// Extract the column names from the query result
extract_fields = Table.AddColumn(
get_data,
"query_fields",
each
let
list_of_records = [data][fields],
table_of_records = Table.FromList(list_of_records, Splitter.SplitByNothing()),
expanded = Table.ExpandRecordColumn(table_of_records, "Column1", {"id", "type"}, {"id", "type"})[id]
in
List.Select(expanded, each _ <> "_full_text")
),
// Extract the records from the query result
extract_query_records = Table.AddColumn(
extract_fields,
"query_records",
each
let
list_of_records = [data][records],
table_of_records = Table.FromList(list_of_records, Splitter.SplitByNothing()),
expanded = Table.ExpandRecordColumn(table_of_records, "Column1", [query_fields])
in
expanded
),
result = extract_query_records
in
result
/*
Filters data returned by GetCKANResources for a particular package name prefix (budget, checkbook or payroll)
*/
(package as text) as table =>
let
Source = Table.SelectRows(GetCKANResources, each Text.StartsWith([package_name],package))[query_records],
result = Table.Combine(Source)
in
result
// Example of how to use GetPackage to get only the budget data
let
Source = GetPackage("budget")
in
Source
let
// Function to simplify the extract_table step below
tablechild = (child) => "TABLE.wikitable:nth-child(13) > * > TR > :nth-child(" & Text.From(child) & ")",
// retrieve the content of the wikipedia page
Source = Web.BrowserContents("https://en.wikipedia.org/wiki/List_of_open_government_data_sites"),
// extract the table of governments and which platform they're using
extract_table = Html.Table(
Source,
{
{"Government", tablechild(1)},
{"Website", tablechild(2)},
{"Portal Solution", tablechild(3)}
},
[
RowSelector = "TABLE.wikitable:nth-child(13) > * > TR"
]
),
// select only the CKAN rows
ckan_only = Table.SelectRows(extract_table, each ([Portal Solution] = "CKAN"))
in
ckan_only
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment