Last active
April 6, 2021 07:43
-
-
Save sfkeller/37c94d5b005ca42a531da4d9e877b957 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- | |
-- FUNCTION split_text_to_array(...) | |
-- Accepts a delimited text (string). Returns a sorted array of text. | |
-- | |
--drop function if exists split_text_to_array(text, char(1)); | |
create or replace function split_text_to_array(text_delimited text, delimiter text default ';') | |
returns text[] | |
as $$ | |
select array(select regexp_split_to_table(lower(regexp_replace(text_delimited, '( )|'||delimiter||'$', '', 'g')), delimiter) order by 1); | |
$$ language sql immutable strict parallel safe; | |
-- Testing standalone with messy input data (mostly fom OpenStreetMap) | |
-- Search for 'overlap' of {italian,pizza} with values. Expecting all true escept of last 'italian_pizza' | |
-- The predicate && is usually used in WHERE clause. | |
with tbl (values_semicolon_delimited) as ( | |
values | |
('pizza'), | |
('Pizza'), | |
('pizza '), | |
('pizza ;'), | |
('pizza;italian'), | |
('pizza; italian'), | |
('italian'), | |
('italian;pizza'), | |
('italian;Pizza'), | |
('italian;_pizzeria; swiss'), | |
('italian_pizza;pizza'), | |
('italian.pizza;pizza'), | |
('swiss;american_food;fast_food;pizza'), | |
('pizza;...'), | |
('italian_pizza') | |
) | |
select | |
'>'||values_semicolon_delimited||'<' as values_semicolon_delimited, | |
values_as_array, | |
values_as_array && '{italian,pizza}' as contains_italian_or_pizza | |
from ( | |
select | |
values_semicolon_delimited, | |
-- Old solution | |
--(select array(select unnest(regexp_split_to_array(lower(regexp_replace(values_semicolon_delimited, '( )|;$', '', 'g')), ';')::text[]) order by 1)) as values_as_array | |
-- Better solution replacing unnest & regexp_split_to_array() with just regexp_split_to_table() Thx to david fetter! | |
--array(select regexp_split_to_table(lower(regexp_replace(values_semicolon_delimited, '( )|;$', '', 'g')), ';') order by 1) as values_as_array | |
split_text_to_array(values_semicolon_delimited) as values_as_array | |
from tbl | |
) as tbl_tmp; | |
-- | |
-- Demo: List all OpenStreetMap objects with key cuisine tagged 'italian', | |
-- given table osm_point (used osm2pgsql with OSM country data from download.geofabrik.de). | |
-- | |
select osm_id, name, cuisine_arr, st_astext(geom,7) as geom_wkt | |
from ( | |
select *, split_text_to_array(tags->'cuisine') as cuisine_arr | |
from osm_point | |
where tags->'cuisine' is not null | |
) as tbl | |
where cuisine_arr && '{italian,pizza,italian_pizza}' | |
order by array_length(cuisine_arr,1) desc, cuisine_arr; | |
/* | |
osm_id | name | cuisine_arr | geom_wkt | |
------------+-------------------------------+------------------------------------+----------------------------- | |
4262942591 | Sunset Bar | {burger,coffee_shop,italian_pizza} | POINT(7.0758288 46.1046015) | |
5378024770 | Happy Chef | {burger,italian,kebab} | POINT(8.5351329 47.3679553) | |
6121323299 | Cakibi's | {burger,kebab,pizza} | POINT(8.8897094 47.5575453) | |
5591143159 | Linden Express | {burger,kebab,pizza} | POINT(7.5606657 47.5580886) | |
... | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment