Last active
June 6, 2022 11:54
-
-
Save pmarreck/883541dc632140a1f978705d1353fd8b to your computer and use it in GitHub Desktop.
How to set up postgres fulltext search triggers, index, and tsvector column on Elixir/Phoenix, with Ecto querying, including ranking and sorting by rank
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule YourAppName.Search do | |
# ... | |
@doc """ | |
Queries listings. | |
""" | |
def query_listings(query, current_user) do | |
default_scope = from l in Listing, where: l.draft == false or l.user_id == ^current_user.id, order_by: [desc: l.updated_at], limit: 50 | |
id = _try_integer(query) | |
lst = _try_listing_status_type(query) | |
my = _try_mine(query) | |
pricerange = _try_pricerange(query) | |
cond do | |
query == "" -> Repo.all(default_scope) |> Repo.preload([:broker, :user]) | |
id -> [get_listing!(id)] |> Repo.preload([:broker, :user]) | |
lst -> default_scope |> where([l], l.listing_status_type == ^lst) |> Repo.all |> Repo.preload([:broker, :user]) | |
my -> default_scope |> where([l], l.user_id == ^current_user.id) |> Repo.all |> Repo.preload([:broker, :user]) | |
pricerange -> {start, finish} = pricerange; default_scope |> where([l], l.price_usd >= ^start and l.price_usd <= ^finish) |> Repo.all |> Repo.preload([:broker, :user]) | |
true -> search_all_fields_using_postgres_fulltext_search(query, default_scope) | |
end | |
end | |
defp _try_integer(num) when is_integer(num), do: num | |
defp _try_integer(maybe_num) when is_binary(maybe_num) do | |
_try_int_result(Integer.parse(maybe_num)) | |
end | |
defp _try_int_result({num, ""}) do | |
num | |
end | |
defp _try_int_result(_) do | |
nil | |
end | |
defp _try_listing_status_type(maybe_lst) when is_binary(maybe_lst) do | |
cond do | |
Enum.member?(EnumMaps.listing_status_types_int_bin, maybe_lst) -> maybe_lst | |
true -> nil | |
end | |
end | |
defp _try_mine(maybe_my) when is_binary(maybe_my) do | |
Enum.member?(["my", "mine"], String.downcase(maybe_my)) | |
end | |
@pricerange_regex ~r/^\$?([0-9,_ ]+)-\$?([0-9,_ ]+)$/ | |
defp _try_pricerange(maybe_pr) when is_binary(maybe_pr) do | |
case Regex.run(@pricerange_regex, maybe_pr) do | |
[_, start, finish] -> {_filter_nonnumeric(start), _filter_nonnumeric(finish)} | |
_ -> nil | |
end | |
end | |
defp _filter_nonnumeric(num) when is_binary(num) do | |
{num, _} = Integer.parse(Regex.replace(~r/[^0-9]+/, num, "")) | |
num | |
end | |
defp normalization_transformations() do | |
[ | |
{~r/\s*\<([0-9]+|-)\>\s*/, "<\\1>"}, | |
{~r/"\s*([^"]+?)\s*"/, fn _, phrase -> Regex.replace(~r/ +/, phrase, "<->") end}, | |
{~r/\s+and\s+/i, "&"}, | |
{~r/\s+or\s+/i, "|"}, | |
{~r/\s+or\s+/i, "|"}, | |
{~r/\s*¬\b/i, "&!"}, | |
{~r/\s*\|not\b/i, "|!"}, | |
{~r/\bnot\s+/i, "!"}, | |
{~r/\s*&\s*/, "&"}, | |
{~r/\s*\|\s*/, "|"}, | |
{~r/!\s+/, "!"}, | |
{~r/\s+/, "&"}, | |
] | |
end | |
defp normalize_query(q) do | |
Enum.reduce(normalization_transformations(), String.trim(q), fn({regex, repl}, acc) -> Regex.replace(regex, acc, repl) end) | |
end | |
def test_normalize_query() do | |
test_cases = [ | |
{"a|b"," a or b"}, | |
{"a&b"," a b "}, | |
{"a&!b","a not b"}, | |
{"a&!b","a and not b"}, | |
{"a|!b","a or !b"}, | |
{"a&!b","a ! b"}, | |
{"a<->b|c","\"a b\" or c"}, | |
{"yabba<->dabba<->do&barney", " \" yabba dabba do \" barney "}, | |
{"a<->b|c<->d", "\"a b\" |\"c d\""}, | |
{"a<2>b"," a <2> b"}, | |
{"!b","not b"}, | |
] | |
for {expected, input} <- test_cases, do: ^expected = normalize_query(input) | |
true | |
end | |
defp search_all_fields_using_postgres_fulltext_search(q, scope) do | |
q = normalize_query(q) | |
scope | |
|> where([l], fragment("search_vector @@ to_tsquery(?)", ^q)) | |
|> order_by([l], [asc: fragment("ts_rank_cd(search_vector, to_tsquery(?), 32)", ^q), desc: l.updated_at]) | |
|> Repo.all | |
|> Repo.preload([:broker, :user]) | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Appname.SearchTest do | |
defp random_uniquifying_string do | |
trunc(:rand.uniform()*100000000000000000) |> Integer.to_string | |
end | |
describe "search queries" do | |
test "listing fulltext search query normalization" do | |
assert Realtor.test_normalize_query() | |
end | |
test "listing query listings with id only" do | |
listing = listing_fixture() |> Repo.preload(:user) | |
assert [listing] == Realtor.query_listings("#{listing.id}", listing.user) | |
end | |
test "listing query listings with listing status type only" do | |
listing = listing_fixture() |> Repo.preload(:user) | |
user = listing.user | |
assert {:ok, listing} = Realtor.update_listing(listing, %{listing_status_type: "UC"}) | |
assert [listing] == Realtor.query_listings("UC", user) | |
end | |
test "listing query listings with my or mine only" do | |
listing = listing_fixture() |> Repo.preload(:user) | |
assert [listing] == Realtor.query_listings("my", listing.user) | |
assert [listing] == Realtor.query_listings("mine", listing.user) | |
end | |
test "listing query listings with price range only" do | |
listing = listing_fixture() |> Repo.preload(:user) | |
user = listing.user | |
assert {:ok, listing} = Realtor.update_listing(listing, %{price_usd: 200}) | |
assert [listing] == Realtor.query_listings("150-$250", user) | |
end | |
test "listing fulltext search" do | |
listing = listing_fixture() |> Repo.preload(:user) | |
user = listing.user | |
user2 = user_fixture(%{username: "inigo", email: "[email protected]", name: "Inigo Montoya"}) | |
listing2 = listing_fixture(user: user2, user_id: user2.id) | |
assert {:ok, listing} = Realtor.update_listing(listing, %{draft: false, for_sale: true, description: "This is stupendous!"}) | |
assert {:ok, listing2} = Realtor.update_listing(listing2, %{draft: false, for_sale: true, description: "inconceivable"}) | |
assert [listing] == Realtor.query_listings("stupendous", user) | |
assert [listing] == Realtor.query_listings("realtortest", user) # by user's name | |
assert [listing] == Realtor.query_listings("stupendous realtortest", user) | |
assert [listing] == Realtor.query_listings("stupendous sale", user) # boolean attribute | |
assert [] == Realtor.query_listings("stupendous not realtortest", user) | |
assert [listing2, listing] == Realtor.query_listings("stupendous | inconceivable", user2) | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Mpnetwork.Repo.MarryPostgresFulltextListingSearch do | |
use Ecto.Migration | |
alias Mpnetwork.EnumMaps | |
# The migration wherein we marry Postgres because the cost of using another fulltext | |
# search engine is greater than just using Postgres' built-in (and apparently quite capable) | |
# fulltext search. | |
# note that if you add to these later or change the ranks, you'll have to rerun a similar migration | |
@fulltext_searchable_fields [ | |
address: "A", | |
city: "B", | |
state: "B", | |
zip: "B", | |
description: "C", | |
remarks: "C", | |
association: "C", | |
neighborhood: "C", | |
schools: "B", | |
zoning: "C", | |
district: "C", | |
construction: "C", | |
appearance: "C", | |
cross_street: "C", | |
owner_name: "C", | |
] | |
@boolean_text_searchable_fields [ | |
studio: "studio", | |
for_sale: "for sale", | |
for_rent: "for rent", | |
basement: "basement", | |
attached_garage: "attached garage", | |
new_construction: "new construction", | |
patio: "patio", | |
deck: "deck", | |
pool: "pool", | |
hot_tub: "hot tub", | |
porch: "porch", | |
central_air: "central air", | |
central_vac: "central vac", | |
security_system: "security system", | |
fios_available: "FIOS", | |
high_speed_internet_available: "high speed internet", | |
modern_kitchen_countertops: "modern kitchen countertops", | |
eef_led_lighting: "LED lighting", | |
tennis_ct: "tennis court", | |
mbr_first_fl: "master bedroom first floor", | |
office: "office", | |
den: "den", | |
attic: "attic", | |
finished_basement: "finished basement", | |
w_w_carpet: "wall to wall carpet", | |
wood_floors: "wood floors", | |
dock_rights: "dock rights", | |
beach_rights: "beach rights", | |
waterfront: "waterfront", | |
waterview: "waterview", | |
bulkhead: "bulkhead", | |
cul_de_sac: "cul de sac", | |
corner: "corner", | |
adult_comm: "adult community", | |
gated_comm: "gated community", | |
eat_in_kitchen: "eat-in kitchen", | |
energy_eff: "energy efficient", | |
green_certified: "green certified", | |
eef_geothermal_heating: "geothermal heating", | |
eef_solar_panels: "solar", | |
eef_windmill: "windmill", | |
ing_sprinks: "inground sprinklers", | |
short_sale: "short sale", | |
reo: "REO", | |
handicap_access: "handicapped handicap", | |
equestrian: "horse", | |
also_for_rent: "for rent", | |
buyer_exclusions: "buyer exclusions", | |
broker_agent_owned: "broker/agent broker agent owned" | |
] | |
@enum_text_searchable_fields [ | |
class_type: EnumMaps.class_types, | |
listing_status_type: EnumMaps.listing_status_types_for_search, | |
style_type: EnumMaps.style_types | |
] | |
@foreign_key_searchable_fields [ | |
user_id: {:users, :name}, | |
broker_id: {:offices, :name} | |
] | |
defp assemble_boolean_search_vector(existing_fields, boolean_fields) do | |
existing_fields ++ Enum.map(boolean_fields, fn {column, text_if_true} -> | |
"setweight(to_tsvector('pg_catalog.english', (case when new.#{column} then '#{text_if_true}' else '' end)), 'C')" | |
end) | |
end | |
defp assemble_enum_search_vector(existing_fields, enum_fields) do | |
new_enum_search_vectors = Enum.map(enum_fields, fn {column, int_ext_tuples} -> | |
full_case = Enum.map(int_ext_tuples, fn {int, ext} -> | |
"when new.#{column}='#{int}' then '#{ext}'" | |
end) |> Enum.join(" ") | |
"setweight(to_tsvector('pg_catalog.english', (case #{full_case} else '' end)), 'C')" | |
end) | |
existing_fields ++ new_enum_search_vectors | |
end | |
defp assemble_fk_search_vector(existing_fields, fk_fields) do | |
existing_fields ++ Enum.map(fk_fields, fn {_column, {table, varchar_column}} -> | |
"setweight(to_tsvector('pg_catalog.english', coalesce(#{table}_#{varchar_column},'')), 'B')" | |
end) | |
end | |
defp assemble_declarations_for_fk_search_vector(fk_fields) do | |
"DECLARE\n" <> Enum.join(Enum.map(fk_fields, fn {_column, {table, varchar_column}} -> | |
"#{table}_#{varchar_column} VARCHAR(255);" | |
end),"\n") <> "\n" | |
end | |
defp assemble_select_intos_for_fk_search_vector(fk_fields) do | |
Enum.join(Enum.map(fk_fields, fn {column, {table, varchar_column}} -> | |
"SELECT #{table}.#{varchar_column} INTO #{table}_#{varchar_column} FROM #{table} WHERE id = new.#{column};" | |
end), "\n") <> "\n" | |
end | |
defp assemble_search_vector() do | |
@fulltext_searchable_fields | |
|> (Enum.map(fn {column, rank} -> | |
"setweight(to_tsvector('pg_catalog.english', coalesce(new.#{column},'')), '#{rank}')" | |
end) | |
|> assemble_fk_search_vector(@foreign_key_searchable_fields) | |
|> assemble_boolean_search_vector(@boolean_text_searchable_fields) | |
|> assemble_enum_search_vector(@enum_text_searchable_fields)) | |
|> Enum.join(" || ") | |
|> String.replace_suffix("", ";") | |
end | |
defp assemble_insert_update_trigger_fields(fields) do | |
fields | |
|> Enum.map(fn {column, _} -> | |
"#{column}" | |
end) |> Enum.join(", ") | |
end | |
def change do | |
alter table(:listings) do | |
add :search_vector, :tsvector | |
end | |
create_if_not_exists index(:listings, [:search_vector], using: "GIN") | |
# trying to make these idempotent so they can run inside a "change" migration... | |
# Note that execute/2 requires ecto ~2.2 | |
execute(""" | |
CREATE OR REPLACE FUNCTION listing_search_trigger() RETURNS trigger AS $$ | |
#{assemble_declarations_for_fk_search_vector(@foreign_key_searchable_fields)} | |
begin | |
#{assemble_select_intos_for_fk_search_vector(@foreign_key_searchable_fields)} | |
new.search_vector := #{assemble_search_vector()} | |
return new; | |
end | |
$$ LANGUAGE plpgsql | |
""",""" | |
DROP FUNCTION IF EXISTS listing_search_trigger(); | |
""") | |
execute(""" | |
CREATE TRIGGER listing_search_update | |
BEFORE INSERT OR UPDATE OF #{assemble_insert_update_trigger_fields(@foreign_key_searchable_fields ++ @fulltext_searchable_fields ++ @boolean_text_searchable_fields ++ @enum_text_searchable_fields)} | |
ON listings | |
FOR EACH ROW EXECUTE PROCEDURE listing_search_trigger(); | |
""",""" | |
DROP TRIGGER IF EXISTS listing_search_update ON listings; | |
""") | |
# now force-update all existing rows to populate search_vector on those rows | |
field = :erlang.element(1, hd(@fulltext_searchable_fields)) | |
execute("UPDATE listings SET #{field} = #{field}", "") | |
# some other indices unrelated to fulltext search which just happened to be part of this migration | |
~w[draft | |
for_sale | |
for_rent | |
inserted_at | |
updated_at | |
next_broker_oh_start_at | |
next_broker_oh_end_at | |
next_cust_oh_start_at | |
next_cust_oh_end_at | |
class_type | |
listing_status_type | |
style_type | |
att_type | |
price_usd | |
]a |> Enum.each(fn col -> | |
create_if_not_exists index(:listings, [col]) | |
end) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment