Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pmarreck/883541dc632140a1f978705d1353fd8b to your computer and use it in GitHub Desktop.
Save pmarreck/883541dc632140a1f978705d1353fd8b to your computer and use it in GitHub Desktop.
How to set up postgres fulltext search triggers, index, and tsvector column on Elixir/Phoenix, with Ecto querying, including ranking and sorting by rank
defmodule YourAppName.Search do
# ...
@doc """
Queries listings.
"""
def query_listings(query, current_user) do
default_scope = from l in Listing, where: l.draft == false or l.user_id == ^current_user.id, order_by: [desc: l.updated_at], limit: 50
id = _try_integer(query)
lst = _try_listing_status_type(query)
my = _try_mine(query)
pricerange = _try_pricerange(query)
cond do
query == "" -> Repo.all(default_scope) |> Repo.preload([:broker, :user])
id -> [get_listing!(id)] |> Repo.preload([:broker, :user])
lst -> default_scope |> where([l], l.listing_status_type == ^lst) |> Repo.all |> Repo.preload([:broker, :user])
my -> default_scope |> where([l], l.user_id == ^current_user.id) |> Repo.all |> Repo.preload([:broker, :user])
pricerange -> {start, finish} = pricerange; default_scope |> where([l], l.price_usd >= ^start and l.price_usd <= ^finish) |> Repo.all |> Repo.preload([:broker, :user])
true -> search_all_fields_using_postgres_fulltext_search(query, default_scope)
end
end
defp _try_integer(num) when is_integer(num), do: num
defp _try_integer(maybe_num) when is_binary(maybe_num) do
_try_int_result(Integer.parse(maybe_num))
end
defp _try_int_result({num, ""}) do
num
end
defp _try_int_result(_) do
nil
end
defp _try_listing_status_type(maybe_lst) when is_binary(maybe_lst) do
cond do
Enum.member?(EnumMaps.listing_status_types_int_bin, maybe_lst) -> maybe_lst
true -> nil
end
end
defp _try_mine(maybe_my) when is_binary(maybe_my) do
Enum.member?(["my", "mine"], String.downcase(maybe_my))
end
@pricerange_regex ~r/^\$?([0-9,_ ]+)-\$?([0-9,_ ]+)$/
defp _try_pricerange(maybe_pr) when is_binary(maybe_pr) do
case Regex.run(@pricerange_regex, maybe_pr) do
[_, start, finish] -> {_filter_nonnumeric(start), _filter_nonnumeric(finish)}
_ -> nil
end
end
defp _filter_nonnumeric(num) when is_binary(num) do
{num, _} = Integer.parse(Regex.replace(~r/[^0-9]+/, num, ""))
num
end
defp normalization_transformations() do
[
{~r/\s*\<([0-9]+|-)\>\s*/, "<\\1>"},
{~r/"\s*([^"]+?)\s*"/, fn _, phrase -> Regex.replace(~r/ +/, phrase, "<->") end},
{~r/\s+and\s+/i, "&"},
{~r/\s+or\s+/i, "|"},
{~r/\s+or\s+/i, "|"},
{~r/\s*&not\b/i, "&!"},
{~r/\s*\|not\b/i, "|!"},
{~r/\bnot\s+/i, "!"},
{~r/\s*&\s*/, "&"},
{~r/\s*\|\s*/, "|"},
{~r/!\s+/, "!"},
{~r/\s+/, "&"},
]
end
defp normalize_query(q) do
Enum.reduce(normalization_transformations(), String.trim(q), fn({regex, repl}, acc) -> Regex.replace(regex, acc, repl) end)
end
def test_normalize_query() do
test_cases = [
{"a|b"," a or b"},
{"a&b"," a b "},
{"a&!b","a not b"},
{"a&!b","a and not b"},
{"a|!b","a or !b"},
{"a&!b","a ! b"},
{"a<->b|c","\"a b\" or c"},
{"yabba<->dabba<->do&barney", " \" yabba dabba do \" barney "},
{"a<->b|c<->d", "\"a b\" |\"c d\""},
{"a<2>b"," a <2> b"},
{"!b","not b"},
]
for {expected, input} <- test_cases, do: ^expected = normalize_query(input)
true
end
defp search_all_fields_using_postgres_fulltext_search(q, scope) do
q = normalize_query(q)
scope
|> where([l], fragment("search_vector @@ to_tsquery(?)", ^q))
|> order_by([l], [asc: fragment("ts_rank_cd(search_vector, to_tsquery(?), 32)", ^q), desc: l.updated_at])
|> Repo.all
|> Repo.preload([:broker, :user])
end
end
defmodule Appname.SearchTest do
defp random_uniquifying_string do
trunc(:rand.uniform()*100000000000000000) |> Integer.to_string
end
describe "search queries" do
test "listing fulltext search query normalization" do
assert Realtor.test_normalize_query()
end
test "listing query listings with id only" do
listing = listing_fixture() |> Repo.preload(:user)
assert [listing] == Realtor.query_listings("#{listing.id}", listing.user)
end
test "listing query listings with listing status type only" do
listing = listing_fixture() |> Repo.preload(:user)
user = listing.user
assert {:ok, listing} = Realtor.update_listing(listing, %{listing_status_type: "UC"})
assert [listing] == Realtor.query_listings("UC", user)
end
test "listing query listings with my or mine only" do
listing = listing_fixture() |> Repo.preload(:user)
assert [listing] == Realtor.query_listings("my", listing.user)
assert [listing] == Realtor.query_listings("mine", listing.user)
end
test "listing query listings with price range only" do
listing = listing_fixture() |> Repo.preload(:user)
user = listing.user
assert {:ok, listing} = Realtor.update_listing(listing, %{price_usd: 200})
assert [listing] == Realtor.query_listings("150-$250", user)
end
test "listing fulltext search" do
listing = listing_fixture() |> Repo.preload(:user)
user = listing.user
user2 = user_fixture(%{username: "inigo", email: "[email protected]", name: "Inigo Montoya"})
listing2 = listing_fixture(user: user2, user_id: user2.id)
assert {:ok, listing} = Realtor.update_listing(listing, %{draft: false, for_sale: true, description: "This is stupendous!"})
assert {:ok, listing2} = Realtor.update_listing(listing2, %{draft: false, for_sale: true, description: "inconceivable"})
assert [listing] == Realtor.query_listings("stupendous", user)
assert [listing] == Realtor.query_listings("realtortest", user) # by user's name
assert [listing] == Realtor.query_listings("stupendous realtortest", user)
assert [listing] == Realtor.query_listings("stupendous sale", user) # boolean attribute
assert [] == Realtor.query_listings("stupendous not realtortest", user)
assert [listing2, listing] == Realtor.query_listings("stupendous | inconceivable", user2)
end
end
end
defmodule Mpnetwork.Repo.MarryPostgresFulltextListingSearch do
use Ecto.Migration
alias Mpnetwork.EnumMaps
# The migration wherein we marry Postgres because the cost of using another fulltext
# search engine is greater than just using Postgres' built-in (and apparently quite capable)
# fulltext search.
# note that if you add to these later or change the ranks, you'll have to rerun a similar migration
@fulltext_searchable_fields [
address: "A",
city: "B",
state: "B",
zip: "B",
description: "C",
remarks: "C",
association: "C",
neighborhood: "C",
schools: "B",
zoning: "C",
district: "C",
construction: "C",
appearance: "C",
cross_street: "C",
owner_name: "C",
]
@boolean_text_searchable_fields [
studio: "studio",
for_sale: "for sale",
for_rent: "for rent",
basement: "basement",
attached_garage: "attached garage",
new_construction: "new construction",
patio: "patio",
deck: "deck",
pool: "pool",
hot_tub: "hot tub",
porch: "porch",
central_air: "central air",
central_vac: "central vac",
security_system: "security system",
fios_available: "FIOS",
high_speed_internet_available: "high speed internet",
modern_kitchen_countertops: "modern kitchen countertops",
eef_led_lighting: "LED lighting",
tennis_ct: "tennis court",
mbr_first_fl: "master bedroom first floor",
office: "office",
den: "den",
attic: "attic",
finished_basement: "finished basement",
w_w_carpet: "wall to wall carpet",
wood_floors: "wood floors",
dock_rights: "dock rights",
beach_rights: "beach rights",
waterfront: "waterfront",
waterview: "waterview",
bulkhead: "bulkhead",
cul_de_sac: "cul de sac",
corner: "corner",
adult_comm: "adult community",
gated_comm: "gated community",
eat_in_kitchen: "eat-in kitchen",
energy_eff: "energy efficient",
green_certified: "green certified",
eef_geothermal_heating: "geothermal heating",
eef_solar_panels: "solar",
eef_windmill: "windmill",
ing_sprinks: "inground sprinklers",
short_sale: "short sale",
reo: "REO",
handicap_access: "handicapped handicap",
equestrian: "horse",
also_for_rent: "for rent",
buyer_exclusions: "buyer exclusions",
broker_agent_owned: "broker/agent broker agent owned"
]
@enum_text_searchable_fields [
class_type: EnumMaps.class_types,
listing_status_type: EnumMaps.listing_status_types_for_search,
style_type: EnumMaps.style_types
]
@foreign_key_searchable_fields [
user_id: {:users, :name},
broker_id: {:offices, :name}
]
defp assemble_boolean_search_vector(existing_fields, boolean_fields) do
existing_fields ++ Enum.map(boolean_fields, fn {column, text_if_true} ->
"setweight(to_tsvector('pg_catalog.english', (case when new.#{column} then '#{text_if_true}' else '' end)), 'C')"
end)
end
defp assemble_enum_search_vector(existing_fields, enum_fields) do
new_enum_search_vectors = Enum.map(enum_fields, fn {column, int_ext_tuples} ->
full_case = Enum.map(int_ext_tuples, fn {int, ext} ->
"when new.#{column}='#{int}' then '#{ext}'"
end) |> Enum.join(" ")
"setweight(to_tsvector('pg_catalog.english', (case #{full_case} else '' end)), 'C')"
end)
existing_fields ++ new_enum_search_vectors
end
defp assemble_fk_search_vector(existing_fields, fk_fields) do
existing_fields ++ Enum.map(fk_fields, fn {_column, {table, varchar_column}} ->
"setweight(to_tsvector('pg_catalog.english', coalesce(#{table}_#{varchar_column},'')), 'B')"
end)
end
defp assemble_declarations_for_fk_search_vector(fk_fields) do
"DECLARE\n" <> Enum.join(Enum.map(fk_fields, fn {_column, {table, varchar_column}} ->
"#{table}_#{varchar_column} VARCHAR(255);"
end),"\n") <> "\n"
end
defp assemble_select_intos_for_fk_search_vector(fk_fields) do
Enum.join(Enum.map(fk_fields, fn {column, {table, varchar_column}} ->
"SELECT #{table}.#{varchar_column} INTO #{table}_#{varchar_column} FROM #{table} WHERE id = new.#{column};"
end), "\n") <> "\n"
end
defp assemble_search_vector() do
@fulltext_searchable_fields
|> (Enum.map(fn {column, rank} ->
"setweight(to_tsvector('pg_catalog.english', coalesce(new.#{column},'')), '#{rank}')"
end)
|> assemble_fk_search_vector(@foreign_key_searchable_fields)
|> assemble_boolean_search_vector(@boolean_text_searchable_fields)
|> assemble_enum_search_vector(@enum_text_searchable_fields))
|> Enum.join(" || ")
|> String.replace_suffix("", ";")
end
defp assemble_insert_update_trigger_fields(fields) do
fields
|> Enum.map(fn {column, _} ->
"#{column}"
end) |> Enum.join(", ")
end
def change do
alter table(:listings) do
add :search_vector, :tsvector
end
create_if_not_exists index(:listings, [:search_vector], using: "GIN")
# trying to make these idempotent so they can run inside a "change" migration...
# Note that execute/2 requires ecto ~2.2
execute("""
CREATE OR REPLACE FUNCTION listing_search_trigger() RETURNS trigger AS $$
#{assemble_declarations_for_fk_search_vector(@foreign_key_searchable_fields)}
begin
#{assemble_select_intos_for_fk_search_vector(@foreign_key_searchable_fields)}
new.search_vector := #{assemble_search_vector()}
return new;
end
$$ LANGUAGE plpgsql
""","""
DROP FUNCTION IF EXISTS listing_search_trigger();
""")
execute("""
CREATE TRIGGER listing_search_update
BEFORE INSERT OR UPDATE OF #{assemble_insert_update_trigger_fields(@foreign_key_searchable_fields ++ @fulltext_searchable_fields ++ @boolean_text_searchable_fields ++ @enum_text_searchable_fields)}
ON listings
FOR EACH ROW EXECUTE PROCEDURE listing_search_trigger();
""","""
DROP TRIGGER IF EXISTS listing_search_update ON listings;
""")
# now force-update all existing rows to populate search_vector on those rows
field = :erlang.element(1, hd(@fulltext_searchable_fields))
execute("UPDATE listings SET #{field} = #{field}", "")
# some other indices unrelated to fulltext search which just happened to be part of this migration
~w[draft
for_sale
for_rent
inserted_at
updated_at
next_broker_oh_start_at
next_broker_oh_end_at
next_cust_oh_start_at
next_cust_oh_end_at
class_type
listing_status_type
style_type
att_type
price_usd
]a |> Enum.each(fn col ->
create_if_not_exists index(:listings, [col])
end)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment