Last active
February 12, 2021 17:14
-
-
Save kluzny/498e3a3bace43b0e44ce07add44a99fa to your computer and use it in GitHub Desktop.
simple class that takes an active record model and generates pg's similarity based searches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true | |
# Uses pg's trigram extension to enable multi column searches | |
# | |
# add a search method to your model | |
# def self.search(query) | |
# SimilaritySearch.new(self, over: [:name, :email, :nickname]).search(query) | |
# end | |
# | |
# Then call it with Foo.search("bar") | |
class SimilaritySearch | |
attr_accessor :klass, :columns, :select | |
def initialize(klass, over:, select: nil) | |
self.klass = klass | |
self.columns = over | |
self.select = select || over | |
end | |
def search(unsafe_query) | |
relation_builder(sanitize(unsafe_query)) | |
end | |
def table_name | |
@table_name ||= klass.table_name | |
end | |
def search_table_name | |
"pg_search" | |
end | |
# "products"."short_name" | |
def table_column(column) | |
%{"#{table_name}"."#{column}"} | |
end | |
# "pg_search"."id" | |
def search_column(column) | |
%{"#{search_table_name}"."#{column}"} | |
end | |
# rubocop:disable Style/LineLength | |
# INNER JOIN ( | |
# SELECT | |
# "products"."id" AS pg_search_id, | |
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank, | |
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank, | |
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank | |
# FROM "products" | |
# WHERE ( | |
# ( | |
# ( | |
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D')) | |
# @@ | |
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*')) | |
# ) | |
# ) | |
# ) AS pg_search ON "products"."id" = pg_search.pg_search_id | |
# rubocop:enable Style/LineLength | |
def inner_join(query) | |
%{ INNER JOIN ( #{inner_query(query)} ) AS #{search_table_name} ON #{table_column(:id)} = #{search_column(:id)} } | |
end | |
# rubocop:disable Style/LineLength | |
# SELECT | |
# "products"."id" AS pg_search_id, | |
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank, | |
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank, | |
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank | |
# FROM "products" | |
# WHERE ( | |
# ( | |
# ( | |
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D')) | |
# @@ | |
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*')) | |
# ) | |
# ) | |
# rubocop:enable Style/LineLength | |
def inner_query(query) | |
relation = klass.select(:id) | |
columns.each do |column| | |
relation = relation.send(:select, rank_selector(column, query)) | |
end | |
relation = relation.where(ts_vector_filter(query)) | |
relation.to_sql | |
end | |
# rubocop:disable Style/LineLength | |
# SELECT | |
# "products"."id", | |
# "products"."name", | |
# "products"."short_name", | |
# pg_search.rank, | |
# pg_search.srank, | |
# pg_search.nrank | |
# FROM "products" | |
# INNER JOIN ( | |
# SELECT | |
# "products"."id" AS pg_search_id, | |
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank, | |
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank, | |
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank | |
# FROM "products" | |
# WHERE ( | |
# ( | |
# ( | |
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D')) | |
# @@ | |
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*')) | |
# ) | |
# ) | |
# ) AS pg_search ON "products"."id" = pg_search.pg_search_id | |
# ORDER BY greatest(pg_search.srank, pg_search.nrank) DESC, "products"."id" ASC | |
# rubocop:enable Style/LineLength | |
def relation_builder(query) | |
klass.select("*").joins(inner_join(query)).order(greatest_rank) | |
end | |
# filter out models without any prefix matches | |
# ( | |
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || | |
# setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || | |
# setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || | |
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D') | |
# ) | |
# @@ | |
# ( | |
# to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*') | |
# ) | |
def ts_vector_filter(query) | |
%{(#{ts_weighted_columns}) @@ (#{ts_query(query)})} | |
end | |
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || | |
# setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || | |
# setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || | |
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D') | |
def ts_weighted_columns | |
columns.map { |column| ts_set_weight(column) }.join(" || ") | |
end | |
def pg_dictionary | |
"simple" | |
end | |
def auto_weight(_column) | |
"A" | |
end | |
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'A') | |
def ts_set_weight(column) | |
%{setweight(to_tsvector('#{pg_dictionary}', #{coalesce(column)}), '#{auto_weight(column)}')} | |
end | |
# to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*') | |
def ts_query(query) | |
%{(to_tsquery('#{pg_dictionary}', ''' ' || #{query} || ' ''' || ':*'))} | |
end | |
# greatest(name_rank, short_name) DESC | |
# or | |
# short_name DESC | |
def greatest_rank | |
if rank_list.length > 1 | |
%{greatest(#{search_rank_list})} | |
else | |
rank_list.first | |
end + " DESC" | |
end | |
def search_rank_list | |
rank_list.map do |rank| | |
search_column(rank) | |
end.join(",") | |
end | |
# name_rank, short_name | |
def rank_list | |
@rank_list ||= columns.map { |column| rank_name(column) } | |
end | |
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS short_name_rank, | |
def rank_selector(column, query) | |
%{(#{rank_builder(column, query)}) as #{rank_name(column)}} | |
end | |
# coalesce("products"."currency"::text, '')) | |
def coalesce(column) | |
%{coalesce(#{table_column(column)}, '')} | |
end | |
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) | |
def rank_builder(column, query) | |
%{similarity(#{coalesce(column)}, #{query})} | |
end | |
# short_name_rank | |
def rank_name(symbol) | |
"#{symbol}_rank" | |
end | |
# 'FOO' | |
def sanitize(unsafe_query) | |
ActiveRecord::Base.connection.quote(unsafe_query) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment