Last active
December 19, 2024 17:48
-
-
Save gabriel-curtino/9d944dbb2e6fcb941d59a2e23edd2936 to your computer and use it in GitHub Desktop.
Rails SQLite Global Index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# models/index.rb | |
# == Index Model | |
# | |
# The `Index` model manages full-text search indexes using SQLite's FTS5 (Full-Text Search) extension. | |
# It provides methods for adding, removing, and searching indexed items, as well as managing the underlying database schema. | |
# | |
# ### Key Features | |
# - **Adding Items**: Adds items to the index, ensuring any existing entries are first removed. | |
# - **Removing Items**: Removes items from the index based on their ID and class type. | |
# - **Searching**: Performs full-text searches on the index, returning matching items. | |
# - **Schema Management**: Creates and drops the necessary FTS5 tables. | |
# - **Query Escaping**: Escapes special characters in search queries to ensure compatibility with FTS5 syntax. | |
# - **Content Sanitization**: Removes HTML tags and extra whitespace from content before indexing. | |
# | |
# frozen_string_literal: true | |
class Index < ApplicationRecord
  class_attribute :index_name, default: "index"
  class_attribute :tokenizer, default: :trigram

  # The `type` column stores the class name of the *indexed* record (see `add`),
  # not a subclass of Index, so single-table inheritance must not try to
  # resolve it when rows are instantiated.
  self.inheritance_column = nil

  # Keep the FTS5 tables (and anything sharing their prefix) out of schema dumps.
  ActiveRecord::SchemaDumper.ignore_tables << /^#{index_name}.*$/

  class << self
    def table_name = index_name
    def skip_schema_dumping = true

    # Maps the short `tokenizer` setting to the full FTS5 `tokenize=` spec.
    TOKENIZERS = {
      porter: "porter unicode61 remove_diacritics 2",
      unicode: "unicode61 remove_diacritics 2",
      ascii: "ascii",
      trigram: "trigram"
    }.freeze

    # Indexes +content+ for +item+, replacing any row already stored for it.
    #
    # @param item [#id] record to index; its class name is stored as `type`
    # @param content [String, nil] raw text; sanitized before it is stored
    # @return [Index] the created index row
    def add(item, content)
      # Delete + insert in one transaction so a failed insert does not leave
      # the item missing from the index.
      transaction do
        remove(item)
        create(id: item.id, type: item.class.name, content: sanitized_content(content))
      end
    end

    # Removes every index row stored for +item+.
    #
    # @param item [#id] record whose rows are deleted
    # @return [Integer] number of rows deleted
    def remove(item)
      where(id: item.id, type: item.class.name).delete_all
    end

    # Runs a full-text search and loads the matching records.
    #
    # @param query [String, nil] user-supplied search text
    # @param limit [Integer] maximum number of index rows to consider
    # @return [Array] matching records, best rank first; empty for a blank query
    def search(query, limit: 20)
      match = escape_fts_query(query)
      return [] if match.empty?

      hits = where(%Q("#{index_name}" MATCH ?), match)
        .order(:rank)
        .limit(limit)
        .pluck(:type, :id)

      # One query per indexed class, keyed so the rank order of `hits` can be
      # restored afterwards. Ids are compared as strings because the FTS5
      # columns are untyped.
      records = hits.group_by(&:first).each_with_object({}) do |(type_name, rows), found|
        type_name.constantize.where(id: rows.map(&:last)).each do |record|
          found[[type_name, record.id.to_s]] = record
        end
      end

      # Rows whose record no longer exists are skipped.
      hits.filter_map { |type_name, id| records[[type_name, id.to_s]] }
    end

    # Drops and recreates the FTS5 tables, discarding all indexed content.
    def build
      drop_schema
      create_schema
    end
    alias_method :rebuild, :build

    # Drops the FTS5 tables.
    def drop
      drop_schema
    end

    private def create_schema
      # Active Record gets confused by semicolons, which makes multi-statement
      # queries fail in some cases; `execute_batch` runs each statement separately.
      !!ActiveRecord::Base.connection.send(:execute_batch, [
        # Index tables
        %Q[CREATE VIRTUAL TABLE "#{index_name}" USING fts5(id UNINDEXED, type UNINDEXED, content, tokenize='#{tokenizer_spec}')],
        %Q[CREATE VIRTUAL TABLE "#{index_name}_row" USING fts5vocab(#{index_name}, row)],
        %Q[CREATE VIRTUAL TABLE "#{index_name}_instance" USING fts5vocab(#{index_name}, instance)],
      ])
    end

    private def drop_schema
      # Same `execute_batch` workaround as in `create_schema`.
      !!ActiveRecord::Base.connection.send(:execute_batch, [
        %Q[DROP TABLE IF EXISTS "#{index_name}_row"],
        %Q[DROP TABLE IF EXISTS "#{index_name}_instance"],
        %Q[DROP TABLE IF EXISTS "#{index_name}"],
      ])
    end

    # Resolves the `tokenizer` setting through TOKENIZERS; a value that is not
    # a known key is passed to FTS5 unchanged.
    private def tokenizer_spec
      TOKENIZERS.fetch(tokenizer.to_sym, tokenizer)
    end

    # Turns free-form user input into a safe FTS5 query string.
    #
    # Every whitespace-separated term is emitted as a double-quoted FTS5 string
    # (embedded quotes doubled), so characters such as `:`, `&`, `*` and the
    # keywords AND/OR/NOT/NEAR are matched as text instead of being parsed as
    # query syntax.
    #
    # @param query [String, nil]
    # @return [String] space-joined quoted terms; "" when there are none
    private def escape_fts_query(query)
      query.to_s.split.map { |term| %("#{term.gsub('"', '""')}") }.join(" ")
    end

    # Strips HTML-like tags and collapses whitespace.
    #
    # Tags are removed before squishing so that no doubled spaces are left
    # behind; `/m` lets a tag span line breaks.
    #
    # @param content [String, nil]
    # @return [String]
    def sanitized_content(content)
      content.to_s.gsub(/<.*?>/m, "").squish
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# models/concerns/indexable.rb | |
## Indexable Concern | |
# | |
# The `Indexable` concern provides a set of methods and callbacks to manage the indexing of model instances. | |
# It integrates with the `Index` model to handle full-text search indexing and deindexing. | |
# | |
# ### Key Features | |
# - **Indexing and Deindexing**: Automatically adds or removes model instances from the index upon save or destruction. | |
# - **Content Extraction**: Allows specifying a method to extract content from the model instance for indexing. | |
# - **Batch Indexing**: Provides methods to index or deindex all instances of a model. | |
# - **Background Jobs**: Uses background jobs to perform indexing and deindexing operations asynchronously. | |
# | |
# frozen_string_literal: true | |
module Indexable
  extend ActiveSupport::Concern

  included do
    # Name of the method that produces the text to index. Stored per including
    # class, so models with different content methods do not overwrite each
    # other's setting.
    class_attribute :index_content_method, instance_writer: false, default: :to_content

    # Enqueues a job that (re)indexes this record.
    def index
      AddToIndexJob.perform_later(self)
    end

    # Enqueues a job that removes this record from the index.
    def deindex
      # Pass plain values rather than the record: this runs after destroy, and
      # a job argument that must be reloaded from the database could not be
      # deserialized once the row is gone.
      RemoveFromIndexJob.perform_later(self.class.name, id)
    end

    # @return [String] sanitized text for this record, as stored in the index
    def index_content
      Index.sanitized_content(public_send(index_content_method))
    end
  end

  class_methods do
    # Declares the model as indexed and installs the commit callbacks.
    #
    # @param content_method [Symbol] instance method returning the text to index
    def index(content_method = :to_content)
      self.index_content_method = content_method
      return if Rails.env.test?

      # A model may opt out per record by defining `index?`; models that do
      # not define it are always indexed.
      after_save_commit :index, if: ->(item) { !item.respond_to?(:index?) || item.index? }
      after_destroy_commit :deindex
    end

    # Rebuilds the index rows for every record of this model.
    #
    # @return [Array<Index>] the created index rows
    def index_all
      deindex_all
      Index.create(
        find_each.map do |item|
          {
            id: item.id,
            type: item.class.name,
            content: Index.sanitized_content(item.public_send(index_content_method)),
          }
        end
      )
    end

    # Deletes every index row stored under this model's class name.
    #
    # @return [Integer] number of rows deleted
    def deindex_all
      Index.where(type: name).delete_all
    end
  end

  # Background job: adds or refreshes one record in the index.
  class AddToIndexJob < ApplicationJob
    queue_as :default

    def perform(item)
      Index.add(item, item.index_content)
    end
  end

  # Background job: removes one record from the index.
  class RemoveFromIndexJob < ApplicationJob
    queue_as :default

    # @param item_or_type [Object, String] a record (legacy call style) or the
    #   indexed class name
    # @param id [Object, nil] record id; required when a class name is given
    def perform(item_or_type, id = nil)
      if id.nil?
        Index.remove(item_or_type)
      else
        Index.where(id: id, type: item_or_type).delete_all
      end
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to use | |
class CreateIndex < ActiveRecord::Migration[7.1]
  # Creates the search index tables, then indexes the existing guests.
  def up
    Index.build
    Guest.index_all
  end

  # Drops the search index tables.
  def down = Index.drop
end
class Guest < ApplicationRecord
  include Indexable

  index :to_content

  # Text handed to the search index: name, email and phone separated by single spaces.
  def to_content = [name, email, phone].join(" ")
end
class SearchController < ApplicationController
  # Renders search results as a Turbo Stream, or removes the results element
  # when the query is missing or blank.
  def search
    # `to_s` guards against a request without a `query` param, where
    # `params[:query]` is nil.
    query = params[:query].to_s.squish

    if query.present?
      results = Index.search(query)
      render turbo_stream: [
        turbo_stream.append("search_results_container",
          partial: "search/results",
          locals: {
            results: results,
          }
        )
      ]
    else
      render turbo_stream: turbo_stream.remove("search_results")
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment