Skip to content

Instantly share code, notes, and snippets.

@zw963
Created November 21, 2021 13:50
Show Gist options
  • Save zw963/212654fc1180d39be1d818392c96bb68 to your computer and use it in GitHub Desktop.
Save zw963/212654fc1180d39be1d818392c96bb68 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
begin
require "bundler/inline"
rescue LoadError => e
$stderr.puts "Bundler version 1.10 or later is required. Please update your Bundler"
raise e
end
gemfile(true) do
source "https://rubygems.org"
gem 'sequel_pg', require: 'sequel'
gem 'pg'
gem 'awesome_print'
end
# start code here
# NOTICE:
# For accurately reproduce the problem, zhparser/scws had to be configured.
# wget -q -O - http://www.xunsearch.com/scws/down/scws-1.2.1.tar.bz2 | tar xjf -
# cd scws-1.2.3 ; ./configure && make && sudo env "PATH=$PATH" make install
# wget -q -O - https://github.com/amutu/zhparser/archive/refs/tags/V2.2.tar.gz |tar xzf -
# cd zhparser-2.2 ; make && sudo env "PATH=$PATH" make install
system("sudo -u postgres dropdb --if-exists check_sequel_db")
system("sudo -u postgres createdb check_sequel_db")
DB_URL="postgres://postgres:postgres@localhost:5432/check_sequel_db"
DB = Sequel.connect(DB_URL)
DB.create_table(:investing_latest_news, :ignore_index_errors=>true) do
primary_key :id
String :title, :text=>true, :null=>false
end
DB.run 'CREATE EXTENSION zhparser;'
DB.run "DROP TEXT SEARCH CONFIGURATION IF EXISTS zhparser;"
DB.run "CREATE TEXT SEARCH CONFIGURATION zhparser (PARSER = zhparser);"
DB.run "ALTER TEXT SEARCH CONFIGURATION zhparser ADD MAPPING FOR n,v,a,i,e,l,j,x WITH simple;"
DB.run "ALTER role all SET zhparser.multi_short=on;" # 1 短词复合
DB.run "ALTER role all SET zhparser.multi_duality=on;" # 2 二元复合
DB.run <<'HEREDOC'
ALTER TABLE investing_latest_news
ADD COLUMN textsearchable_index_col tsvector GENERATED ALWAYS
AS
(
to_tsvector(
'zhparser',
coalesce(title, '')
)
)
STORED;
HEREDOC
DB.run 'CREATE INDEX investing_latest_news_textsearch_idx_index ON investing_latest_news USING GIN (textsearchable_index_col);'
class InvestingLatestNews < Sequel::Model
end
InvestingLatestNews.create(
title: 'NYMEX原油仍有可能跌破78.85美元',
)
ap InvestingLatestNews.all
# [
# [0] #<InvestingLatestNews:0x000055e892124528> {
# :id => 1,
# :textsearchable_index_col => "'78':12 '78.85':11 '85':13 'nymex':1 '原油':2 '可':6 '可能':5 '有':4 '油':3 '破':10 '能':7 '跌':9 '跌破':8",
# :title => "NYMEX原油仍有可能跌破78.85美元"
# }
# ]
# NOTICE:
# As we can see, after create above news, "美元",the english equivalent is US dollar,
# is not add into textsearchable_index_col, so we can't find out it when do full text search.
p InvestingLatestNews.where(Sequel.lit('textsearchable_index_col @@ to_tsquery(?)', '美元')).all
# => []
# Add "美元" into user defined user-defined dictionary, and async it.
DB.run(Sequel.lit("INSERT INTO zhparser.zhprs_custom_word values(?) ON CONFLICT DO NOTHING;", '美元'))
DB.run("SELECT sync_zhprs_custom_word();")
# Update title=title, let stored generated column can pick it.
DB.run(Sequel.lit("update investing_latest_news set title=title"))
# Above code Not work, because textsearchable_index_col not updated.
p InvestingLatestNews.where(Sequel.lit('textsearchable_index_col @@ to_tsquery(?)', '美元')).all
# => []
# [1] pry(main)> InvestingLatestNews.all
# [
# #<InvestingLatestNews:0x000055aaaac39678> {
# :id => 1,
# :textsearchable_index_col => "'78':12 '78.85':11 '85':13 'nymex':1 '原油':2 '可':6 '可能':5 '有':4 '油':3 '破':10 '能':7 '跌':9 '跌破':8",
# :title => "NYMEX原油仍有可能跌破78.85美元"
# }
# ]
# Try again use pg directly.
db = PG.connect(URI(DB_URL))
db.exec("UPDATE investing_latest_news SET title = title;")
# it works now.
p InvestingLatestNews.where(Sequel.lit('textsearchable_index_col @@ to_tsquery(?)', '美元')).all
# => [#<InvestingLatestNews @values={:id=>1,...}]
ap InvestingLatestNews.all
# [
# [0] #<InvestingLatestNews:0x000055e891c9d388> {
# :id => 1,
# :textsearchable_index_col => "'78':12 '78.85':11 '85':13 'nymex':1 '原油':2 '可':6 '可能':5 '有':4 '油':3 '破':10 '美元':14 '能':7 '跌':9 '跌破':8",
# :title => "NYMEX原油仍有可能跌破78.85美元"
# }
# ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment