Skip to content

Instantly share code, notes, and snippets.

@csrutil
Created January 11, 2023 12:16
Show Gist options
  • Select an option

  • Save csrutil/2ba751876504114ff0389e9eb5da145d to your computer and use it in GitHub Desktop.

Select an option

Save csrutil/2ba751876504114ff0389e9eb5da145d to your computer and use it in GitHub Desktop.
app_episodes-index-config.yaml
-- Export every row of episodes as one JSON document per line into
-- /Volumes/Ram/app_episodes.json.
--
-- Why FORMAT csv with control-character QUOTE / DELIMITER:
-- COPY's default text format backslash-escapes its output, so every backslash
-- that json_build_object emits as part of a JSON escape (\" \\ \n \t ...) is
-- doubled in the file. An escaped quote then becomes invalid JSON and an
-- escaped newline becomes a literal backslash followed by "n".
-- CSV format writes a value unchanged unless it contains the quote character,
-- the delimiter, or a raw line break. The JSON text produced here contains
-- none of those: JSON escapes control characters inside strings, so the raw
-- bytes 0x01 / 0x02 and raw line breaks never appear in it.
-- The text columns are therefore exported verbatim; no quote or newline
-- stripping is needed to keep the file parseable.
COPY (
    SELECT
        json_build_object(
            'episode_id', id,
            'title', title,
            'subtitle', subtitle,
            'description', description,
            -- Seconds since the Unix epoch, cast to bigint.
            'publish_date', extract(epoch FROM publish_date)::bigint
        )
    FROM episodes
) TO '/Volumes/Ram/app_episodes.json'
WITH (FORMAT csv, QUOTE E'\x01', DELIMITER E'\x02');
-- Preview query: builds the episode JSON document for 10 rows of episodes,
-- passing each text column through REGEXP_REPLACE first.
-- NOTE(review): no 'g' flag is given, so only the first run of matching
-- characters in each value is removed; confirm that is intended.
-- NOTE(review): LIMIT without ORDER BY returns an arbitrary 10 rows.
WITH sampled AS (
    SELECT
        id AS episode_id,
        REGEXP_REPLACE(title, '[^\w\\n]+', '', 'n') AS clean_title,
        REGEXP_REPLACE(subtitle, '[^\w\\n]+', '', 'n') AS clean_subtitle,
        REGEXP_REPLACE(description, '[^\w\\n]+', '', 'n') AS clean_description,
        CAST(EXTRACT(EPOCH FROM publish_date) AS bigint) AS publish_epoch
    FROM episodes
    LIMIT 10
)
SELECT
    json_build_object(
        'episode_id', episode_id,
        'title', clean_title,
        'subtitle', clean_subtitle,
        'description', clean_description,
        'publish_date', publish_epoch
    )
FROM sampled;
# Ingest app_episodes.json (path relative to the current directory) into the
# app_episodes index.
quickwit index ingest \
  --index app_episodes \
  --input-path app_episodes.json
# Quickwit index configuration for the app_episodes index.
version: 0.4
index_id: app_episodes

doc_mapping:
  field_mappings:
    # Numeric episode identifier.
    - name: episode_id
      description: episode id
      type: u64
      stored: true
      indexed: true
      fast: true

    # Text fields: default tokenizer, positions recorded, original value stored.
    - name: title
      type: text
      tokenizer: default
      record: position
      stored: true
    - name: subtitle
      type: text
      tokenizer: default
      record: position
      stored: true
    - name: description
      type: text
      tokenizer: default
      record: position
      stored: true

    # Publication time; accepts a Unix timestamp or the strptime-style
    # format listed below, and is returned as Unix seconds.
    - name: publish_date
      type: datetime
      input_formats:
        - unix_timestamp
        - "%Y %m %d %H:%M:%S.%f"
      output_format: unix_timestamp_secs
      precision: seconds
      fast: true

search_settings:
  # Fields searched when a query does not name a field explicitly.
  default_search_fields: [title, description]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment