Skip to content

Instantly share code, notes, and snippets.

@fmassot
Last active April 11, 2024 13:16
Show Gist options
  • Save fmassot/f9f97596c4e7548faef5052cc791291b to your computer and use it in GitHub Desktop.
Save fmassot/f9f97596c4e7548faef5052cc791291b to your computer and use it in GitHub Desktop.
Config files used for the Quickwit benchmark on the Github Archive dataset: https://quickwit.io/blog/benchmarking-quickwit-engine-on-an-adversarial-dataset
{
  "query": "actor.login:Cyan4973"
}
{
  "query": "*",
  "max_hits": 0,
  "aggs": {
    "events": {
      "date_histogram": {
        "field": "created_at",
        "fixed_interval": "1d"
      }
    }
  }
}
{
  "query": "actor.login:Cyan4973",
  "max_hits": 0,
  "aggs": {
    "events": {
      "date_histogram": {
        "field": "created_at",
        "fixed_interval": "1d"
      }
    }
  }
}
{
  "query": "(payload.description:quickwit OR payload.comment.body:quickwit OR payload.issue.body:quickwit)",
  "max_hits": 0,
  "aggs": {
    "top_repositories": {
      "terms": {
        "size": 100,
        "field": "repo.name",
        "order": {
          "_count": "desc"
        }
      }
    }
  }
}
---
#
# Index config file for gh-archive dataset.
#
# Declares the explicit field mappings for the GitHub Archive events and the
# indexing settings used for the index. Fields not listed here are still
# accepted because the doc mapping mode is `dynamic`.
#
version: 0.6
index_id: gh-archive-6

doc_mapping:
  field_mappings:
    - name: id
      type: text
    # Event type; fast field.
    - name: type
      type: text
      fast: true
    - name: actor
      type: object
      field_mappings:
        # Fast field; queried as `actor.login`.
        - name: login
          type: text
          fast: true
    - name: repo
      type: object
      field_mappings:
        - name: url
          type: text
        # Fast field; used as `repo.name` in terms aggregations.
        - name: name
          type: text
          fast: true
    # Free-text parts of the event payload (queried as `payload.*`).
    - name: payload
      type: object
      field_mappings:
        - name: description
          type: text
        - name: pull_request
          type: object
          field_mappings:
            - name: title
              type: text
            - name: body
              type: text
        - name: release
          type: object
          field_mappings:
            - name: body
              type: text
        - name: review
          type: object
          field_mappings:
            - name: body
              type: text
        - name: comment
          type: object
          field_mappings:
            - name: body
              type: text
            - name: diff_hunk
              type: text
        - name: issue
          type: object
          field_mappings:
            - name: title
              type: text
            - name: body
              type: text
    # Event timestamp; also declared as the index `timestamp_field` below.
    - name: created_at
      type: datetime
      fast: true
      indexed: true
      input_formats:
        - rfc3339
        - unix_timestamp
      output_format: rfc3339
      precision: seconds
      stored: true
    # NOTE(review): the sub-fields below are assumed to all belong to `org`;
    # the original indentation was lost, confirm against the dataset schema.
    - name: org
      type: object
      field_mappings:
        - name: login
          type: text
          fast: true
        - name: avatar_url
          type: text
        - name: url
          stored: true
          type: text
        - name: gravatar_id
          type: text
  tag_fields: []
  # The original JSON document is not stored.
  store_source: false
  timestamp_field: created_at
  mode: dynamic

indexing_settings:
  commit_timeout_secs: 60
  split_num_docs_target: 10000000
  merge_policy:
    type: limit_merge
    max_merge_ops: 0
    merge_factor: 10
    max_merge_factor: 10
    maturation_period: "2days"
  resources:
    heap_size: 2GB
---
#
# Source config file for the gh-archive dataset.
#
# Declares an `http` source that reads the gzipped hourly archives matching
# `uri_pattern`, using a single indexing pipeline.
#
version: 0.6
source_id: gh-archive-source
source_type: http
desired_num_pipelines: 1
params:
  # Pattern fields are year, month, day and hour.
  # NOTE(review): every upper bound is one past the last valid value
  # (month 13, day 32, hour 24), which suggests the ranges are end-exclusive.
  # Confirm against the http source implementation before changing them.
  uri_pattern: "https://data.gharchive.org/20{15..24}-{01..13}-{01..32}-{0..24}.json.gz"
---
#
# Helm chart values for the Quickwit deployment.
#

# Container image used by the chart; always re-pulled.
image:
  repository: quickwit/quickwit
  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  tag: qw-http-source

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
# Additional global env
# Values are quoted so they stay strings instead of being parsed as YAML
# integers or booleans.
environment:
  QW_DISABLE_TELEMETRY: "1"
  OTEL_EXPORTER_OTLP_ENDPOINT: "http://quickwit-indexer.quickwit-prod.svc.cluster.local:7281"
  QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER: "true"
  NO_COLOR: "true"
  RUST_LOG: "quickwit=info"
# Searcher pods: 4 replicas, pinned to c5n.2xlarge nodes.
searcher:
  replicaCount: 4
  resources:
    limits:
      memory: 18Gi
      cpu: 8
    requests:
      memory: 2Gi
      cpu: 7
  nodeSelector:
    node.kubernetes.io/instance-type: c5n.2xlarge
# Indexer pod: single replica with a persistent volume, pinned to c5a.xlarge
# nodes.
indexer:
  replicaCount: 1
  # Extra env for indexer
  extraEnv: {}
  resources:
    limits:
      memory: 8Gi
    requests:
      memory: 6Gi
  persistentVolume:
    enabled: true
    storage: "250Gi"
  nodeSelector:
    node.kubernetes.io/instance-type: c5a.xlarge
# Metastore pod: single replica.
metastore:
  replicaCount: 1
  # Extra env for metastore
  extraEnv: {}
    # KEY: VALUE
  resources:
    limits:
      memory: 1Gi
    requests:
      memory: 256Mi
# Control plane pod.
control_plane:
  # Extra env for control plane
  extraEnv: {}
    # KEY: VALUE
  resources:
    limits:
      memory: 256Mi
    requests:
      memory: 25Mi
# Janitor pod.
janitor:
  # Enable Janitor service
  enabled: true
  # Extra env for janitor
  extraEnv: {}
    # KEY: VALUE
  resources:
    limits:
      memory: 2Gi
    requests:
      memory: 100Mi
# Quickwit configuration
config:
  # Metastore and index data share the same S3 prefix.
  metastore_uri: s3://gharchive/indexes
  postgres: {}
  default_index_root_uri: s3://gharchive/indexes

  # Indexer settings
  indexer:
    split_store_max_num_bytes: 200G
    split_store_max_num_splits: 200
    enable_otlp_endpoint: false

  # NOTE(review): placed as a sibling of `indexer` and `searcher`; the
  # original indentation was lost, confirm against the chart's values schema.
  ingest_api:
    max_queue_memory_usage: 1GB
    max_queue_disk_usage: 2GB

  # Searcher settings
  searcher:
    # 6G for 1 searcher, 9GB for 2 searchers, 10.75GB for 4 searchers,
    # 12GB for 8 searchers and more
    fast_field_cache_capacity: 0GB
    # 6.5GB for 1 searcher, 3.5GB for 2 searchers, 1.75GB for 4 searchers,
    # 0.9GB otherwise
    split_footer_cache_capacity: 0GB
    max_num_concurrent_split_streams: 100
    partial_request_cache_capacity: 0
    aggregation_memory_limit: 5G
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment