Last active
April 11, 2024 13:16
-
-
Save fmassot/f9f97596c4e7548faef5052cc791291b to your computer and use it in GitHub Desktop.
Config files used for the Quickwit benchmark on the GitHub Archive dataset: https://quickwit.io/blog/benchmarking-quickwit-engine-on-an-adversarial-dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "query": "actor.login:Cyan4973"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "query": "*",
  "max_hits": 0,
  "aggs": {
    "events": {
      "date_histogram": {
        "field": "created_at",
        "fixed_interval": "1d"
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "query": "actor.login:Cyan4973",
  "max_hits": 0,
  "aggs": {
    "events": {
      "date_histogram": {
        "field": "created_at",
        "fixed_interval": "1d"
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "query": "(payload.description:quickwit OR payload.comment.body:quickwit OR payload.issue.body:quickwit)",
  "max_hits": 0,
  "aggs": {
    "top_repositories": {
      "terms": {
        "size": 100,
        "field": "repo.name",
        "order": { "_count": "desc" }
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Index config file for gh-archive dataset.
#
# Nesting restored from the flattened listing; the object paths match the
# fields referenced by the benchmark queries (actor.login, repo.name,
# payload.description, payload.comment.body, payload.issue.body).
#
version: 0.6

index_id: gh-archive-6

doc_mapping:
  field_mappings:
    - name: id
      type: text
    - name: type
      type: text
      fast: true
    - name: actor
      type: object
      field_mappings:
        - name: login
          type: text
          fast: true
    - name: repo
      type: object
      field_mappings:
        - name: url
          type: text
        # repo.name is the field of the "top_repositories" terms aggregation.
        - name: name
          type: text
          fast: true
    - name: payload
      type: object
      field_mappings:
        - name: description
          type: text
        - name: pull_request
          type: object
          field_mappings:
            - name: title
              type: text
            - name: body
              type: text
        - name: release
          type: object
          field_mappings:
            - name: body
              type: text
        - name: review
          type: object
          field_mappings:
            - name: body
              type: text
        - name: comment
          type: object
          field_mappings:
            - name: body
              type: text
            - name: diff_hunk
              type: text
        - name: issue
          type: object
          field_mappings:
            - name: title
              type: text
            - name: body
              type: text
    # created_at is both the index timestamp field (see timestamp_field below)
    # and the field of the date_histogram aggregations.
    - name: created_at
      type: datetime
      fast: true
      indexed: true
      input_formats:
        - rfc3339
        - unix_timestamp
      output_format: rfc3339
      precision: seconds
      stored: true
    - name: org
      type: object
      field_mappings:
        - name: login
          type: text
          fast: true
        - name: avatar_url
          type: text
        - name: url
          stored: true
          type: text
        - name: gravatar_id
          type: text
  tag_fields: []
  store_source: false
  timestamp_field: created_at
  # Fields not listed above are still accepted (dynamic mode).
  mode: dynamic

indexing_settings:
  commit_timeout_secs: 60
  split_num_docs_target: 10000000
  merge_policy:
    type: limit_merge
    # NOTE(review): max_merge_ops of 0 presumably prevents any split from
    # being merged during the benchmark -- confirm this is intentional.
    max_merge_ops: 0
    merge_factor: 10
    max_merge_factor: 10
    maturation_period: "2days"
  resources:
    heap_size: 2GB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source config: pulls the hourly GH Archive dumps over HTTP.
version: 0.6
source_id: gh-archive-source
source_type: http
desired_num_pipelines: 1
params:
  # One URL per hourly archive: 20YY-MM-DD-H.json.gz (hour is not zero-padded).
  # NOTE(review): the upper bounds (24, 13, 32, 24) only make sense if ranges
  # are end-exclusive (years 2015-2023, months 01-12, days 01-31, hours 0-23).
  # Confirm against the http source implementation in the qw-http-source build.
  uri_pattern: "https://data.gharchive.org/20{15..24}-{01..13}-{01..32}-{0..24}.json.gz"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Helm values for the Quickwit deployment used in the benchmark.
image:
  repository: quickwit/quickwit
  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  tag: qw-http-source

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

# Additional global env
environment:
  QW_DISABLE_TELEMETRY: 1
  OTEL_EXPORTER_OTLP_ENDPOINT: http://quickwit-indexer.quickwit-prod.svc.cluster.local:7281
  QW_ENABLE_OPENTELEMETRY_OTLP_EXPORTER: true
  NO_COLOR: true
  RUST_LOG: quickwit=info

searcher:
  replicaCount: 4

  resources:
    limits:
      memory: 18Gi
      cpu: 8
    requests:
      memory: 2Gi
      cpu: 7

  nodeSelector:
    node.kubernetes.io/instance-type: c5n.2xlarge

indexer:
  replicaCount: 1

  # Extra env for indexer
  extraEnv: {}

  resources:
    limits:
      memory: 8Gi
    requests:
      memory: 6Gi

  persistentVolume:
    enabled: true
    storage: "250Gi"

  nodeSelector:
    node.kubernetes.io/instance-type: c5a.xlarge

metastore:
  replicaCount: 1

  # Extra env for metastore
  extraEnv: {}
    # KEY: VALUE

  resources:
    limits:
      memory: 1Gi
    requests:
      memory: 256Mi

control_plane:
  # Extra env for control plane
  extraEnv: {}
    # KEY: VALUE

  resources:
    limits:
      memory: 256Mi
    requests:
      memory: 25Mi

janitor:
  # Enable Janitor service
  enabled: true

  # Extra env for janitor
  extraEnv: {}
    # KEY: VALUE

  resources:
    limits:
      memory: 2Gi
    requests:
      memory: 100Mi

# Quickwit configuration
config:
  metastore_uri: s3://gharchive/indexes
  postgres: {}
  default_index_root_uri: s3://gharchive/indexes

  # Indexer settings
  indexer:
    split_store_max_num_bytes: 200G
    split_store_max_num_splits: 200
    enable_otlp_endpoint: false

  ingest_api:
    max_queue_memory_usage: 1GB
    max_queue_disk_usage: 2GB

  # Searcher settings
  # Both caches below are set to 0GB here; the trailing comments record the
  # sizes listed per searcher count.
  searcher:
    fast_field_cache_capacity: 0GB # 6GB for 1 searcher, 9GB for 2 searchers, 10.75GB for 4 searchers, 12GB for 8 searchers and more
    split_footer_cache_capacity: 0GB # 6.5GB for 1 searcher, 3.5GB for 2 searchers, 1.75GB for 4 searchers, 0.9GB otherwise
    max_num_concurrent_split_streams: 100
    partial_request_cache_capacity: 0
    aggregation_memory_limit: 5G
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment