Skip to content

Instantly share code, notes, and snippets.

@vitalibaranov
Created January 20, 2025 08:28
Show Gist options
  • Save vitalibaranov/5f57379001808a14af927f0c916ef569 to your computer and use it in GitHub Desktop.
Save vitalibaranov/5f57379001808a14af927f0c916ef569 to your computer and use it in GitHub Desktop.
Create the "persons" index with person-name handling based on the ICU analysis plugin
# Create the "persons" index: field mappings for person search plus the custom
# analysis chain (3-gram tokenizers, a "ё => е" mapping char filter, Russian
# stop words/stemming and ICU transliteration filters).
# The icu_transform token filters require the analysis-icu plugin on the cluster.
# Name fields are analyzed twice: the parent field transliterates Latin input to
# Cyrillic (icu_transform_to_russian), and its "latin" sub-field uses a
# *_translit analyzer that transliterates Cyrillic to Latin
# (icu_transform_to_latin).
# _source is disabled, so the original documents are not stored in the index.
# NOTE(review): index.search.slowlog.level was removed in Elasticsearch 8.0;
# confirm the target cluster version still accepts it.
curl -XPUT "http://tms17.pyn.ru:7600/persons" -H 'Content-Type: application/json' -d'
{
  "mappings": {
    "_source": {
      "enabled": false
    },
    "properties": {
      "firstName": {
        "analyzer": "name_3gram_analyzer",
        "type": "text",
        "fields": {
          "latin": {
            "type": "text",
            "analyzer": "name_3gram_analyzer_translit"
          }
        }
      },
      "lastName": {
        "analyzer": "name_3gram_analyzer",
        "type": "text",
        "fields": {
          "latin": {
            "type": "text",
            "analyzer": "name_3gram_analyzer_translit"
          }
        }
      },
      "middleName": {
        "analyzer": "name_3gram_analyzer",
        "type": "text",
        "fields": {
          "latin": {
            "type": "text",
            "analyzer": "name_3gram_analyzer_translit"
          }
        }
      },
      "fullName": {
        "type": "text",
        "analyzer": "lowercase_analyzer",
        "fields": {
          "latin": {
            "type": "text",
            "analyzer": "lowercase_analyzer_translit"
          }
        }
      },
      "contacts": {
        "type": "keyword",
        "normalizer": "keyword_lowercase"
      },
      "hhClientId": {
        "type": "integer"
      },
      "cellPhone": {
        "analyzer": "contact_3gram_analyzer",
        "type": "text"
      },
      "workPhone": {
        "analyzer": "contact_3gram_analyzer",
        "type": "text"
      },
      "homePhone": {
        "analyzer": "contact_3gram_analyzer",
        "type": "text"
      },
      "email": {
        "analyzer": "contact_3gram_analyzer",
        "type": "text"
      },
      "consentStatus": {
        "type": "keyword"
      },
      "sourceId": {
        "type": "integer"
      },
      "vacancies": {
        "type": "integer"
      },
      "tags": {
        "type": "integer"
      },
      "mergeable": {
        "type": "boolean"
      },
      "creationTime": {
        "type": "date"
      },
      "editingTime": {
        "type": "date"
      },
      "resumes": {
        "analyzer": "ru_full_text",
        "type": "text"
      },
      "comments": {
        "analyzer": "ru_full_text",
        "type": "text"
      },
      "hhComments": {
        "analyzer": "ru_full_text",
        "type": "text"
      },
      "workflowStatuses": {
        "type": "keyword"
      },
      "currentWorkflowStatuses": {
        "type": "keyword"
      },
      "relocationType": {
        "type": "keyword"
      },
      "area": {
        "type": "keyword"
      },
      "relocationAreas": {
        "type": "keyword"
      },
      "birthDay": {
        "type": "date"
      },
      "discardReasons": {
        "type": "integer"
      },
      "salary": {
        "properties": {
          "currency": {
            "type": "keyword"
          },
          "amount": {
            "type": "integer"
          }
        }
      }
    }
  },
  "settings": {
    "number_of_shards": "9",
    "number_of_replicas": "2",
    "index": {
      "search": {
        "slowlog": {
          "level": "info",
          "threshold": {
            "fetch": {
              "warn": "1s",
              "trace": "-1",
              "debug": "-1",
              "info": "400ms"
            },
            "query": {
              "warn": "3s",
              "trace": "-1",
              "debug": "-1",
              "info": "2s"
            }
          }
        }
      },
      "analysis": {
        "normalizer": {
          "keyword_lowercase": {
            "type": "custom",
            "filter": [
              "lowercase"
            ]
          }
        },
        "analyzer": {
          "lowercase_analyzer": {
            "type": "custom",
            "filter": [
              "lowercase",
              "icu_transform_to_russian"
            ],
            "tokenizer": "standard"
          },
          "lowercase_analyzer_translit": {
            "type": "custom",
            "filter": [
              "lowercase",
              "icu_transform_to_latin"
            ],
            "tokenizer": "standard"
          },
          "contact_3gram_analyzer": {
            "type": "custom",
            "filter": [
              "lowercase",
              "icu_transform_to_russian"
            ],
            "tokenizer": "contact_3gram"
          },
          "name_3gram_analyzer": {
            "type": "custom",
            "char_filter": [
              "mapping_filter"
            ],
            "filter": [
              "lowercase",
              "icu_transform_to_russian"
            ],
            "tokenizer": "name_3gram"
          },
          "name_3gram_analyzer_translit": {
            "type": "custom",
            "char_filter": [
              "mapping_filter"
            ],
            "filter": [
              "lowercase",
              "icu_transform_to_latin"
            ],
            "tokenizer": "name_3gram"
          },
          "ru_full_text": {
            "type": "custom",
            "char_filter": [
              "html_strip",
              "mapping_filter"
            ],
            "filter": [
              "lowercase",
              "russian_stop",
              "russian_stemmer"
            ],
            "tokenizer": "standard"
          }
        },
        "char_filter": {
          "mapping_filter": {
            "mappings": [
              "ё => е"
            ],
            "type": "mapping"
          }
        },
        "filter": {
          "russian_stemmer": {
            "language": "russian",
            "type": "stemmer"
          },
          "russian_stop": {
            "stopwords": "_russian_",
            "type": "stop"
          },
          "icu_transform_to_latin": {
            "type": "icu_transform",
            "id": "Russian-Latin/BGN"
          },
          "icu_transform_to_russian": {
            "type": "icu_transform",
            "id": "Latin-Russian/BGN"
          }
        },
        "tokenizer": {
          "contact_3gram": {
            "max_gram": 3,
            "min_gram": 3,
            "token_chars": [],
            "type": "ngram"
          },
          "name_3gram": {
            "max_gram": 3,
            "min_gram": 3,
            "token_chars": [
              "letter",
              "digit"
            ],
            "type": "ngram"
          }
        }
      }
    }
  }
}'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment