Skip to content

Instantly share code, notes, and snippets.

@scottrice10
Created September 10, 2013 00:58
Show Gist options
  • Select an option

  • Save scottrice10/6503652 to your computer and use it in GitHub Desktop.

Select an option

Save scottrice10/6503652 to your computer and use it in GitHub Desktop.
Here is a mapping I used to index ~75,000 health provider documents from npiapi.com, a web service providing NPI health provider records in JSON. Lines 4 and 5: Per recommendations on the Elasticsearch Google Group and in a gist written by Shay Banon about peak indexing performance, I set a high number of shards and 0 replicas to increase index…
# Create the `doctors_index` index with its analysis settings and the mapping
# for the `doctors` document type.
#
# Settings:
#   - 16 shards, 0 replicas.
#   - Refresh interval of 120 seconds. The unit is spelled out ("120s") because
#     a unitless value is interpreted by Elasticsearch as milliseconds.
#   - `autocomplete` analyzer: standard tokenizer, then the standard, lowercase,
#     kstem filters and the custom `ngram` edge n-gram filter (2-15 characters)
#     declared under analysis.filter.
#   - `post_title` analyzer: standard tokenizer, then standard, lowercase and
#     asciifolding filters.
#
# Mapping notes:
#   - `names.first_name` and `names.last_name` are multi_fields whose default
#     sub-field carries the field's own name; both declare `post_title` and
#     `autocomplete` sub-fields and use path "just_name".
curl -XPUT 'localhost:9200/doctors_index' -d '
{
  "settings": {
    "number_of_shards": 16,
    "number_of_replicas": 0,
    "index.refresh_interval": "120s",
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "lowercase", "kstem", "ngram"]
        },
        "post_title": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "lowercase", "asciifolding"]
        }
      },
      "filter": {
        "ngram": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 15
        }
      }
    }
  },
  "mappings": {
    "doctors": {
      "index_analyzer": "standard",
      "search_analyzer": "standard",
      "_timestamp": { "enabled": true },
      "properties": {
        "names": {
          "properties": {
            "name_suffix": {
              "type": "string"
            },
            "name_prefix": {
              "type": "string"
            },
            "first_name": {
              "type": "multi_field",
              "path": "just_name",
              "fields": {
                "first_name": {
                  "type": "string"
                },
                "post_title": {
                  "type": "string",
                  "analyzer": "post_title",
                  "similarity": "BM25"
                },
                "autocomplete": {
                  "analyzer": "autocomplete",
                  "type": "string"
                }
              }
            },
            "organization_name_type": {
              "type": "string"
            },
            "middle_name": {
              "type": "string"
            },
            "primary": {
              "type": "string"
            },
            "last_name": {
              "type": "multi_field",
              "path": "just_name",
              "fields": {
                "last_name": {
                  "type": "string"
                },
                "post_title": {
                  "type": "string",
                  "analyzer": "post_title",
                  "similarity": "BM25"
                },
                "autocomplete": {
                  "analyzer": "autocomplete",
                  "type": "string"
                }
              }
            },
            "gender": {
              "type": "string"
            },
            "organization_name": {
              "type": "string"
            },
            "credential": {
              "type": "string"
            }
          }
        },
        "official": {
          "properties": {
            "name_suffix": {
              "type": "string"
            },
            "name_prefix": {
              "type": "string"
            },
            "first_name": {
              "type": "string"
            },
            "title": {
              "type": "string"
            },
            "middle_name": {
              "type": "string"
            },
            "last_name": {
              "type": "string"
            },
            "telephone": {
              "type": "string"
            },
            "credential": {
              "type": "string"
            }
          }
        },
        "addresses": {
          "properties": {
            "street2": {
              "type": "string"
            },
            "street1": {
              "type": "string"
            },
            "fax": {
              "type": "string"
            },
            "postal_code": {
              "type": "long"
            },
            "state": {
              "type": "string"
            },
            "type": {
              "type": "string"
            },
            "telephone": {
              "type": "string"
            },
            "country_code": {
              "type": "string"
            },
            "city": {
              "type": "string"
            }
          }
        },
        "entity_type": {
          "type": "string"
        },
        "last_update_date": {
          "type": "string"
        },
        "taxonomies": {
          "properties": {
            "code": {
              "type": "string"
            }
          }
        }
      }
    }
  }
}
'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment