-
-
Save Laxman-SM/7f228e1ffe1d9a6a4e9c1fddf9391b19 to your computer and use it in GitHub Desktop.
solr stuff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################## | |
######################################################################## | |
## Run this full block to set the environment for talking to solr via curl | |
## - must be on an Ambari Infra host | |
######################################################################## | |
## authenticate | |
sudo -u infra-solr -i | |
keytab=/etc/security/keytabs/ambari-infra-solr.service.keytab | |
kinit -kt ${keytab} $(klist -kt ${keytab}| awk '{print $NF}'|tail -1) | |
source /etc/ambari-infra-solr/conf/infra-solr-env.sh | |
#export SOLR_ZK_CREDS_AND_ACLS="${SOLR_AUTHENTICATION_OPTS}" | |
## set the collection to work with | |
collection="audit_logs" | |
collection="hadoop_logs" | |
collection="history" | |
collection="ranger_audits" | |
solr_curl_opts="&wt=json&indent=on" | |
## Issue a kerberized curl request against the local Solr instance (port 8886).
##   $1: the URL path + query string (without scheme/host/port) — required
##   $2: (optional) extra options passed to curl; intentionally unquoted so a
##       single string like '--data-binary set=root:WARN' word-splits into args
## Appends ${solr_curl_opts} (e.g. "&wt=json&indent=on") to the URL.
## Echoes the exact command before running it, so it can be copy/pasted.
solr_curl(){
  [[ "$1" ]] || { echo "Error: Missing query" >&2; return 1; }
  ## Build the command once so the echoed command and the executed command
  ## can never drift apart.
  local url="http://$(hostname -f):8886${1}${solr_curl_opts}"
  local -a cmd=(curl -sSg -u: --negotiate "${url}" ${2})
  echo "${cmd[@]}"
  "${cmd[@]}"
}
######################################################################## | |
######################################################################## | |
## operational commands | |
######################################################################## | |
## status of collection | |
solr_curl "/solr/admin/collections?action=CLUSTERSTATUS&collection=${collection}" | |
## temporarily change logging level (lost when service restarts) | |
solr_curl_opts="" solr_curl "/solr/admin/info/logging" '--data-binary "set=root:WARN&wt=json"' | |
## some other admin commands | |
solr_curl "/solr/admin/collections?action=CLUSTERSTATUS" | |
solr_curl "/solr/admin/collections?action=LIST" | |
solr_curl "/solr/admin/cores?action=STATUS" | |
solr_curl "/solr/admin/cores?action=STATUS&memory=true" | |
solr_curl "/solr/admin/collections?action=OVERSEERSTATUS" | |
## blah needed to disable the ${solr_curl_opts} | |
solr_curl "/solr/${collection}/schema?blah" | |
solr_curl "/solr/${collection}/config?blah" | |
######################################################################## | |
######################################################################## | |
## SolrCLI commands (doesn't seem to work with kerberos) | |
/usr/lib/ambari-infra-solr/bin/solr healthcheck -c ranger_audits -z "${ZK_HOST}" | |
######################################################################## | |
######################################################################## | |
## zookeeper (zk): examples for getting and changing configs | |
solrzk="/usr/lib/ambari-infra-solr/server/scripts/cloud-scripts/zkcli.sh -zkhost ${ZK_HOST}" | |
## get solrconfig.xml | |
${solrzk} -cmd getfile /configs/ranger_audits/solrconfig.xml solrconfig.xml | |
## edit file | |
# vi solrconfig.xml | |
## upload updated solrconfig.xml | |
${solrzk} -cmd putfile /configs/ranger_audits/solrconfig.xml solrconfig.xml | |
## reload cores for changes to apply | |
solr_curl "/solr/admin/collections?action=RELOAD&name=${collection}&async=reload" | |
## others | |
${solrzk} -cmd getfile /configs/ranger_audits/solrconfig.xml.j2 solrconfig.xml.j2 | |
${solrzk} -cmd getfile /configs/ranger_audits/managed-schema managed-schema | |
${solrzk} -cmd putfile /configs/ranger_audits/managed-schema managed-schema.new | |
${solrzk} -cmd get /configs/ranger_audits/elevate.xml >/tmp/elevate.xml | |
${solrzk} -cmd get /configs/ranger_audits/schema.xml >/tmp/schema.xml | |
############################################################################# | |
############################################################################# | |
## queries | |
############################################################################# | |
## select all: oldest | |
solr_curl "/solr/${collection}/select?q=*:*&sort=evtTime+asc&rows=1" | |
# select all: newest | |
solr_curl "/solr/${collection}/select?q=*:*&sort=evtTime+desc&rows=1" | |
## make query work if a shard is down | |
solr_curl "/solr/${collection}/select?q=*:*&rows=1" "&shards.tolerant=true" | |
## Query by date/time range | |
solr_curl "/solr/${collection}/select?q=evtTime:[2018-01-16T00:00:00.00Z+TO+2018-01-16T23:59:59Z]&rows=1&sort=evtTime+desc" | |
## Query by age (older than 7 days) | |
solr_curl "/solr/${collection}/select?q=(evtTime:[*+TO+NOW/DAY-7DAYS])&rows=1&sort=evtTime+asc" | |
## Delete by date/time range | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(evtTime:[2018-01-16T00:00:00.00Z+TO+2018-01-16T23:59:59Z])</query></delete>" | |
## Delete by age (older than 7 days) | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(evtTime:[*+TO+NOW/DAY-7DAYS])</query></delete>" | |
## count of documents per repo | |
solr_curl "/solr/${collection}/query?q=*:*&fl=repo&group.field=repoType&group.ngroups=true&group=true&rows=1000" | |
## count of documents per _ttl_ | |
solr_curl "/solr/${collection}/query?q=*:*&fl=_ttl_&group.field=_ttl_&group.ngroups=true&group=true&rows=1000" | |
## count of documents from hive user per repo | |
solr_curl "/solr/${collection}/query?q=reqUser:hive&fl=reqUser,repo&group.field=repoType&group.ngroups=true&group=true&rows=1000" | |
######################################################################## | |
######################################################################## | |
## Various commands | |
######################################################################## | |
## rebalance leaders | |
solr_curl "/solr/admin/collections?action=REBALANCELEADERS&collection=${collection}" | |
solr_curl "/solr/admin/collections?action=MODIFYCOLLECTION&collection=${collection}&maxShardsPerNode=30" | |
solr_curl "/solr/admin/cores?action=REQUESTRECOVERY&core=core_node3" | |
## Purge deleted docs. WARNING: This is very resource intensive. Typically 2x+ disk space. | |
solr_curl "/solr/${collection}/update?optimize=true" | |
solr_curl "/solr/${collection}/update/?commit=true&expungeDeletes=true&waitSearcher=false" | |
######################################################################## | |
######################################################################## | |
## Split shard | |
######################################################################## | |
shard=shard3_1 ## shard to split | |
## Split | |
solr_curl "/solr/admin/collections?action=SPLITSHARD&collection=${collection}&shard=${shard}&async=split_${shard}" | |
## Check status | |
solr_curl "/solr/admin/collections?action=REQUESTSTATUS&requestid=split_${shard}" | |
## Check state of shards | |
## look for: | |
## - old shard is "inactive" | |
## - all other shards are "active" | |
## - the new shards replicas are not co-located | |
## - if co-located, you'll need to replace those replicas (see next section). | |
solr_curl "/solr/admin/collections?action=CLUSTERSTATUS&collection=${collection}&shard=${shard}" | |
## Once it's "inactive", delete original shard | |
solr_curl "/solr/admin/collections?action=DELETESHARD&shard=${shard}&collection=${collection}&async=delete_${shard}" | |
solr_curl "/solr/admin/collections?action=REQUESTSTATUS&requestid=delete_${shard}" | |
######################################################################## | |
######################################################################## | |
## Delete, Add (Replace) replica | |
######################################################################## | |
shard=shard5 | |
replica=core_node2 ## replica to delete | |
node="host1.domain.tld:8886_solr" ## node to add the new replica to | |
## Add | |
solr_curl "/solr/admin/collections?action=ADDREPLICA&collection=${collection}&shard=${shard}&node=${node}&async=addreplica_${shard}" | |
## Delete | |
solr_curl "/solr/admin/collections?action=DELETEREPLICA&collection=${collection}&shard=${shard}&replica=${replica}&async=delreplica_${replica}" | |
## If that fails, try unloading the core | |
solr_curl "/solr/admin/cores?action=UNLOAD&core=${replica}&deleteIndex=true&deleteDataDir=true&deleteInstanceDir=true&async=unload_${replica}" | |
######################################################################## | |
######################################################################## | |
## Ranger example audit | |
######################################################################## | |
``` | |
{ | |
"id":"8e4bda76-e410-443d-bd95-e21fc8a87da7-655763", | |
"access":"USE", | |
"enforcer":"ranger-acl", | |
"repo":"Dev_hive", | |
"sess":"fc7515c1-8ad5-2860-2eb2-2fa9f7e266b9", | |
"reqUser":"user1", | |
"reqData":"SHOW TABLES IN `somedb`", | |
"resource":"somedb", | |
"cliIP":"10.0.0.141", | |
"logType":"RangerAudit", | |
"result":1, | |
"policy":18, | |
"repoType":3, | |
"resType":"@database", | |
"action":"_any", | |
"evtTime":"2017-10-04T00:07:01.827Z", | |
"seq_num":1311527, | |
"event_count":1, | |
"event_dur_ms":0, | |
"cluster":"Dev", | |
"_ttl_":"+14DAYS", | |
"_expire_at_":"2017-10-18T20:23:01.73Z", | |
"_version_":1583168337246945281 | |
}, | |
``` | |
######################################################################## | |
######################################################################## | |
## offline checkindex and repair | |
## - https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/index/CheckIndex.html | |
######################################################################## | |
lucene="/usr/lib/ambari-infra-solr/server/solr-webapp/webapp/WEB-INF/lib/lucene-core-5.5.2.jar" | |
index="/data/ambari_infra_solr/data/ranger_audits_shard2_replica1/data/index.20171008084444821" | |
## check: | |
java -cp ${lucene} -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex /data/ambari_infra_solr/data/ranger_audits_shard1_replica1/data/index | |
## fix: | |
java -cp ${lucene} -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex ${index} -exorcise | |
######################################################################## | |
######################################################################## | |
## Resources: | |
## - docValue fixes for ranger_audits: https://risdenk.github.io/2017/12/18/ambari-infra-solr-ranger.html | |
## - https://github.com/apache/lucene-solr/blob/master/dev-tools/size-estimator-lucene-solr.xls | |
######################################################################## | |
######################################################################## | |
######################################################################## | |
## Improve ranger_audits config | |
## - https://www.slideshare.net/sematext/solr-search-engine-optimize-is-not-bad-for-you | |
## - http://lucene.apache.org/solr/5_5_0/changes/Changes.html#v5.5.0.upgrading_from_solr_5.4 | |
## - https://apache.googlesource.com/lucene-solr/+/branch_5x/solr/CHANGES.txt see section "Upgrading from Solr 5.4" | |
######################################################################## | |
<!-- Expert: Merge Policy | |
The Merge Policy in Lucene controls how merging of segments is done. | |
--> | |
<mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory"> | |
<!-- "ranger_audit_logs_merge_factor" is set to 5 by default to trigger purging of deleted documents more often --> | |
<int name="maxMergeAtOnce">{{ranger_audit_logs_merge_factor}}</int><!-- Solr default: 10 --> | |
<int name="segmentsPerTier">{{ranger_audit_logs_merge_factor}}</int><!-- Solr default: 10 --> | |
<int name="maxMergeAtOnceExplicit">30</int><!-- Solr default: 30 --> | |
<int name="floorSegmentMB">2048</int><!-- Solr default: 2048 --> | |
<int name="maxMergedSegmentMB">5120</int><!-- Solr default: 5120 --> | |
<double name="reclaimDeletesWeight">2.0</double><!-- Solr default: 2.0 --> | |
<double name="forceMergeDeletesPctAllowed">10.0</double><!-- Solr default: 10.0 --> | |
</mergePolicyFactory> | |
<mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"> | |
<int name="maxMergeCount">4</int> | |
<int name="maxThreadCount">4</int> | |
</mergeScheduler> | |
############################################################################# | |
############################################################################# | |
## ranger_audits delete/clean-up | |
############################################################################# | |
## Assumes Ambari cluster name is the same as the HDFS nameservice | |
cluster="$(hdfs getconf -confKey dfs.internal.nameservices)" | |
## HDFS: /hdp/apps | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:yarn)AND(repo:${cluster}_hadoop)AND(resource:\/hdp\/apps\/*)</query></delete>" | |
## HDFS: /app-logs | |
solr_curl "/solr/${collection}/select?q=resource:\/app-logs\/*+AND+reqUser:yarn&rows=1" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:yarn)AND(repo:${cluster}_hadoop)AND(resource:\/app-logs\/*)</query></delete>" | |
## HDFS: ats | |
solr_curl "/solr/${collection}/select?q=resource:\/ats*+AND+reqUser:yarn&rows=1" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:yarn)AND(repo:${cluster}_hadoop)AND(resource:\/ats*)</query></delete>" | |
## HDFS: hive | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/hdp\/apps\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/app-logs\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/apps\/hive\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/apps\/ez\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/data\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/user\/hive\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/tmp\/hive\/*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/ats*)</query></delete>" | |
## HDFS: hbase | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hbase)AND(repo:${cluster}_hadoop)AND(resource:\/apps\/hbase\/*)</query></delete>" | |
## HDFS: spark | |
solr_curl "/solr/${collection}/select?q=resource:\/spark*+AND+reqUser:spark" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:spark)AND(repo:${cluster}_hadoop)AND(resource:\/spark*)</query></delete>" | |
## YARN: | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_yarn)AND(reqUser:livy)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_yarn)AND(reqUser:hive)</query></delete>" | |
## HBase: atlas | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_hbase)AND(resource:atlas_*)AND(reqUser:atlas)</query></delete>" | |
## HBase: atlas | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_hbase)AND(resource:ATLAS_*)AND(reqUser:hbase)</query></delete>" | |
## Hbase | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_hbase)AND(reqUser:hbase)</query></delete>" | |
## Kafka: atlas | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_kafka)AND(resource:ATLAS_*)AND(reqUser:atlas)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_kafka)AND(resource:ATLAS_*)AND(reqUser:rangertagsync)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_kafka)AND(resource:__consumer_offsets)AND(reqUser:kafka)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(repo:${cluster}_kafka)AND(resource:ATLAS_HOOK)AND(reqUser:hive)</query></delete>" | |
## Other | |
solr_curl "/solr/${collection}/select?q=resource:\/user\/oozie\/*+AND+reqUser:oozie" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:oozie)AND(repo:${cluster}_hadoop)AND(resource:\/user\/oozie\/*)</query></delete>" | |
solr_curl "/solr/${collection}/select?q=resource:\/user\/ams\/*+AND+reqUser:ams" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:ams)AND(repo:${cluster}_hadoop)AND(resource:\/user\/ams\/*)</query></delete>" | |
solr_curl "/solr/${collection}/select?q=resource:\/mr-history*+AND+reqUser:mapred" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:mapred)AND(repo:${cluster}_hadoop)AND(resource:\/mr-history*)</query></delete>" | |
solr_curl "/solr/${collection}/update/?commit=true&stream.body=<delete><query>(reqUser:hive)AND(repo:${cluster}_hadoop)AND(resource:\/mr-history*)</query></delete>" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
When Solr crashes, typically due to an OOM, some of the replicas will go into a bad state ('down' or 'recovery'). | |
All you can do is give Solr room to clean itself up along with a few nudges. | |
1. Check for any non-solr problems. Such as disk issues. Lack of memory... | |
2. Check solr logs. Typically doesn't indicate much, but worth checking. | |
3. Free disk space to give Solr & HDFS room to breathe: | |
- Check /var/log/hadoop/hdfs/audit/solr/spool/archive. These have already been written so can be removed. | |
- Generally clear up /var/log | |
  - Clear any unnecessary files from the partition where solr data resides. Potentially grow the partition. | |
4. Run the setup block from solr.txt | |
5. Check the state of the replicas with CLUSTERSTATUS: | |
``` | |
solr_curl "/solr/admin/collections?action=CLUSTERSTATUS&collection=${collection}" | |
``` | |
6. If a Shard has 1 'active' replica but the others are 'down|recovering', then replace that replica instead of waiting for it to recover: | |
- See 'Replace' section in solr.txt | |
7. If Solr has been up for more than a few minutes, but a shard has both replicas down with no leader then: | |
- Stop the Solr instances of those replicas. Potentially all of the Solr instances. Start only 1 at a time. Hopefully it becomes active first. | |
8. Could change the order of steps 2&3 above if the majority of shards are completely down. | |
9. If there is no chance of recovery: | |
- Replace the problem shards or the entire collection. | |
- If replacing the problem collection: Delete it and restart Ranger which will cause it to be recreated. | |
10. If Solr is down too long, the HDFS spool /var/log/hadoop/hdfs/audit/solr/spool/ may get too big for HDFS to process. | |
- Likely have to delete it which means loss of audits to solr. | |
  - Check its disk space with `du` not `ls`. It is a sparse file so `ls` will not be accurate. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment