Created
June 13, 2012 19:26
-
-
Save jeremy/2925923 to your computer and use it in GitHub Desktop.
elasticsearch index thread hung on UAX29URLEmailTokenizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2012-06-13_17:26:28.64289 "elasticsearch[index]-pool-2-thread-17977" daemon prio=10 tid=0x00007fc1cca1a000 nid=0x58c3 runnable [0x00007fc1ba665000]
2012-06-13_17:26:28.64290 java.lang.Thread.State: RUNNABLE
2012-06-13_17:26:28.64291 at org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31.getNextToken(UAX29URLEmailTokenizerImpl31.java:3610)
2012-06-13_17:26:28.64291 at org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer.incrementToken(UAX29URLEmailTokenizer.java:223)
2012-06-13_17:26:28.64293 at org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:48)
2012-06-13_17:26:28.64294 at org.apache.lucene.analysis.standard.StandardFilter.incrementToken(StandardFilter.java:55)
2012-06-13_17:26:28.64295 at org.apache.lucene.analysis.ASCIIFoldingFilter.incrementToken(ASCIIFoldingFilter.java:72)
2012-06-13_17:26:28.64295 at org.apache.lucene.analysis.LowerCaseFilter.incrementToken(LowerCaseFilter.java:60)
2012-06-13_17:26:28.64296 at org.apache.lucene.analysis.snowball.SnowballFilter.incrementToken(SnowballFilter.java:76)
2012-06-13_17:26:28.64297 at org.apache.lucene.index.DocInverterPerField.processFields(DocInverterPerField.java:185)
2012-06-13_17:26:28.64298 at org.apache.lucene.index.DocFieldProcessorPerThread.processDocument(DocFieldProcessorPerThread.java:278)
2012-06-13_17:26:28.64299 at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:766)
2012-06-13_17:26:28.64299 at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:2327)
2012-06-13_17:26:28.64300 at org.elasticsearch.index.engine.robin.RobinEngine.innerIndex(RobinEngine.java:571)
2012-06-13_17:26:28.64301 - locked <0x00000006d466da58> (a java.lang.Object)
2012-06-13_17:26:28.64301 at org.elasticsearch.index.engine.robin.RobinEngine.index(RobinEngine.java:477)
2012-06-13_17:26:28.64302 at org.elasticsearch.index.shard.service.InternalIndexShard.index(InternalIndexShard.java:323)
2012-06-13_17:26:28.64303 at org.elasticsearch.action.index.TransportIndexAction.shardOperationOnPrimary(TransportIndexAction.java:206)
2012-06-13_17:26:28.64303 at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction.performOnPrimary(TransportShardReplicationOperationAction.java:529)
2012-06-13_17:26:28.64304 at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction$1.run(TransportShardReplicationOperationAction.java:427)
2012-06-13_17:26:28.64306 at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
2012-06-13_17:26:28.64306 at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
2012-06-13_17:26:28.64307 at java.lang.Thread.run(Thread.java:636)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3572 zzForAction: { | |
3573 while (true) { | |
3574 | |
3575 if (zzCurrentPosL < zzEndReadL) | |
3576 zzInput = zzBufferL[zzCurrentPosL++]; | |
3577 else if (zzAtEOF) { | |
3578 zzInput = YYEOF; | |
3579 break zzForAction; | |
3580 } | |
3581 else { | |
3582 // store back cached positions | |
3583 zzCurrentPos = zzCurrentPosL; | |
3584 zzMarkedPos = zzMarkedPosL; | |
3585 boolean eof = zzRefill(); | |
3586 // get translated positions and possibly new buffer | |
3587 zzCurrentPosL = zzCurrentPos; | |
3588 zzMarkedPosL = zzMarkedPos; | |
3589 zzBufferL = zzBuffer; | |
3590 zzEndReadL = zzEndRead; | |
3591 if (eof) { | |
3592 zzInput = YYEOF; | |
3593 break zzForAction; | |
3594 } | |
3595 else { | |
3596 zzInput = zzBufferL[zzCurrentPosL++]; | |
3597 } | |
3598 } | |
3599 int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ]; | |
3600 if (zzNext == -1) break zzForAction; | |
3601 zzState = zzNext; | |
3602 | |
3603 zzAttributes = zzAttrL[zzState]; | |
3604 if ( (zzAttributes & 1) == 1 ) { | |
3605 zzAction = zzState; | |
3606 zzMarkedPosL = zzCurrentPosL; | |
3607 if ( (zzAttributes & 8) == 8 ) break zzForAction; | |
3608 } | |
3609 | |
3610 } | |
3611 } | |
3612 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Haven't seen any other reports of this. It'll hang your cluster pretty badly. It appears to be a bug in Lucene 3.5, but I haven't recovered the document that triggers it. Haven't tried latest 3.6.
To get this info, I sent
kill -QUIT <elasticsearch pid>
to get a thread dump on stdout, then did top -u elasticsearch -H
to see the thread responsible for 100% CPU. The PID that top shows for that thread is its thread id. Convert it from decimal to hex, then grep the thread dump for the hex thread id (it appears in the nid= field) to see the backtrace.