Skip to content

Instantly share code, notes, and snippets.

@yuzawa-san
Created July 29, 2020 18:39
Show Gist options
  • Save yuzawa-san/e12b16a4aac7dc7bec2a94c4b8c2da89 to your computer and use it in GitHub Desktop.
Save yuzawa-san/e12b16a4aac7dc7bec2a94c4b8c2da89 to your computer and use it in GitHub Desktop.
OnnxRuntimeMultithreadCrash
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import ai.onnxruntime.OnnxTensor;
import ai.onnxruntime.OrtEnvironment;
import ai.onnxruntime.OrtSession;
import ai.onnxruntime.OrtSession.Result;
public class OnnxRuntimeMultithreadCrash {
private static final String SESSION_NAME = "foo";
private static final String MODEL_FILE = "/my_model.onnx";
// a fake AI service
public static void main(String[] args) {
OrtEnvironment env = OrtEnvironment.getEnvironment();
Map<String, OrtSession> sessions = new ConcurrentHashMap<>();
ScheduledExecutorService sched = Executors.newScheduledThreadPool(1);
sched.scheduleWithFixedDelay(() -> {
// management thread: periodically reloads models
try {
System.out.println("SWAP START");
OrtSession session = env.createSession(MODEL_FILE);
System.out.println("LOADED");
OrtSession oldSession = sessions.put(SESSION_NAME, session);
if (oldSession != null) {
// kill the session, but it may still be in use.
oldSession.close();
}
System.out.println("SWAP DONE");
} catch (Exception e) {
System.err.println("Failed to swap");
e.printStackTrace();
}
}, 0, 10, TimeUnit.SECONDS);
int nThreads = 4;
ExecutorService exec = Executors.newFixedThreadPool(nThreads);
for (int i = 0; i < nThreads; i++) {
exec.submit(() -> {
// server threads which do evaluations of the model
// an infinite loop here, but in reality handling requests
while (true) {
try {
OrtSession session = sessions.get(SESSION_NAME);
if (session == null) {
continue;
}
try (OnnxTensor tensor = OnnxTensor.createTensor(env,
new float[][] { { 0, 24603, 1800000 } })) {
Map<String, OnnxTensor> input = new HashMap<>();
input.put("float_input", tensor);
try (Result result = session.run(input)) {
float[][] value = (float[][]) result.get(0).getValue();
if (value[0][0] != 0.56f) {
System.err.println("bad value");
}
}
}
} catch (Exception e) {
System.err.println("Failed evaluation");
e.printStackTrace();
}
}
});
}
// shutdown hooks would normally be here
}
}
SWAP START
LOADED
SWAP DONE
SWAP START
LOADED
SWAP DONE
SWAP START
LOADED
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x000000011941afb6, pid=65753, tid=0x0000000000003e03
#
# JRE version: Java(TM) SE Runtime Environment (8.0_121-b13) (build 1.8.0_121-b13)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.121-b13 mixed mode bsd-amd64 compressed oops)
# Problematic frame:
# C [libonnxruntime.dylib+0x32fb6] std::__1::__hash_const_iterator<std::__1::__hash_node<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, onnxruntime::InferenceSession::InputDefMetaData>, void*>*> std::__1::__hash_table<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, onnxruntime::InferenceSession::InputDefMetaData>, std::__1::__unordered_map_hasher<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, onnxruntime::InferenceSession::InputDefMetaData>, std::__1::hash<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::__unordered_map_equal<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, onnxruntime::InferenceSession::InputDefMetaData>, std::__1::equal_to<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, true>, std::__1::allocator<std::__1::__hash_value_type<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, onnxruntime::InferenceSession::InputDefMetaData> > >::find<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&) const+0x86
#
# Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
#
# An error report file with more information is saved as:
# /hs_err_pid65753.log
SWAP DONE
#
# If you would like to submit a bug report, please visit:
# http://bugreport.java.com/bugreport/crash.jsp
# The crash happened outside the Java Virtual Machine in native code.
# See problematic frame for where to report the bug.
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment