-
-
Save dgomesbr/5dc3e2d5bb4fcc90f82cb94fe6fd6561 to your computer and use it in GitHub Desktop.
server:
  port: 8080
spring:
  profiles:
    active: dev
  redis:
    host: 127.0.0.1
    port: 6379
import com.lambdaworks.redis.RedisAsyncConnection;
import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.SetArgs;

import org.slf4j.Logger;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import redis.clients.jedis.Pipeline;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
@SpringBootApplication | |
public class RedisLoaderApplication implements CommandLineRunner { | |
/** | |
* Default value for keys inserted | |
*/ | |
public static String DEFAULT_VALUE = "1"; | |
/** | |
* http://redis.io/commands/SET - NX -- Only set the key if it does not already exist. | |
*/ | |
public static String NX_MODE = "NX"; | |
/** | |
* Used for syncing operations with Jedis | |
*/ | |
public AtomicLong counter = new AtomicLong(0L); | |
Logger logger = org.slf4j.LoggerFactory.getLogger(RedisLoaderApplication.class); | |
@Value("${loader.redis.ip:127.0.0.1}") | |
String redisServerHost; | |
@Value("${loader.redis.port:6379}") | |
int redisServerPort; | |
@Value("${loader.input.src}") | |
String inputFile; | |
@Value("${loader.input.method:JEDIS}") | |
String runWith; | |
/** | |
* To be used along with inputFile | |
*/ | |
@Value("${loader.input.pattern:*}") | |
String inputFilePattern; | |
public static void main(String[] args) { | |
SpringApplication.run(RedisLoaderApplication.class, args); | |
} | |
JedisPool jedisPool() { | |
int defaultTimeout = 30; | |
JedisPoolConfig poolConfig = new JedisPoolConfig(); | |
poolConfig.setTestOnBorrow(true); | |
poolConfig.setTestWhileIdle(true); | |
poolConfig.setMaxWaitMillis(defaultTimeout); | |
poolConfig.setMaxTotal(8); | |
poolConfig.setMinIdle(8); | |
return new JedisPool(poolConfig, redisServerHost, redisServerPort, defaultTimeout); | |
} | |
private void runWithJedis(BufferedReader br) { | |
JedisPool pool = jedisPool(); | |
try (Jedis jedis = pool.getResource()) { | |
jedis.flushAll(); | |
Pipeline p = jedis.pipelined(); | |
br.lines().forEach(key -> { | |
p.set(key, DEFAULT_VALUE, NX_MODE); | |
if (counter.addAndGet(1L) == 350_000) { | |
counter.set(0L); | |
p.sync(); | |
} | |
}); | |
p.sync(); | |
} | |
} | |
private void runWithLettuce(BufferedReader br) { | |
RedisClient client = RedisClient.create("redis://" + redisServerHost + ":" + redisServerPort + "/0"); | |
/* | |
* The AutoFlushCommands state is set per connection and affects therefore all threads using | |
* the shared connection. If you want to omit this effect, use dedicated connections. | |
* The [!!!]AutoFlushCommands state cannot be set on pooled connections by the lettuce connection pooling. | |
* */ | |
//RedisConnectionPool<RedisAsyncCommands<String, String>> pool = client.asyncPool(); | |
//try(RedisAsyncCommands<String, String> cn = pool.allocateConnection()){ | |
RedisAsyncConnection<String, String> cn = client.connectAsync(); | |
cn.flushall(); | |
cn.setAutoFlushCommands(false); | |
br.lines().forEach(key -> { | |
cn.set(key, DEFAULT_VALUE, SetArgs.Builder.nx()); | |
if (counter.addAndGet(1L) == 350_000) { | |
counter.set(0L); | |
cn.flushCommands(); | |
} | |
}); | |
cn.flushCommands(); | |
//} | |
//pool.close(); | |
} | |
@Override | |
public void run(String... args) throws Exception { | |
logger.info("Redis Loader started"); | |
long start = System.nanoTime(); | |
List<Path> files = FileUtils.listFiles(Paths.get("/load/"), "(^x.*)"); | |
// test data | |
BufferedReader br = new BufferedReader( | |
new InputStreamReader( | |
new FileInputStream(inputFile), StandardCharsets.UTF_8 | |
), | |
64 * 1000 * 1000 | |
); | |
if ("JEDIS".equalsIgnoreCase(runWith)) { | |
// 10_000_000 in 49s [475_000 control sync] | |
// 10_000_000 in 29s [350_000 control sync] <<< USED, less sync calls | |
// 10_000_000 in 28s [275_000 control sync] | |
// 10_000_000 in 28s [200_000 control sync] | |
// 10_000_000 in 27s [100_000 control sync] | |
// 10_000_000 in 27s [ 50_000 control sync] | |
runWithJedis(br); | |
} else { | |
// --------------------------------------------------------------------- | |
// *DEAD* restTemplate doesn't support chunking, would require chunking | |
// before the file loop | |
// --------------------------------------------------------------------- | |
// 10_000_000 in 512s [Using Reader outside the file] | |
// x in -s [OOM (probably cause of the SYNC for 10kk responses)] | |
// runWithRedisTemplate(br); | |
/* | |
* Unexpected exception during request: java.lang.OutOfMemoryError: Java heap space java.lang.OutOfMemoryError: Java heap space | |
* */ | |
runWithLettuce(br); | |
} | |
logger.info("Redis Loader finished (took " + TimeUnit.SECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS) + "s)"); | |
} | |
} |
$ head LOAD.txt | |
5511946258960_41_0 | |
555384433222_39_0 | |
555384535698_82_0 | |
5589999183267_74_0 | |
5581987694232_41_0 | |
5521989909934_82_0 | |
554184720023_26_1431367 | |
5581987365904_93_0 | |
5511946769372_39_1543498 | |
5511988257589_141_0 |
Lettuce pipeline and batch flush support at: https://github.com/mp911de/lettuce/issues/92
No matter which number (100 - 350k) I try at https://gist.github.com/dgomesbr/5dc3e2d5bb4fcc90f82cb94fe6fd6561#file-redisloaderapplication-java-L103, I always get heap exhaustion.
Use a batch size between 100 and 1000 elements — i.e. not 100k, but on the order of 100.
This is all very interesting. I think there is space to improve Jedis by adding something to support a batching and/or fire&forget kind of behavior in pipelines. It seems like there are useful use cases, and depending on how we do it, it shouldn't be too hard to implement. /cc @marcosnils @HeartSaVioR
@mp911de for time being we excluded lettuce and trying to improve the bulk execution with Jedis.
@xetorthio the only thing I'm probably missing is the fire'n'forget; we're having some issues on small Redis boxes where a couple of ops aren't completed successfully, so we reduced the batchSize to 50k. We started parsing the responses (via Response.get) and filtering for != null responses — not a single one failed, yet instead of having 34kk keys we are missing around 100k. Trying to debug it right now.
@xetorthio opened redis/jedis#1272 so we can see if that comes in handy to other people.
Please let me know the results of your debugging.
@dgomesbr interesting case. Please keep us posted about the results so we can improve Jedis to handle these cases.
/cc @xetorthio @HeartSaVioR
@xetorthio @marcosnils
Seems like gist doesn't send notification mail when I'm mentioned.
@dgomesbr
I didn't know Java Socket supports full duplex. If reading and writing at the same time doesn't make an issue (Sorry I'm not expert on this), we could have sync thread to sync periodically in the background. I guess it could be used with normal case, not only 'fire and forget' case.
@xetorthio @marcosnils I've put all the info and the updated code at redis/jedis#1272 so we don't miss notifications and interaction.
Debug shown nothing valuable, but I found that Redis on Windows is not as reliable for testing purposes as Linux. We couldn't reproduce it on Ubuntu
RedisTemplate has no chunking support/intermediate sync calls. Jedis always reads and retains the response, even if you don't want it. Maybe you want to bulk load data using the Jedis API directly, or take a look at https://github.com/mp911de/lettuce which creates `Future`s. A `Future` is not required to be read, so fire&forget is a bit simpler. lettuce is also supported by Spring Data Redis.