Skip to content

Instantly share code, notes, and snippets.

@davideanastasia
Last active December 18, 2015 03:49
Show Gist options
  • Save davideanastasia/5720903 to your computer and use it in GitHub Desktop.
Save davideanastasia/5720903 to your computer and use it in GitHub Desktop.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.UUID;
import org.apache.cassandra.db.marshal.AsciiType;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.io.sstable.SSTableSimpleUnsortedWriter;
import static org.apache.cassandra.utils.ByteBufferUtil.bytes;
import static org.apache.cassandra.utils.UUIDGen.decompose;
/**
 * Command-line tool that converts a CSV file of users into SSTables for the
 * {@code users} column family of the {@code demo} keyspace.
 *
 * <p>Usage: {@code CassandraBulkInserter <csv_file>}. Each input line must hold
 * exactly five comma-separated fields: firstname, lastname, password, age, email.
 * Lines that do not match are reported on standard output and skipped. The
 * SSTables are written to a directory named after the keyspace, relative to the
 * current working directory.
 *
 * @author davide
 */
public class CassandraBulkInserter
{
    /** Path of the CSV file being loaded; also read by {@link CsvEntry#parse} for its error messages. */
    static String filename;

    /**
     * Reads the CSV file named by {@code args[0]} and writes one row per valid line.
     *
     * @param args command-line arguments; {@code args[0]} is the CSV file path
     * @throws IOException if the input cannot be read or the SSTables cannot be written
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length == 0)
        {
            System.out.println("Expecting <csv_file> as argument");
            System.exit(1);
        }
        filename = args[0];

        String keyspace = "demo";
        File directory = new File(keyspace);
        // Fail early when the output directory is missing and cannot be created
        // (or the path exists but is not a directory).
        if (!directory.isDirectory() && !directory.mkdirs())
        {
            System.out.println("Unable to create output directory " + directory.getAbsolutePath());
            System.exit(1);
        }

        BufferedReader reader = new BufferedReader(new FileReader(filename));
        try
        {
            IPartitioner partitioner = new Murmur3Partitioner();
            // Arguments after the column family name: column-name comparator,
            // sub-comparator (none), buffer size in MB.
            SSTableSimpleUnsortedWriter usersWriter = new SSTableSimpleUnsortedWriter(
                    directory,
                    partitioner,
                    keyspace,
                    "users",
                    AsciiType.instance,
                    null,
                    64);
            try
            {
                CsvEntry entry = new CsvEntry();
                // Every column can share one timestamp (microseconds since the epoch).
                long timestamp = System.currentTimeMillis() * 1000;

                String line;
                int lineNumber = 0;
                while ((line = reader.readLine()) != null)
                {
                    // Line numbers are 1-based so messages match what an editor shows.
                    lineNumber++;
                    if (!entry.parse(line, lineNumber))
                    {
                        continue;
                    }

                    // NOTE(review): this echoes the clear-text password to the console.
                    System.out.println(lineNumber + ": " + entry);

                    // The UUID is the row key, so no separate "id" column is written.
                    ByteBuffer uuid = ByteBuffer.wrap(decompose(entry.key));
                    usersWriter.newRow(uuid);
                    usersWriter.addColumn(bytes("firstname"), bytes(entry.firstname), timestamp);
                    usersWriter.addColumn(bytes("lastname"), bytes(entry.lastname), timestamp);
                    usersWriter.addColumn(bytes("password"), bytes(entry.password), timestamp);
                    // age is an int, so this is the 4-byte encoding matching the
                    // "age int" column of the users table.
                    usersWriter.addColumn(bytes("age"), bytes(entry.age), timestamp);
                    usersWriter.addColumn(bytes("email"), bytes(entry.email), timestamp);
                }
            }
            finally
            {
                // Don't forget to close, even if reading or writing failed.
                usersWriter.close();
            }
        }
        finally
        {
            reader.close();
        }
        // NOTE(review): presumably forces the JVM to exit in case the writer left
        // background threads running - confirm before removing.
        System.exit(0);
    }

    /** Mutable holder for the fields of one CSV line; reused for every line. */
    static class CsvEntry
    {
        UUID key;
        String firstname;
        String lastname;
        String password;
        int age;
        String email;

        /** Returns all fields in a readable form; note that this includes the password. */
        @Override
        public String toString()
        {
            return key.toString() + " : {firstname = " + firstname +
                    ", lastname = " + lastname +
                    ", password = " + password +
                    ", age = " + age +
                    ", email = " + email + "}";
        }

        /**
         * Parses one CSV line into this entry and assigns it a fresh random key.
         * The entry is left untouched when the line is rejected.
         *
         * @param line       raw input line; fields are separated by commas (no quoting support)
         * @param lineNumber line number used in error messages
         * @return {@code true} if the line had five fields and a valid integer age,
         *         {@code false} otherwise (a message is printed to standard output)
         */
        boolean parse(String line, int lineNumber)
        {
            // Ghetto csv parsing
            String[] columns = line.split(",");
            if (columns.length != 5)
            {
                System.out.println(
                        String.format("Invalid input at line %d of %s",
                                lineNumber, filename));
                return false;
            }

            // Validate the only numeric field before touching any state.
            int parsedAge;
            try
            {
                parsedAge = Integer.parseInt(columns[3].trim());
            }
            catch (NumberFormatException e)
            {
                System.out.println(
                        String.format("Invalid number in input at line %d of %s",
                                lineNumber, filename));
                System.out.println(e.getMessage());
                return false;
            }

            key = UUID.randomUUID();
            firstname = columns[0].trim();
            lastname = columns[1].trim();
            password = columns[2].trim();
            age = parsedAge;
            email = columns[4].trim();
            return true;
        }
    }
}
David,Anas,mypass,31,[email protected]
Dave,Smith,mypass,26,[email protected]
John,Doe,mypass,34,[email protected]
-- Schema for the bulk-load demo: a single-node keyspace holding one users table.
create keyspace demo
with replication = {'class' : 'SimpleStrategy', 'replication_factor' : 1 };

-- Each statement needs its own terminating semicolon.
use demo;

-- One row per user, keyed by a uuid.
create table users (
id uuid primary key,
firstname varchar,
lastname varchar,
password varchar,
age int,
email varchar)
with compact storage
and compaction = {'class' : 'LeveledCompactionStrategy' };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment