Created
September 13, 2020 13:06
-
-
Save infoankitp/4438e97edffa71083f4b75c4864e4e78 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
final static int SEED = 123456; | |
final static int LOG2M = 15; // Number of Buckets = 2^LOG2M | |
final static int REGWIDTH = 5; | |
final static int EXPTHRESH = -1; | |
final static boolean SPARSEON = true; | |
/** | |
* Generates a HLL Object with numbers from 0 to n the parameter passed | |
* @param n : Number of Elements in the HLL. | |
* @return Byte Array of the HLL Generated | |
*/ | |
public static byte[] generateHLLofListOfElements(int n) { | |
HashFunction hf = Hashing.murmur3_128(HLLTest.SEED); | |
HLL hll = new HLL(LOG2M, REGWIDTH, EXPTHRESH, SPARSEON, HLLType.EMPTY); | |
for (int i = 0; i < n; ++i) { | |
long hashedValue = hf.hashInt(i).asLong(); | |
hll.addRaw(hashedValue); | |
} | |
return hll.toBytes(); | |
} | |
/** | |
* Inserts a row in the hll_test table with HLL in the form of Byte Arrays and app_id | |
* @param app_id String Application_id | |
* @param user_id_hll Byte Array of the HLL of User Ids | |
* @param session_id_hll Byte Array of the HLL of Session Ids | |
*/ | |
public static void insertHLLByteArrayDB(String app_id, byte[] user_id_hll, byte[] session_id_hll) { | |
try { | |
PreparedStatement stmt = conn.prepareStatement("INSERT INTO hll_test (page, user_id_hll, session_id_hll) VALUES (?, ?, ?)"); | |
stmt.setString(1, app_id); | |
stmt.setBytes(2, user_id_hll); | |
stmt.setBytes(3, session_id_hll); | |
stmt.executeUpdate(); | |
stmt.close(); | |
} catch (SQLException e) { | |
System.err.println(e.getClass().getName() + ": " + e.getMessage()); | |
} | |
} | |
/** | |
* Acquires counts of the Unique User ID Count and Session ID Count after merging different rows. | |
* @return HashMap of the counts | |
*/ | |
public static HashMap<String, Long> getGroupedCardinality() { | |
HashMap<String, Long> rslt = new HashMap<>(); | |
try { | |
Statement stmt = conn.createStatement(); | |
String query = "SELECT hll_cardinality(hll_union_agg(user_id_hll :: hll)) as unique_user_count, " + | |
"hll_cardinality(hll_union_agg(session_id_hll ::hll)) as unique_session_count FROM hll_test"; | |
ResultSet rs = stmt.executeQuery(query); | |
rs.next(); | |
long unique_user_cnt = rs.getLong("unique_user_count"); | |
long unique_session_cnt = rs.getLong("unique_session_count"); | |
rslt.put("unique_user_count", unique_user_cnt); | |
rslt.put("unique_session_count", unique_session_cnt); | |
} catch (SQLException e) { | |
System.err.println(e.getClass().getName() + ": " + e.getMessage()); | |
} | |
return rslt; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment