Skip to content

Instantly share code, notes, and snippets.

View masayuki038's full-sized avatar

Masayuki Takahashi masayuki038

View GitHub Profile
import time
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
def load_data():
    """Return the sample records used by the example.

    Returns:
        list[dict]: four records, each with a string under ``"a"`` and an
        integer under ``"b"``. The key ``"k1"`` appears twice (values 2 and
        15). A new list is built on every call.
    """
    return [
        {"a": "k1", "b": 2},
        {"a": "k2", "b": 4},
        {"a": "k3", "b": 5},
        {"a": "k1", "b": 15},
    ]
/* 1 */ org.apache.calcite.DataContext root;
/* 2 */
/* 3 */ public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root0) {
/* 4 */ root = root0;
/* 5 */ final org.apache.calcite.linq4j.Enumerable _inputEnumerable = ((org.apache.calcite.adapter.csv.CsvTranslatableTable) root.getRootSchema().getSubSchema("SALES").getTable("EMPS")).project(root, new int[] {
/* 6 */ 0,
/* 7 */ 1,
/* 8 */ 3});
/* 9 */ return new org.apache.calcite.linq4j.AbstractEnumerable(){
/* 10 */ public org.apache.calcite.linq4j.Enumerator enumerator() {
2019-06-03 20:49:33,973 [main] DEBUG - Cheapest plan:
EnumerableProject(EMPNO=[$0], GENDER=[$2], NAME=[$1]): rowcount = 15.0, cumulative cost = {71.66666666666667 rows, 187.08333333333334 cpu, 0.0 io}, id = 59
EnumerableFilter(condition=[=($1, 'John')]): rowcount = 15.0, cumulative cost = {56.66666666666667 rows, 142.08333333333334 cpu, 0.0 io}, id = 58
CsvTableScan(table=[[SALES, EMPS]], fields=[[0, 1, 3]]): rowcount = 100.0, cumulative cost = {41.66666666666667 rows, 42.083333333333336 cpu, 0.0 io}, id = 57
LogicalProject(subset=[rel#28:Subset#3.ENUMERABLE.[]], EMPNO=[$0], GENDER=[$2], NAME=[$1]): rowcount = 15.0, cumulative cost = {15.0 rows, 45.0 cpu, 0.0 io}, id = 23
LogicalFilter(subset=[rel#22:Subset#2.NONE.[]], condition=[=($1, 'John')]): rowcount = 15.0, cumulative cost = {15.0 rows, 100.0 cpu, 0.0 io}, id = 21
LogicalProject(subset=[rel#20:Subset#1.NONE.[]], EMPNO=[$0], NAME=[$1], GENDER=[$3]): rowcount = 100.0, cumulative cost = {100.0 rows, 300.0 cpu, 0.0 io}, id = 19
CsvTableScan(subset=[rel#18:Subset#0.ENUMERABLE.[]], table=[[SALES, EMPS]], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 0
public class SqlSelect extends SqlCall {
//~ Static fields/initializers ---------------------------------------------
// constants representing operand positions
public static final int FROM_OPERAND = 2;
public static final int WHERE_OPERAND = 3;
public static final int HAVING_OPERAND = 5;
SqlNodeList keywordList;
SqlNodeList selectList;
/**
 * Smoke test: loads the Calcite JDBC driver class, opens a connection using
 * the example model file and executes one filtered query against EMPS.
 *
 * <p>The returned result set is not inspected; the test passes as long as no
 * exception is thrown.
 *
 * @throws ClassNotFoundException if the driver class cannot be loaded
 * @throws SQLException if connecting or executing the query fails
 */
@Test
public void testExample() throws ClassNotFoundException, SQLException {
  Class.forName("org.apache.calcite.jdbc.Driver");

  final String url = "jdbc:calcite:model=target/test-classes/example-model.json";
  final String sql = "select empno, gender, name from EMPS where name = 'John'";

  // Resources are closed in reverse order: statement first, then connection.
  try (Connection connection = DriverManager.getConnection(url, "foo", "bar");
      Statement statement = connection.createStatement()) {
    statement.executeQuery(sql);
  }
}
$ java -cp target/classes;target/dependency/* org.apache.parquet.tools.Main dump D:\tmp\delta\checkpoint2\_delta_log\00000000000000000002.checkpoint.parquet
row group 0
--------------------------------------------------------------------------------
txn:
.appId: BINARY SNAPPY DO:0 FPO:4 SZ:30/28/0.93 VC:7 ENC [more]...
.version: INT64 SNAPPY DO:0 FPO:34 SZ:29/27/0.93 VC:7 ENC [more]...
.lastUpdated: INT64 SNAPPY DO:0 FPO:63 SZ:30/28/0.93 VC:7 ENC [more]...
add:
.path: BINARY SNAPPY DO:0 FPO:93 SZ:304/310/1.02 VC:7 [more]...
.partitionValues:

before

cstore_test=# select product_group, count(*) from customer_reviews group by product_group;
 product_group |  count
---------------+---------
 DVD           |  121418
 Video         |  142235
 Music         |  300628
 Book          | 1198218
@masayuki038
masayuki038 / SampleParquetReader2.java
Created July 22, 2017 15:38
SampleParquetReader2.java
package net.wrap_trap.parquet_sample3;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.ColumnReader;
import org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.parquet.column.impl.ColumnReaderImpl;
import org.apache.parquet.column.page.*;