- Install tools to build:
sudo apt-get update
sudo apt-get install kernel-package fakeroot wget bzip2
- Linux-2.6.39.1-linode34 is the same as regular 2.6.39
# Python https://github.com/github/gitignore/blob/main/Python.gitignore | |
# Byte-compiled / optimized / DLL files | |
__pycache__/ | |
*.py[cod] | |
*$py.class | |
# C extensions | |
*.so | |
# Distribution / packaging |
# ~/.bashrc: executed by bash(1) for non-login shells. | |
# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) | |
# for examples | |
#lib64 variants come from Fedora installation | |
export TERM=xterm-color | |
export GO_HOME=/home/atr/src/go/ | |
export JAVA_HOME=/home/atr/sw/jdk1.8.0_221/ | |
export PATH=$GO_HOME/bin:$JAVA_HOME/bin/:/home/atr/local/bin/:$PATH |
atr@atrnuc:~$ cat .bashrc | |
# ~/.bashrc: executed by bash(1) for non-login shells. | |
# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) | |
# for examples | |
export TERM=xterm-color | |
export GO_HOME=/home/atr/src/go/ | |
export JAVA_HOME=/home/atr/sw/jdk1.8.0_221/ | |
export PATH=$GO_HOME/bin:$JAVA_HOME/bin/:/home/atr/local/bin/:$PATH | |
export LD_LIBRARY_PATH=/home/atr/local/lib/:/home/atr/local/usr/local/lib/:$LD_LIBRARY_PATH |
/* | |
* MIT License | |
Copyright (c) 2020-2021 | |
Authors: Sacheendra Talluri, Giulia Frascaria, and Animesh Trivedi | |
This code is part of the Storage System Course at VU Amsterdam | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal |
" Don't try to be vi compatible | |
set nocompatible | |
" Helps force plugins to load correctly when it is turned back on below | |
filetype off | |
" TODO: Load plugins here (pathogen or vundle) | |
" Turn on syntax highlighting | |
syntax on |
This patch makes the following changes: | |
* moves two common functions, "getNullCount" and "splitAndTransferValidityBuffer", to the top-level BaseValueVector. This change requires moving "validityBuffer" to the BaseValueVector class (as recommended in this TODO: https://github.com/apache/arrow/blob/master/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java#L89) | |
* optimize the implementation of loadValidityBuffer (in the BaseValueVector) to just pass the reference for the validity buffer read from the storage | |
* optimize for the common boundary condition when all variables are valid (as done in the C++ code: https://github.com/apache/arrow/blob/master/cpp/src/arrow/array.h#L290) | |
The optimization improves performance. | |
Tests: Read 50M integers from a single Int column (2GB). |
// Author: Animesh Trivedi | |
// [email protected] | |
import org.apache.spark.sql.{SaveMode, SparkSession} | |
import scala.collection.mutable.ListBuffer | |
import scala.util.Random | |
private def generateTSRecord(key: Array[Byte], recBuf:Array[Byte], rand: Random): Unit = { | |
val fixed = 10 |
# Command to launch TPCDS: | |
# ./bin/spark-submit -v --master local[2] --class com.ibm.crail.spark.tools.ParquetGenerator ~/jars/parquet-generator-1.0.jar -c tpcds -o crail://localhost:9060/F1/tpcds/ -p 4 -t 4 -tsf 1 -tdsd /home/atr/zrl/external/github/databricks/tpcds-kit/tools/ -tdd 1 | |
# And you need to put core-site.xml from crail into the conf folder. | |
# Licensed to the Apache Software Foundation (ASF) under one or more | |
# contributor license agreements. See the NOTICE file distributed with | |
# this work for additional information regarding copyright ownership. | |
# The ASF licenses this file to You under the Apache License, Version 2.0 | |
# (the "License"); you may not use this file except in compliance with | |
# the License. You may obtain a copy of the License at |
crail.blocksize 4096 | |
crail.buffersize 4096 | |
#crail.buffersize 1048576 | |
#crail.buffersize 8192 | |
#crail.slicesize 8192 | |
crail.regionsize 1073741824 | |
crail.cachelimit 1073741824 |