- Install tools to build:
sudo apt-get update
sudo apt-get install kernel-package fakeroot wget bzip2
- Linux-2.6.39.1-linode34 is the same as regular 2.6.39
| # Python https://github.com/github/gitignore/blob/main/Python.gitignore | |
| # Byte-compiled / optimized / DLL files | |
| __pycache__/ | |
| *.py[cod] | |
| *$py.class | |
| # C extensions | |
| *.so | |
| # Distribution / packaging |
| # ~/.bashrc: executed by bash(1) for non-login shells. | |
| # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) | |
| # for examples | |
| #lib64 variants come from Fedora installation | |
| export TERM=xterm-color | |
| export GO_HOME=/home/atr/src/go/ | |
| export JAVA_HOME=/home/atr/sw/jdk1.8.0_221/ | |
| export PATH=$GO_HOME/bin:$JAVA_HOME/bin/:/home/atr/local/bin/:$PATH |
| atr@atrnuc:~$ cat .bashrc | |
| # ~/.bashrc: executed by bash(1) for non-login shells. | |
| # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) | |
| # for examples | |
| export TERM=xterm-color | |
| export GO_HOME=/home/atr/src/go/ | |
| export JAVA_HOME=/home/atr/sw/jdk1.8.0_221/ | |
| export PATH=$GO_HOME/bin:$JAVA_HOME/bin/:/home/atr/local/bin/:$PATH | |
| export LD_LIBRARY_PATH=/home/atr/local/lib/:/home/atr/local/usr/local/lib/:$LD_LIBRARY_PATH |
| /* | |
| * MIT License | |
| Copyright (c) 2020-2021 | |
| Authors: Sacheendra Talluri, Giulia Frascaria, and Animesh Trivedi | |
| This code is part of the Storage System Course at VU Amsterdam | |
| Permission is hereby granted, free of charge, to any person obtaining a copy | |
| of this software and associated documentation files (the "Software"), to deal |
| " Don't try to be vi compatible | |
| set nocompatible | |
| " Helps force plugins to load correctly when it is turned back on below | |
| filetype off | |
| " TODO: Load plugins here (pathogen or vundle) | |
| " Turn on syntax highlighting | |
| syntax on |
| This patch makes the following changes: | |
| * moves two common functions, "getNullCount" and "splitAndTransferValidityBuffer", to the top-level BaseValueVector. This change requires moving "validityBuffer" to the BaseValueVector class (as recommended in this TODO: https://github.com/apache/arrow/blob/master/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java#L89) | |
| * optimizes the implementation of loadValidityBuffer (in the BaseValueVector) to just pass the reference to the validity buffer read from storage | |
| * optimizes for the common boundary condition when all values are valid (as done in the C++ code: https://github.com/apache/arrow/blob/master/cpp/src/arrow/array.h#L290) | |
| These optimizations deliver a performance improvement. | |
| Tests: Read 50M integers from a single Int column (2GB). |
| // Author: Animesh Trivedi | |
| // [email protected] | |
| import org.apache.spark.sql.{SaveMode, SparkSession} | |
| import scala.collection.mutable.ListBuffer | |
| import scala.util.Random | |
| private def generateTSRecord(key: Array[Byte], recBuf:Array[Byte], rand: Random): Unit = { | |
| val fixed = 10 |
| # Command to launch TPCDS: | |
| # ./bin/spark-submit -v --master local[2] --class com.ibm.crail.spark.tools.ParquetGenerator ~/jars/parquet-generator-1.0.jar -c tpcds -o crail://localhost:9060/F1/tpcds/ -p 4 -t 4 -tsf 1 -tdsd /home/atr/zrl/external/github/databricks/tpcds-kit/tools/ -tdd 1 | |
| # And you need to put core-site.xml from crail into the conf folder. | |
| # Licensed to the Apache Software Foundation (ASF) under one or more | |
| # contributor license agreements. See the NOTICE file distributed with | |
| # this work for additional information regarding copyright ownership. | |
| # The ASF licenses this file to You under the Apache License, Version 2.0 | |
| # (the "License"); you may not use this file except in compliance with | |
| # the License. You may obtain a copy of the License at |
| crail.blocksize 4096 | |
| crail.buffersize 4096 | |
| #crail.buffersize 1048576 | |
| #crail.buffersize 8192 | |
| #crail.slicesize 8192 | |
| crail.regionsize 1073741824 | |
| crail.cachelimit 1073741824 |