# Remove any Scala packages that were previously installed through apt.
sudo apt-get remove scala-library scala
# Download the Scala 2.11.8 Debian package into the current directory.
# The scheme is spelled out so the package is fetched over HTTPS (a bare host
# name makes wget fall back to plain HTTP), and the download runs as the
# current user: root is only needed to install the package, not to fetch it.
wget https://www.scala-lang.org/files/archive/scala-2.11.8.deb
# List all TCP ports that are currently being listened on.
# netstat flags: -n numeric addresses, -t TCP only, -l listening sockets,
# -p show the owning PID/program name.
sudo netstat -ntlp | grep LISTEN
# Restrict the listing to one specific port (8080 in this example).
sudo netstat -ntlp | grep ':8080'
# Running the commands above prints lines similar to the following
# (sample output, kept as comments so it is not executed):
#   tcp 0 0 0.0.0.0:27370 0.0.0.0:* LISTEN 4394/skype
#   tcp 0 0 127.0.1.1:53 0.0.0.0:* LISTEN 2216/dnsmasq
#   tcp 0 0 127.0.0.1:631 0.0.0.0:* LISTEN 4912/cupsd
# Install the Alpine packages used by the pip step below: the Python
# interpreter and headers, pip, wget, a C compiler with musl headers, and the
# libxml2/libxslt development files.
apk add \
  python \
  wget \
  py-pip \
  python-dev \
  gcc \
  musl-dev \
  libxml2-dev \
  libxslt-dev

# Warn the user up front, then install setuptools and opcua with pip.
printf '%s\n' "Installing opcua via pip may take longer than expected..."
pip install setuptools opcua
// Check whether a path is present on HDFS, reusing the Hadoop configuration
// carried by the SparkContext (`sc`, defined outside this snippet).
val conf = sc.hadoopConfiguration
val fs = org.apache.hadoop.fs.FileSystem.get(conf)
// The path below may point at either a file or a directory.
val hdfsPath = new org.apache.hadoop.fs.Path("/path/on/hdfs")
val exists = fs.exists(hdfsPath)
// List the entries directly under `outputFileDirforCrashes`, keep only the
// directories, and collect their paths as strings.
// NOTE(review): `htu` and `outputFileDirforCrashes` are defined outside this
// snippet; `htu` looks like a Hadoop/HBase testing utility exposing a DFS
// cluster — confirm against the surrounding code.
val sequenceFiles1 = htu.getDFSCluster.getFileSystem.listStatus(new Path(outputFileDirforCrashes)).filter(_.isDirectory).map(_.getPath.toString)
// Visit each of those directories in turn.
// NOTE(review): the lambda body opened on the next line is not closed within
// this snippet; the remainder of the loop is not visible here.
sequenceFiles1.foreach(dirPath=>{
// Collect the paths (as strings) of the regular files directly inside the
// current directory.
val files = htu.getDFSCluster.getFileSystem.listStatus(new Path(dirPath)).filter(_.isFile).map(_.getPath.toString)
-- Define an external table whose data is stored as Parquet at the given
-- LOCATION. Column names and types below are the example schema.
-- NOTE(review): the LOCATION value is a placeholder; it appears to mean
-- "the HDFS path written without the hdfs:// prefix" — replace it with the
-- real directory before running.
CREATE EXTERNAL TABLE table_name (
  vehicle_id String,
  start_time String,
  av0 Double
)
STORED AS parquet
LOCATION '/path/in/hdfs/without//hdfs://';
# The following example is for a job that produces daily Parquet output,
# partitioned by the 'day' column.
DB="database"
BASE_DIR="/output/parquets"
TBL="${DB}.table"     # name of the impala table
PQT="${BASE_DIR}/day" # parent directory containing the output sub-directories (in the format 'day=ddMMyyy')
# create/mount impala table
impala-shell -q "drop table if exists $TBL; | |
create external table $TBL ( | |
account_id string, |