Boot up with a Fedora Live USB stick.
- Run `vgs` to check if there's any space:
$ sudo vgs
  VG     #PV #LV #SN Attr   VSize    VFree
  fedora   1   3   0 wz--n- <237.28g     0
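
VFree at 0 means the logical volumes already claim the whole group; listing them shows where that space went (an extra check, output omitted here):

$ sudo lvs fedora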
# %%
import httpx
import pandas as pd

# %% Read CSV and rename headers
websites = pd.read_csv("resources/popular_websites.csv", index_col=0)
print(websites)

# %% Define function to check connection
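The connection-check cell is cut off here; a minimal sketch of what it might look like with httpx (the check_connection name, the timeout value, and the name/url column names are assumptions):

def check_connection(name: str, url: str) -> bool:
    """Return True if the URL answers at all, False on a connection error."""
    try:
        response = httpx.get(url, timeout=10.0)
        # Any HTTP response, even 4xx/5xx, counts as "reachable"
        print(f"{name}: {response.status_code}")
        return True
    except httpx.ConnectError:
        print(f"{name}: connection failed")
        return False

# %% Check each website listed in the CSV (column names assumed)
for name, url in zip(websites["name"], websites["url"]):
    check_connection(name, url)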
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.*;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.IcebergGenerics;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.io.CloseableIterable;
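The import list suggests the snippet went on to create a Hadoop-backed catalog and scan a table of generic records. A minimal sketch of that flow, under assumed names (the /tmp/warehouse path, the db.events identifier, and the two-column schema are illustrative; org.apache.iceberg.types.Types is also needed):

Configuration conf = new Configuration();
// Warehouse path is an assumption; point it at any writable directory.
Catalog catalog = new HadoopCatalog(conf, "/tmp/warehouse");

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "name", Types.StringType.get()));

TableIdentifier id = TableIdentifier.of("db", "events");
Table table = catalog.createTable(id, schema);

// Scan the table back as generic records (empty until data files are appended).
// close() may throw IOException; handle or declare it in the enclosing method.
try (CloseableIterable<Record> rows = IcebergGenerics.read(table).build()) {
    rows.forEach(System.out::println);
}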
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.types.{LongType, StructType}

// UserDefinedAggregateFunction is the contract to define
// user-defined aggregate functions (UDAFs)
class MyCountUDAF extends UserDefinedAggregateFunction {
  // The method defined below can be indexed like this: inputSchema(0)
  // Spark invokes it itself (inversion of control)
  // inputSchema(0) returns a StructField object like this:
  // StructField("id", LongType, true, {})
  // StructField lives in the org.apache.spark.sql.types package
  override def inputSchema: StructType = {
    new StructType().add("id", LongType, nullable = true)
  }
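
  // The original class is cut off above; what follows is a hedged sketch of the
  // remaining UserDefinedAggregateFunction contract methods for a simple row
  // counter (the "count" buffer field name is illustrative). It additionally
  // needs org.apache.spark.sql.Row, org.apache.spark.sql.types.DataType and
  // org.apache.spark.sql.expressions.MutableAggregationBuffer.
  override def bufferSchema: StructType =
    new StructType().add("count", LongType, nullable = false)

  override def dataType: DataType = LongType

  override def deterministic: Boolean = true

  override def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(0) = 0L
  }

  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + 1L
  }

  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  }

  override def evaluate(buffer: Row): Any = buffer.getLong(0)
}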
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession.Builder
import org.apache.spark.SparkContext
import org.apache.log4j.{Level, Logger}

// The sparkSession is provided by the Spark Shell itself
// The log level is also already configured by the Spark Shell
def boolean_udf_wrapper(a: String, b: String, t: Any): Boolean = { true }
def string_udf_wrapper(a: String, b: String, t: Any): String = { "••••" }

import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.functions.sum
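Outside the shell those two things have to be set up by hand; a minimal sketch using the imports above (the app name and the local master are assumptions):

// Quiet the default logging, then build the session the shell would otherwise provide
Logger.getLogger("org").setLevel(Level.ERROR)

val spark: SparkSession = SparkSession.builder()
  .appName("spark-shell-notes")   // name is an assumption
  .master("local[*]")
  .getOrCreate()
val sc: SparkContext = spark.sparkContext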
def happyEmployees(salary: Int): Boolean = salary > 2200
def smartTextCase(name: String): String = name.toUpperCase()
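These read like UDF candidates; a hedged sketch of registering them so they can be called from expr (the employees DataFrame and its salary/name columns are assumptions):

spark.udf.register("happyEmployees", happyEmployees _)
spark.udf.register("smartTextCase", smartTextCase _)

// Usage against an assumed employees DataFrame:
// employees
//   .withColumn("is_happy", expr("happyEmployees(salary)"))
//   .withColumn("name_upper", expr("smartTextCase(name)"))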
# Using Kubernetes with Docker in Docker (DIND)
sudo mkdir -p /usr/local
cd /usr/local
sudo mkdir dind-cluster
cd dind-cluster/
sudo chmod o+w .
ls -lat .. | head
# wget https://cdn.rawgit.com/kubernetes-sigs/kubeadm-dind-cluster/master/fixed/dind-cluster-v1.10.sh
curl -O https://cdn.rawgit.com/kubernetes-sigs/kubeadm-dind-cluster/master/fixed/dind-cluster-v1.10.sh
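
The downloaded script still has to be made executable and run; the kubeadm-dind-cluster fixed scripts are normally driven with up/down subcommands (worth double-checking against the script's own usage output for this version):

chmod +x dind-cluster-v1.10.sh
./dind-cluster-v1.10.sh up     # bring the DIND cluster up ('down' tears it down)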