Demetrius Albuquerque maltzsama

WITH daily_sales AS (
    SELECT
        p.producer_id,
        EXTRACT(DOW FROM s.purchase_date::DATE) AS day_of_week,
        EXTRACT(YEAR FROM s.purchase_date::DATE) AS year,
        COUNT(s.purchase_id) AS total_sales
    FROM
        "case".sales s
    JOIN "case".products p ON
        p.product_id = s.product_id

Clearing cached files from a Windows system can help improve performance and free up disk space. Here are the steps to clear the main types of cache on Windows:

1. Clear Temporary Files Cache

  1. Press Windows + R to open the Run dialog box.
  2. Type temp and press Enter. This will open the Temp folder.
  3. Select all files and folders (Ctrl + A) and press Delete. You may need administrator permissions for some files.
  4. Repeat the process with %temp% in the Run dialog box (a scripted alternative is sketched below).
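
For the same cleanup in script form, a minimal Python sketch (my addition, not part of the original notes; it only touches the current user's temp folder):

import os
import shutil
import tempfile

# tempfile.gettempdir() resolves %TEMP% on Windows.
temp_dir = tempfile.gettempdir()
for name in os.listdir(temp_dir):
    path = os.path.join(temp_dir, name)
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
    except OSError:
        pass  # file in use or protected, same limitation as the manual steps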

2. Clear System Cache

  1. Press Windows + R to open the Run dialog box.
@maltzsama
maltzsama / parquet_reader.rs
Created April 4, 2024 23:48
Parquet reader using Rust
use std::fs::File;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

fn main() {
    let file_path = "Flights_1m.parquet";
    let file = File::open(file_path).unwrap();
    let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
    println!("Converted arrow schema is: {}", builder.schema());
    let mut reader = builder.build().unwrap();
    // Completion of the truncated preview: drain the reader batch by batch.
    while let Some(batch) = reader.next() {
        let batch = batch.unwrap();
        println!("Read batch with {} rows", batch.num_rows());
    }
}
@maltzsama
maltzsama / presto_sql.sql
Last active December 29, 2023 12:12
Generating valid CPF numbers using Presto SQL and PySpark SQL
WITH RECURSIVE
cpf(digits, len) AS (
    SELECT ARRAY[CAST(FLOOR(RANDOM() * 10) AS INTEGER)], 1
    UNION ALL
    SELECT digits || CAST(FLOOR(RANDOM() * 10) AS INTEGER), len + 1
    FROM cpf
    WHERE len < 9
),
cpf_with_first_verifier(digits, len) AS (
    SELECT digits || (CASE WHEN 11 - MOD(SUM((10 - i + 1) * v), 11) >= 10 THEN 0 ELSE 11 - MOD(SUM((10 - i + 1) * v), 11) END), len + 1
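
The preview cuts off mid-query; for reference, the same check-digit rule sketched in Python (my illustration, not part of the gist): each digit is weighted from len + 1 down to 2, the weighted sum is reduced mod 11, and remainders that would yield 10 or 11 map to 0.

import random

def verifier(digits):
    # Weights run from len(digits) + 1 down to 2, matching the SQL's (10 - i + 1).
    total = sum(d * w for d, w in zip(digits, range(len(digits) + 1, 1, -1)))
    v = 11 - total % 11
    return 0 if v >= 10 else v

digits = [random.randint(0, 9) for _ in range(9)]
digits.append(verifier(digits))  # first verifier, weights 10..2
digits.append(verifier(digits))  # second verifier, weights 11..2
print("".join(map(str, digits)))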
@maltzsama
maltzsama / exercicio-API.md
Last active June 30, 2023 17:12
API Exercise

Exercise 1

Based on the FIPE API, create a Python script that saves the data returned by the brands endpoint into a SQLite table.

Table:

  • id
  • name

Recommendation: use Python's requests library. A possible solution is sketched below.
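
A minimal sketch of one solution. The URL below is the public parallelum mirror of the FIPE data, which is an assumption on my part; adjust it to whichever FIPE endpoint the exercise intends:

import sqlite3
import requests

# Assumed endpoint: the public parallelum mirror of the FIPE brands data.
URL = "https://parallelum.com.br/fipe/api/v1/carros/marcas"

def main():
    brands = requests.get(URL, timeout=30).json()
    conn = sqlite3.connect("fipe.db")
    conn.execute("CREATE TABLE IF NOT EXISTS marcas (id TEXT PRIMARY KEY, name TEXT)")
    # Each brand is returned as {"codigo": ..., "nome": ...}.
    conn.executemany(
        "INSERT OR REPLACE INTO marcas (id, name) VALUES (?, ?)",
        [(b["codigo"], b["nome"]) for b in brands],
    )
    conn.commit()
    conn.close()

if __name__ == "__main__":
    main()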

@maltzsama
maltzsama / pickleerror.md
Created May 22, 2023 17:53
pickleerror.md

The error "TypeError: cannot pickle 'SSLContext' object" occurs when you try to serialize an SSLContext object, which is not picklable by default in PySpark.

To work around this error, you can define a custom function that converts each row to JSON using Python's json library. Here is an example of how to do that:

from pyspark.sql import SparkSession
import json

# Function to convert a Row into a JSON string
def row_to_json(row):
    # Completion of the truncated preview: Row.asDict() yields plain Python
    # types, which pickle cleanly, unlike the SSLContext held elsewhere.
    return json.dumps(row.asDict())
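
Applied to a DataFrame, the helper then runs on the executors without dragging the SSLContext along. A minimal usage sketch, assuming a DataFrame named df:

# Only the picklable row_to_json function is shipped to the executors.
json_rdd = df.rdd.map(row_to_json)
print(json_rdd.take(3))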
@maltzsama
maltzsama / dataframe_sender_sqsqueue.py
Created May 18, 2023 17:12
Send messages from PySpark to an SQS queue
from pyspark.sql import SparkSession
import boto3

# Configure the Spark session
spark = SparkSession.builder.getOrCreate()

# Create a DataFrame with sample data
data = [("mensagem1",), ("mensagem2",), ("mensagem3",)]
df = spark.createDataFrame(data, ["message"])
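
The preview stops before the actual send; one plausible continuation, sketched here with a placeholder queue URL (not from the gist):

QUEUE_URL = "https://sqs.us-east-1.amazonaws.com/123456789012/my-queue"  # placeholder

def send_partition(rows):
    # Create the boto3 client on the executor: clients are not picklable,
    # so they cannot be shipped from the driver.
    sqs = boto3.client("sqs")
    for row in rows:
        sqs.send_message(QueueUrl=QUEUE_URL, MessageBody=row.message)

df.foreachPartition(send_partition)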
@maltzsama
maltzsama / aurora_insert_spark.py
Last active March 23, 2023 14:21
Inserting the JSON type into PostgreSQL
from pyspark.sql import SparkSession

def init_spark():
    spark = SparkSession.builder.config("spark.jars", "/postgresql-42.5.4.jar") \
        .master("local").appName("PySpark_Postgres_test").getOrCreate()
    sc = spark.sparkContext
    return spark, sc

def main():
    spark, sc = init_spark()
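    # Sketch of a plausible continuation (not in the preview): write a JSON
    # string column into a PostgreSQL json column. The stringtype=unspecified
    # JDBC flag lets the server cast plain strings to json; the database,
    # table, and credentials below are illustrative assumptions.
    df = spark.createDataFrame([('{"a": 1}',)], ["payload"])
    df.write.format("jdbc") \
        .option("url", "jdbc:postgresql://localhost:5432/test?stringtype=unspecified") \
        .option("dbtable", "json_table") \
        .option("user", "postgres") \
        .option("password", "postgres") \
        .option("driver", "org.postgresql.Driver") \
        .mode("append") \
        .save()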
-- Create a Delta Lake table on Athena; the schema is read from the Delta
-- transaction log, so no column list is needed.
CREATE EXTERNAL TABLE cliente
LOCATION 's3://bucket_name/cliente/'
TBLPROPERTIES ('table_type' = 'DELTA')
@maltzsama
maltzsama / min_time.py
Last active April 27, 2022 13:44
querying number
#!/usr/bin/env python
# -*- coding: utf-8 -*-

def min_time(string):
    """Minimum moves to type `string` on a circular a-z dial, starting at 'a',
    moving one letter per step in either direction."""
    count = 0
    position = 0
    string = string.lower()
    for element in range(len(string)):
        unicode_value = ord(string[element]) - 97
        first_distance = abs(position - unicode_value)
        second_distance = 26 - abs(position - unicode_value)
        # Completion of the truncated preview: take the shorter direction,
        # then move the pointer to the current letter.
        count += min(first_distance, second_distance)
        position = unicode_value
    return count
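
A quick check of the completed helper (example values are mine):

print(min_time("za"))  # 2: one step back to 'z', then one step forward to 'a'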