- hackmd version: https://hackmd.io/1eeNAS1oQuSvMA0q6y_QuA?view
- gist version: https://gist.github.com/bluet/23e7697b86144561c4a3d804903d059d
[TOC]
- Extract 部份:取出要的資料、去雜訊、資料標準化、parsing...
- Transform:aggregation、mapping、combine、change data types
-- Fragment of Microsoft's undocumented sp_MSforeachtable system procedure,
-- which executes up to three commands once per user table, substituting
-- @replacechar (default '?') with each table's name.
-- NOTE(review): truncated in this chunk — the cursor loop that consumes
-- @command1..3/@whereand is not visible. The trailing "| |" on each line is
-- table-extraction residue, not T-SQL.
CREATE proc [dbo].[sp_MSforeachtable] | |
-- @command1..3: per-table commands; @replacechar is the placeholder replaced by the table name
@command1 nvarchar(2000), @replacechar nchar(1) = N'?', @command2 nvarchar(2000) = null, | |
-- @whereand: extra filter appended to the internal table-selection query
@command3 nvarchar(2000) = null, @whereand nvarchar(2000) = null, | |
-- @precommand/@postcommand: run once before/after the per-table loop
@precommand nvarchar(2000) = null, @postcommand nvarchar(2000) = null | |
AS | |
-- Category mask for MS-shipped objects (0x0002 rendered as the string '2');
-- presumably used further down (outside this view) to exclude system tables.
declare @mscat nvarchar(12) | |
select @mscat = ltrim(str(convert(int, 0x0002))) | |
-- Run the one-off pre-command, if supplied, before iterating tables.
if (@precommand is not null) | |
exec(@precommand) |
-- Show currently running queries on PostgreSQL older than 9.2,
-- where pg_stat_activity exposes procpid / current_query and idle
-- sessions report the literal text '<IDLE>'. Excludes this query itself.
SELECT
    procpid,
    age(clock_timestamp(), query_start),
    usename,
    current_query
FROM pg_stat_activity
WHERE current_query != '<IDLE>'
  AND current_query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start DESC;
-- Show currently running queries (PostgreSQL 9.2+).
-- 9.2 renamed procpid -> pid and current_query -> query, and moved the idle
-- indicator into the "state" column: "query" now retains the last statement
-- text even for idle sessions, so comparing query to '<IDLE>' filters nothing.
-- Filter on state instead; still excludes this monitoring query itself,
-- and sorts longest-running first to match the pre-9.2 variant above.
SELECT
    pid,
    age(clock_timestamp(), query_start),
    usename,
    query
FROM pg_stat_activity
WHERE state != 'idle'
  AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start DESC;
# Summarize .data by {{ group_by_var }}, computing mean({{ mean_var }}) per
# group, plus an overall "Total" row. Uses tidy-eval embracing ({{ }}) so
# callers pass bare (unquoted) column names.
# NOTE(review): truncated in this chunk — the combining step (presumably
# bind_rows(groups_summary, overall_summary)) and the function's closing
# brace are not visible. Trailing "| |" is extraction residue, not R code.
dk_summarize_with_totals <- function(.data, group_by_var, mean_var){ | |
# Per-group means; the grouping column is renamed to the fixed name "group"
# so it can later be stacked with the "Total" row.
groups_summary <- .data %>% | |
dplyr::group_by({{ group_by_var }}) %>% | |
dplyr::summarize(mean = mean({{ mean_var }})) %>% | |
dplyr::rename("group" = {{ group_by_var }} ) | |
# Grand mean over all rows, labelled "Total".
overall_summary <-.data %>% | |
dplyr::summarize(mean = mean({{ mean_var }})) %>% | |
dplyr::mutate(group = "Total") |
[TOC]
from pathlib import Path | |
def make_table_uris(name: str, basepath: str='.'): | |
"""Example function for generating normalized table URIs | |
Args: | |
name: name of table to generate table URIs for | |
basepath: directory to nest table URIs under | |
"""
# NOTE(review): truncated in this chunk — the docstring was left unterminated
# (closed above for readability) and the function body/return are not visible.
# Presumably builds URIs with pathlib.Path under basepath; confirm against the
# full source. Trailing "| |" is extraction residue, not Python.
I get asked pretty regularly what my opinion is on merge commits vs rebasing vs squashing. I've typed up this response so many times that I've decided to just put it in a gist so I can reference it whenever it comes up again.
I use merge, squash, rebase all situationally. I believe they all have their merits but their usage depends on the context. I think anyone who says any particular strategy is the right answer 100% of the time is wrong, but I think there is considerable acceptable leeway in when you use each. What follows is my personal and professional opinion:
-- Mermaid + DuckDB for generating customer hierarchy diagrams
-- Written against DuckDB 0.10.2 (Bill Wallis, 2024-05-09).
-- Sanity check: confirm the DuckDB version in use before running the rest.
SELECT version();
-- Aggregate Overture Maps "places" records into H3 resolution-2 cells and
-- export each cell's boundary as WKT together with its record count.
-- NOTE(review): truncated in this chunk — the COPY ( ... ) wrapper is never
-- closed and the outer SELECT is cut mid-list, so the output target/format is
-- not visible. Trailing "| |" is extraction residue, not SQL.
COPY ( | |
WITH a AS ( | |
-- Overture GERS ids embed an H3 cell: take a leading prefix of the id,
-- parse it as an H3 index, then roll up to its resolution-2 parent.
-- NOTE(review): SUBSTR(id, 0, 17) — DuckDB substr is 1-based and a start of
-- 0 yields fewer characters than the 17 suggests; confirm the intended
-- prefix length (likely SUBSTR(id, 1, 16)).
SELECT h3_cell_to_parent(h3_string_to_h3(SUBSTR(id, 0, 17)), 2) h3_2, | |
COUNT(*) num_recs | |
-- Scan every places parquet file of the 2024-05-16-beta.0 release straight
-- from the public S3 bucket, keeping filename/partition metadata available.
FROM read_parquet('s3://overturemaps-us-west-2/release/2024-05-16-beta.0/theme=places/type=place/*.parquet', | |
filename=true, | |
hive_partitioning=1) | |
GROUP BY 1 | |
) | |
-- Emit each resolution-2 cell as a WKT polygon (num_recs presumably follows
-- below the cut).
SELECT h3_cell_to_boundary_wkt(h3_2), |
import requests | |
import time | |
import json | |
import base64 | |
# Fetch a Microsoft Fabric notebook's definition (ipynb format) by id or
# display name.
# NOTE(review): truncated in this chunk — the REST call that consumes
# workspaceId/notebookId/format (presumably via `requests` against the Fabric
# API, with base64-decoding of the returned payload) is not visible.
# `notebookutils` is not imported here; it is presumably injected by the
# Fabric/Synapse notebook runtime — confirm this runs in such a session.
# Trailing "| |" is extraction residue, not Python.
def get_notebook_content(notebook_id_or_name): | |
# Resolve the notebook and pull out the ids needed to address it in the API.
nb = notebookutils.notebook.get(notebook_id_or_name) | |
workspaceId = nb['workspaceId'] | |
notebookId = nb['id'] | |
# Requested export format; note this local shadows the `format` builtin
# (harmless inside this function).
format = 'ipynb' |