Skip to content

Instantly share code, notes, and snippets.

@fish8
fish8 / start_spark.sh
Last active June 24, 2022 09:35
start spark shell on cip cluster #cip #ss #spark
1.大集群
spark2.4.3_2.11
1.1 yarn模式
/opt/ubd/core/spark-2.4.3/bin/spark-shell --master yarn --queue ss_deploy --driver-memory 35g --num-executors 50 --executor-cores 4 --executor-memory 20g
1.2 local模式
/opt/ubd/core/spark-2.4.3/bin/spark-shell --queue ss_deploy --driver-memory 35g --num-executors 50 --executor-cores 4 --executor-memory 20g
spark2.4.2_2.12
/opt/ubd/core/spark-2.4.2/bin/spark-shell --queue ss_deploy --driver-memory 35g --num-executors 50 --executor-cores 4 --executor-memory 20g
@fish8
fish8 / create_table_hive.sql
Last active July 9, 2018 08:30
create internal/external table on hive #hive #ss
-- External Hive table j_crh04_30 with columns jid, uid, hits and max_mps.
-- Storage is plain text: fields separated by '|', one row per line,
-- and an empty string is read as NULL.
-- The table reads its data from the directory given in LOCATION.
CREATE EXTERNAL TABLE `j_crh04_30`(
`jid` int,
`uid` bigint,
`hits` int,
`max_mps` double)
row format delimited fields terminated by '|' Lines terminated by '\n' null defined as '' stored as textfile
location '/user/ss_deploy/hive.db/dm_taozt/j_crh04_30';
-- characteristic speed original
CREATE EXTERNAL TABLE `j_cspeed_org`(
@fish8
fish8 / moving_avg_crh_hit.sql
Created July 10, 2018 07:07
crh cell hit moving average #ss #crh
select t1.user_id,t1.journey_id,t1.journey_via_point_id,t1.location_id,t1.start_dt,t1.ts_jvp_id,t1.journey_via_point_type_id,t1.cell_id,
sum((case when t2.cell_id is null then 0 else 1 end) + (case when t3.cell_id is null then 0 else 1 end)
+ (case when t4.cell_id is null then 0 else 1 end) + (case when t5.cell_id is null then 0 else 1 end) + (case when t6.cell_id is null then 0 else 1 end))/5
from (
select v.user_id,
v.journey_id,
v.journey_via_point_id,
v.location_id,
v.start_dt,
v.ts_jvp_id,
@fish8
fish8 / tile servers.js
Created January 8, 2020 06:21
some tile server urls
var baseLayers = {
"高德地图": L.tileLayer('http://webrd0{s}.is.autonavi.com/appmaptile?lang=zh_cn&size=1&scale=1&style=8&x={x}&y={y}&z={z}', { subdomains: "1234" }).addTo(map),
"高德影像": L.layerGroup([
L.tileLayer('http://webst0{s}.is.autonavi.com/appmaptile?style=6&x={x}&y={y}&z={z}', { subdomains: "1234" }),
L.tileLayer('http://webst0{s}.is.autonavi.com/appmaptile?x={x}&y={y}&z={z}&lang=zh_cn&size=1&scale=1&style=8', { subdomains: "1234" })
]),
"天地图": L.layerGroup([
L.tileLayer('http://t{s}.tianditu.cn/DataServer?T=vec_w&X={x}&Y={y}&L={z}', { subdomains: ['0', '1', '2', '3', '4', '5', '6', '7'] }),
L.tileLayer('http://t{s}.tianditu.cn/DataServer?T=cva_w&X={x}&Y={y}&L={z}', { subdomains: ['0', '1', '2', '3', '4', '5', '6', '7'] })
@fish8
fish8 / provinces.py
Created January 14, 2020 10:55
provinces.py
# Lookup table: integer province code -> province name (Chinese).
# Entry order is kept exactly as originally written.
provinces = {
    11: '北京',
    31: '上海',
    51: '广东',
    83: '重庆',
    74: '湖南',
    70: '青海',
    30: '安徽',
    38: '福建',
    87: '甘肃',
    59: '广西',
    50: '海南',
    75: '江西',
    13: '天津',
    90: '吉林',
    97: '黑龙江',
    91: '辽宁',
    71: '湖北',
    89: '新疆',
    88: '宁夏',
    86: '云南',
    36: '浙江',
    17: '山东',
    79: '西藏',
    10: '内蒙古',
    76: '河南',
    81: '四川',
    18: '河北',
    19: '山西',
    84: '陕西',
    34: '江苏',
    85: '贵州',
}
@fish8
fish8 / expr-latex.py
Created February 19, 2020 03:15
sympy string expression to latex
import sympy as sp
from collections import defaultdict
class GenerateSymbols(defaultdict):
    """Mapping that creates a sympy symbol for any key that is not yet present.

    Looking up a missing key builds ``sp.Symbol(key)``, stores it under
    that key and returns it, so later lookups of the same key return the
    same stored object. Keys that already exist are returned unchanged.
    """

    def __missing__(self, key):
        """Create, store and return the symbol for an absent *key*.

        Called by ``dict.__getitem__`` when *key* is not in the mapping.
        """
        symbol = sp.Symbol(key)
        # Store before returning so the next lookup hits the cached entry.
        self[key] = symbol
        return symbol
#global symbols in dictionary
@fish8
fish8 / pdf ucr.bash
Last active February 4, 2021 03:51
pdf ocr
# OCR a scanned PDF via Tesseract, driven by pdfsandwich;
# -lang chi_sim selects the Simplified Chinese language data.
pdfsandwich -lang chi_sim zgsy.pdf
# Merge pages into zzz.pdf from two inputs: handle A is the original PDF and
# handle B is zgsy_ocr.pdf (presumably the file written by the step above).
# Pages 1, 28, 58 and 141 are taken from A; all remaining pages come from B.
pdftk A=zgsy.pdf B=zgsy_ocr.pdf cat A1 B2-27 A28 B29-57 A58 B59-140 A141 B142-end output zzz.pdf
@fish8
fish8 / command line excel.bash
Last active September 14, 2020 07:15
command line excel, csv
# Convert an Excel workbook sheet to CSV
# via csvkit: in2csv converts the sheet named by --sheet
in2csv --sheet 区县级 基站工参统计.xlsx #to stdout
# via Gnumeric: ssconvert with the CSV exporter chosen through -T
ssconvert -T Gnumeric_stf:stf_csv 基站工参统计.xlsx
# Select columns: pipe the CSV into csvcut and keep only the columns named after -c
in2csv --sheet 区县级 基站工参统计.xlsx |csvcut -c "城市名称","人数"
@fish8
fish8 / kde_ridge_plot.py
Created January 7, 2021 05:29
ridge plot, joyplot
"""
Overlapping densities ('ridge plot')
====================================
"""
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
@fish8
fish8 / jupyter-code.sh
Last active February 5, 2021 01:21
jupyter notebook code search and print
# pynb-code: print the source of every code cell in the given Jupyter notebook(s).
# The jq filter keeps cells whose cell_type is "code", appends a blank-line
# separator to each cell's source lines and flattens the result; -j prints the
# strings raw, without adding newlines of its own.
# "$@" is quoted so notebook paths containing spaces or glob characters reach
# jq as single, unmodified arguments.
jq -j '.cells | map( select(.cell_type == "code") | .source +["\n\n"]) | .[][] ' "$@"
#beautify code in notebook and copy to clipboard as rich text
pynb-code xxx.ipynb |pygmentize -f rtf | xclip -t text/rtf -selection clipboard
#find print
find . -name "*.ipynb" -exec pynb-code {} +
#find grep