# List all capture-capable interfaces (use the index with -i)
sudo tcpdump -D
# Capture on interface #1 (index from -D above): -A prints payload as
# ASCII, -s 0 captures full packets, filter is TCP port 8080 only
sudo tcpdump -i 1 -A -s 0 'tcp port 8080'
# Online server:
# Run on the production server: mirror live traffic arriving on
# ONELINE_SERVER_PORT to the test server, coordinating with the
# intercept process identified by INTERCEPT_ID:INTERCEPT_PORT
tcpcopy -x ONELINE_SERVER_PORT-TEST_SERVER_IP:TEST_SERVER_PORT -s INTERCEPT_ID:INTERCEPT_PORT
# Test server:
# Run on the test server: intercept captures the test server's
# responses (pcap filter on TEST_SERVER_PORT) so tcpcopy can discard them
intercept -F 'tcp and port TEST_SERVER_PORT' -p INTERCEPT_PORT
# Load test: 1000 concurrent connections, 30000 requests total,
# POSTing data.json with a JSON content type (-r: don't abort on errors)
ab -r -c1000 -n30000 -p data.json -T 'application/json' 'http://xxx'
# Forward proxy: nginx listens on 1234 and relays each request to the
# host named in the request's own Host header ($http_host)
server {
listen 1234;
location / {
# Optionally rewrite HTTP headers before proxying (disabled)
#proxy_set_header Content-Type $http_content_type;
#proxy_set_header Content-Length $http_content_length;
proxy_pass $scheme://$http_host$request_uri;
}
}
# Log the upstream response body after proxy_pass.
# NOTE: body_filter_by_lua requires OpenResty / lua-nginx-module;
# each ngx.arg[1] chunk of the response body is written to the error log
location / {
proxy_pass http://xxx;
body_filter_by_lua 'ngx.log(ngx.INFO, ngx.arg[1])';
}
# Switch the table to the DYNAMIC row format (InnoDB: stores long
# variable-length column values fully off-page)
ALTER TABLE table_name ROW_FORMAT=dynamic;
# Randomly sample 1000 lines from a file.
# Fixed: the original `sort -R file > head -1000` redirected the shuffled
# output into a file literally named "head"; it must be piped into head.
sort -R file | head -n 1000
# Offline pip install: run pip directly out of its own wheel file to
# bootstrap-install it without network access (--no-index)
python pip-6.0.6-py2.py3-none-any.whl/pip install --no-index pip-6.0.6-py2.py3-none-any.whl
# Purge a file from the entire git history (rewrites every commit on
# every ref). WARNING: destructive; collaborators must re-clone/rebase.
git filter-branch --force --index-filter 'git rm --cached --ignore-unmatch path-to-your-remove-file' --prune-empty --tag-name-filter cat -- --all
git push origin master --force
# Collapse the whole history into a single commit: create an orphan
# branch with the current tree, then replace master with it
git checkout --orphan latest_branch
git add -A
git commit -am "commit message"
git branch -D master
git branch -m master
git push -f origin master
# Hadoop Streaming with custom Python dependencies.
# Mapper-only job (no reducers).
# The local "app" directory contains mapper.py plus the libraries it
# depends on; -files ships the whole directory to every task node.
hadoop jar hadoop-streaming-1.2.1.jar -D mapreduce.job.maps=xxx -D mapreduce.job.queuename=xxx -D mapreduce.task.timeout=xxx -D mapreduce.job.priority=xxx -Dmapreduce.job.name=xxx -files app -mapper "python app/mapper.py" -input /xxx -output /xxx -numReduceTasks 0
# Manage dependencies with a VirtualEnv: ship the env directory and the
# mapper together via -files
-files 'env,mapper.py' -mapper 'python mapper.py'
# mapper.py里添加:
# Inside mapper.py: put the shipped virtualenv's site-packages on the
# import path so the bundled dependencies resolve on the task nodes
import sys
sys.path.append('env/lib/python2.7/site-packages')
# Mapper-only streaming job emitting comma-separated output fields.
# NOTE(review): -mapper "mapper.py" relies on the script being executable
# with a proper shebang on task nodes — otherwise use "python mapper.py";
# confirm against the cluster setup.
hadoop jar hadoop-streaming-1.2.1.jar \
-jobconf stream.map.output.field.separator=, \
-jobconf mapred.textoutputformat.separator=, \
-file mapper.py \
-input /INPUT \
-output /OUTPUT \
-mapper "mapper.py" \
-numReduceTasks 0
# Distributed grep via Hadoop Streaming: each mapper runs `grep $1`.
# stream.non.zero.exit.is.failure=false keeps tasks whose input has no
# matches (grep exits 1) from being marked as failed.
hadoop jar hadoop-streaming-1.2.1.jar \
-D mapreduce.job.name="grep" \
-D stream.non.zero.exit.is.failure=false \
-mapper "grep $1" \
-numReduceTasks 0 \
-input /INPUT \
-output /OUTPUT
# Distributed sed via Hadoop Streaming: replace every '|' with a TAB.
# stream.non.zero.exit.is.failure=false tolerates non-zero sed exits.
# Fixed: the original mapper string `"sed 's/|/\t/g"` was missing the
# closing single quote of the sed program, leaving the quoting unbalanced
# when the mapper command line is executed on the task nodes.
hadoop jar hadoop-streaming-1.2.1.jar \
-D mapreduce.job.name="sed" \
-D stream.non.zero.exit.is.failure=false \
-mapper "sed 's/|/\t/g'" \
-numReduceTasks 0 \
-input /INPUT \
-output /OUTPUT
# 创建topic
# Create a Kafka topic with 36 partitions and replication factor 3
# (older Kafka CLI: brokers are located via the ZooKeeper ensemble)
./kafka-topics.sh --zookeeper ip:port,ip:port --create --partitions 36 --replication-factor 3 --topic mytopic
# Measure per-phase timings of an HTTP request: -w '@file' reads the
# write-out template from format.txt
curl xxx -w '@format.txt'
# format.txt — curl write-out template; %{...} are curl's timing variables
\n
time_namelookup: %{time_namelookup}\n
time_connect: %{time_connect}\n
time_appconnect: %{time_appconnect}\n
time_pretransfer: %{time_pretransfer}\n
time_redirect: %{time_redirect}\n
time_starttransfer: %{time_starttransfer}\n
----------\n
time_total: %{time_total}\n
# Run run.sh in parallel, 4 processes at a time, one line of `file`
# per invocation (-I {} substitutes each input line into the command).
# Fixed: redirect straight from the file instead of a useless `cat |`.
xargs -P 4 -I {} sh run.sh {} < file
# Pull a remote directory: archive mode + recursive, follow symlinks
# (-L), keep partial transfers and show progress (-P), skip paths
# matching "log"
rsync -arvLP --exclude=log USERNAME@IP:PATH .
# SSH: send periodic keepalive probes and multiplex sessions over one
# TCP connection (ControlMaster), avoiding idle disconnects and
# repeated handshakes.
# cat ~/.ssh/config
Host *
TCPKeepAlive yes
ServerAliveInterval 30
ControlMaster auto
ControlPath ~/.ssh/master-%r@%h:%p
# Create a gzip-compressed archive, skipping files that cannot be read
# (--ignore-failed-read) instead of aborting the whole archive.
# Fixed: the original used -cf with a .tgz name, which produces an
# *uncompressed* tar despite the extension; -z actually gzips it.
tar --ignore-failed-read -czf DIR.tgz DIR
# VS Code: open each file in its own tab instead of reusing the
# preview tab
# Add to .vscode/settings.json:
"workbench.editor.enablePreview": false
# Bulk in-place replace a -> b in every file under the current
# directory; -print0/-0 keeps filenames with spaces/newlines safe.
# NOTE(review): `sed -i` here is GNU syntax; BSD/macOS sed needs -i ''.
find . -type f -print0 | xargs -0 sed -i 's/a/b/g'