Usually located at /usr/local/cuda/bin
$ nvprof python train_mnist.py
I prefer to use --print-gpu-trace.
-- author: weedge | |
-- params: KEYS[1] user asset key | |
-- params: KEYS[2] event msg key | |
-- params: ARGV[1] incr asset num eg:1,-1 | |
-- params: ARGV[2] user asset key ttl | |
-- params: ARGV[3] event msg key ttl | |
-- return 1: success, 0: no-op, -1: cached asset does not exist, -2: insufficient asset, | |
-- debug: | |
-- redis-cli --ldb --eval user_asset_change.redis.lua I.asset.{100} M.asset.{100}.`ksuid` , 100 86400 86400 | |
-- redis-cli -c -p 26383 --ldb --eval user_asset_change.redis.lua I.asset.{100} M.asset.{100}.`ksuid` , 100 86400 86400 |
package main | |
import ( | |
"context" | |
"encoding/json" | |
"log" | |
"os" | |
"os/signal" | |
"strconv" | |
"syscall" |
package main | |
import ( | |
"context" | |
"log" | |
"os" | |
"os/signal" | |
"syscall" | |
"github.com/apache/rocketmq-client-go/v2" |
show engines; | |
show databases; | |
show variables like "%partition%" ; | |
SHOW NODE; | |
drop database pay; | |
create database pay PARTITION_MODE=partitioning; | |
SHOW CREATE DATABASE pay; | |
use pay; |
show engines; | |
show databases; | |
drop database pay; | |
create database pay ; | |
use pay; | |
show tables; | |
drop table `pay`.`user_asset`; | |
CREATE TABLE `user_asset` | |
( | |
`userId` bigint unsigned NOT NULL DEFAULT '0', |
SHOBJ_CFLAGS ?= -fno-common -g -ggdb | |
SHOBJ_LDFLAGS ?= -shared -Bsymbolic | |
CFLAGS = -Wall -g -fPIC -lc -lm -Og -std=gnu99 | |
CC=gcc | |
all: example.so | |
example.so: example.o | |
$(LD) -o $@ example.o $(SHOBJ_LDFLAGS) $(LIBS) -lc |
.PHONY: using-gcc using-gcc-static using-clang | |
using-gcc: | |
g++-4.8 -o main-gcc -lasan -O -g -fsanitize=address -fno-omit-frame-pointer main.cpp && \ | |
ASAN_OPTIONS=symbolize=1 ASAN_SYMBOLIZER_PATH=$(shell which llvm-symbolizer) ./main-gcc | |
using-gcc-static: | |
g++-4.8 -o main-gcc-static -static-libstdc++ -static-libasan -O -g -fsanitize=address -fno-omit-frame-pointer main.cpp && \ | |
ASAN_OPTIONS=symbolize=1 ASAN_SYMBOLIZER_PATH=$(shell which llvm-symbolizer) ./main-gcc-static |
#include <iostream> | |
#include <random> | |
#include <chrono> | |
#include <x86intrin.h> | |
#include <cassert> | |
// Runtime evaluation for squared Euclidean distance functions | |
// - fvec_L2_sqr_ref: naive reference impl from Faiss | |
// - fvec_L2_sqr_sse: SSE impl from Faiss | |
// - fvec_L2_sqr_avx: AVX impl from Faiss |
/* | |
* Parallel bitonic sort using CUDA. | |
* Compile with | |
* nvcc -arch=sm_11 bitonic_sort.cu | |
* Based on http://www.tools-of-computing.com/tc/CS/Sorts/bitonic_sort.htm | |
* License: BSD 3 | |
*/ | |
#include <stdlib.h> | |
#include <stdio.h> |