Yueming Hao (FindHao)
from ctypes import c_void_p, c_long
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
from torch._inductor.utils import maybe_profile
from torch import empty_strided, as_strided, device
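The imports above look like the header of a TorchInductor-generated wrapper. As a rough illustration only (not the generated code itself), the sketch below shows how empty_strided is typically used to allocate an output buffer from explicit sizes and strides; the shapes and device choice are assumptions.
import torch
from torch import empty_strided, device

# Illustrative only: allocate a 1x64x112x112 float32 buffer by spelling out
# sizes and strides, the way Inductor wrapper code allocates outputs.
dev = device("cuda") if torch.cuda.is_available() else device("cpu")
buf0 = empty_strided(
    (1, 64, 112, 112),        # sizes
    (802816, 12544, 112, 1),  # strides of a contiguous NCHW tensor
    device=dev,
    dtype=torch.float32,
)
print(buf0.shape, buf0.stride())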

#0 c10::get_backtrace[abi:cxx11](unsigned long, unsigned long, bool) (frames_to_skip=<optimized out>, frames_to_skip@entry=1, maximum_number_of_frames=maximum_number_of_frames@entry=64, skip_python_frames=skip_python_frames@entry=true) at /scratch/findhao/pytorch/c10/util/Backtrace.cpp:285
#1 0x00007fff836f0821 in c10::(anonymous namespace)::<lambda()>::operator() (__closure=<optimized out>) at /scratch/findhao/pytorch/c10/util/Logging.cpp:28
#2 std::_Function_handler<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >(), c10::(anonymous namespace)::GetFetchStackTrace()::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/include/c++/9/bits/std_function.h:286
#3 0x00007fff836f1d53 in std::function<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > ()>::operator()() const (this=<optimized out>) at /usr/include/c++/9/bits/std_function.h:683
#4 c10::Error::Error (this=0x558e620, source_location=..., msg=...) at /scratch/f

import time
from torch import profiler
import torch
import argparse
out_channels = 64
input_shape = [1, 3, 224, 224]
weight_shape = [64, 3, 7, 7]
stride = (2, 2)
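The constants above read like the setup of a conv2d micro-benchmark. Below is a minimal sketch of how such parameters are commonly fed to torch.nn.functional.conv2d; the padding value and the forward call are assumptions, since the rest of the original snippet is not visible here.
import torch
import torch.nn.functional as F

out_channels = 64
input_shape = [1, 3, 224, 224]
weight_shape = [64, 3, 7, 7]
stride = (2, 2)

# Assumed usage of the constants above: one conv2d forward pass
# (padding=3 mimics a typical ResNet stem and is only a guess here).
dev = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(input_shape, device=dev)
w = torch.randn(weight_shape, device=dev)
y = F.conv2d(x, w, bias=None, stride=stride, padding=3)
print(y.shape)  # torch.Size([1, 64, 112, 112])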
FindHao / runqq.sh
Last active December 24, 2022 22:33
A script to automatically update and run Icalingua++ QQ on x86 Linux
#!/bin/bash
# Put this script in the same folder as Icalingua++-XX.AppImage, or wherever you want the AppImage to be downloaded.
# get script path
file_path=$(dirname "$(readlink -f "$0")")
cd "$file_path" || exit 1
# check github releases for new version
url=$(curl -s https://api.github.com/repos/Icalingua-plus-plus/Icalingua-plus-plus/releases/latest | grep "browser_download_url.*AppImage" | cut -d : -f 2,3 | tr -d \" | sed -n '3p')
# check if url is empty
FindHao / resnet.py
Created December 1, 2022 19:23
A ResNet example for JAX
# https://github.com/phlippe/uvadlc_notebooks_benchmarking/blob/main/PyTorch/Tutorial5_Inception_ResNet_DenseNet.py
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data as data
import torch
from flax.training import train_state, checkpoints
from flax import linen as nn
from jax import random
import jax.numpy as jnp
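Only the imports of the JAX/Flax example survive in this preview. As a self-contained sketch (not the gist's actual model) of what a small residual block looks like with these imports:
from flax import linen as nn
from jax import random
import jax.numpy as jnp

class ResidualBlock(nn.Module):
    """A minimal residual block for illustration; not the original gist's model."""
    features: int

    @nn.compact
    def __call__(self, x):
        residual = x
        y = nn.Conv(self.features, kernel_size=(3, 3), use_bias=False)(x)
        y = nn.relu(y)
        y = nn.Conv(self.features, kernel_size=(3, 3), use_bias=False)(y)
        return nn.relu(y + residual)

# Shape check with random parameters.
block = ResidualBlock(features=16)
x = jnp.ones((1, 32, 32, 16))            # NHWC, matching Flax's default layout
params = block.init(random.PRNGKey(0), x)
print(block.apply(params, x).shape)       # (1, 32, 32, 16)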
FindHao / backup.sh
Last active April 14, 2024 11:29
VPS backup script
#!/usr/bin/env bash
# Based on the backup script Copyright (C) 2013 - 2020 Teddysun <[email protected]>
# Modified by FindHao <[email protected]>
# https://findhao.net/easycoding/2605.html
# You must modify the config before running it!
# [[ $EUID -ne 0 ]] && echo "Error: This script must be run as root!" && exit 1
import torch
from torch import profiler
import numpy as np
def _len_and_dim_norm(vectors):
"""
length and attention head size dim normalization
"""
vectors = vectors * torch.rsqrt(
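The preview is cut off inside the torch.rsqrt call. In the Reformer-style attention this helper appears to come from, the remaining lines typically scale the vectors by 1/sqrt(attention_head_size); the sketch below is a hedged reconstruction in which the head size is passed in explicitly and the length-normalization step is a stand-in, not the original implementation.
import torch
import torch.nn.functional as F

def _len_and_dim_norm(vectors, attention_head_size=64):
    """
    Length and attention-head-size normalization (hedged reconstruction).
    """
    # Stand-in for the length normalization step of the original helper.
    vectors = F.normalize(vectors, dim=-1)
    # Scale by 1/sqrt(head_size), as in scaled dot-product attention.
    vectors = vectors * torch.rsqrt(
        torch.tensor(attention_head_size, device=vectors.device, dtype=vectors.dtype)
    )
    return vectors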
from torch import profiler
import torch
import torch.nn.functional as F
import argparse
def run_conv2d(input_shape, weight_shape, other_args, profile_folder):
input = torch.ones(input_shape, dtype=torch.float32, device='cuda')
weight = torch.ones(weight_shape, dtype=torch.float32, device='cuda')
bias = other_args[0]
stride = other_args[1]
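The body of run_conv2d is cut off after unpacking bias and stride. A plausible continuation (an assumption, not the gist's code) is a single F.conv2d forward pass, for example:
import torch
import torch.nn.functional as F

def run_conv2d_sketch(input_shape, weight_shape, bias, stride):
    # Hedged stand-in for the truncated run_conv2d above: one forward conv2d.
    x = torch.ones(input_shape, dtype=torch.float32, device="cuda")
    w = torch.ones(weight_shape, dtype=torch.float32, device="cuda")
    y = F.conv2d(x, w, bias=bias, stride=stride)
    torch.cuda.synchronize()  # make sure the kernel has finished before measuring anything
    return y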
FindHao / test_conv2d.py
Created July 21, 2022 21:42
An example to reproduce the performance issue caused by cuDNN kernel compilation
from torch import profiler
import torch
import torch.nn.functional as F
import argparse
def profile(input_shape, weight_shape, other_args, profile_folder):
activity_groups = []
activity_groups.append(profiler.ProfilerActivity.CUDA)
activity_groups.append(profiler.ProfilerActivity.CPU)
FindHao / pt_profiler_example.py
Created July 5, 2022 21:05
PyTorch profiler example
t0 = time.time_ns()
activity_groups = []
activity_groups.append(profiler.ProfilerActivity.CUDA)
activity_groups.append(profiler.ProfilerActivity.CPU)
profile_detailed = True
with profiler.profile(
schedule=profiler.schedule(wait=0, warmup=0, active=1),
activities=activity_groups,
record_shapes=profile_detailed,
profile_memory=profile_detailed,
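The preview ends inside the profiler.profile(...) call. A hedged, self-contained completion is sketched below; the dummy matmul workload, the with_stack flag, and the summary printout are assumptions, not the gist's actual contents.
import time
import torch
from torch import profiler

t0 = time.time_ns()
activity_groups = [profiler.ProfilerActivity.CUDA, profiler.ProfilerActivity.CPU]
profile_detailed = True

with profiler.profile(
    schedule=profiler.schedule(wait=0, warmup=0, active=1),
    activities=activity_groups,
    record_shapes=profile_detailed,
    profile_memory=profile_detailed,
    with_stack=profile_detailed,
) as prof:
    # Placeholder workload; the original gist's workload is not visible here.
    x = torch.randn(1024, 1024)
    (x @ x).sum()
    prof.step()

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(f"profiling wall time: {(time.time_ns() - t0) / 1e6:.2f} ms")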