# MinHash LSH deduplication on a Spark cluster
import time

from pyspark.ml.feature import MinHashLSH, MinHashLSHModel, Tokenizer, HashingTF
from pyspark.ml.linalg import Vectors
from pyspark.sql.functions import col
from spark_session_builder import build_spark_session

# Connect to the cluster master with all 128 cores.
spark = build_spark_session(
    master="spark://cpu128-dy-r6i-32xlarge-3:7077",
    num_cores=128,
    mem_gb=999,
)

hash_size = 100   # number of MinHash hash tables
threshold = 0.8   # Jaccard similarity cutoff for near-duplicates
start = time.time()
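
The snippet stops right after the timer starts. A minimal sketch of how these imports typically combine into an LSH near-duplicate join follows; `df` with `id` and `text` columns is a hypothetical stand-in, and note that `approxSimilarityJoin` takes a Jaccard *distance*, so a similarity cutoff of 0.8 corresponds to a distance cutoff of 1 - 0.8 = 0.2.

# Hypothetical continuation: df is assumed to have "id" and "text" columns.
tokenizer = Tokenizer(inputCol="text", outputCol="tokens")
tf = HashingTF(inputCol="tokens", outputCol="features", numFeatures=1 << 18)
mh = MinHashLSH(inputCol="features", outputCol="hashes", numHashTables=hash_size)

featurized = tf.transform(tokenizer.transform(df))
model = mh.fit(featurized)

# Self-join under a Jaccard-distance cutoff of 1 - threshold.
pairs = model.approxSimilarityJoin(featurized, featurized, 1 - threshold,
                                   distCol="jaccard_dist")
dupes = pairs.filter(col("datasetA.id") < col("datasetB.id"))
print(f"{dupes.count()} near-duplicate pairs in {time.time() - start:.1f}s")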
# Convert a CodeGen checkpoint to GPT-J format
import torch
from transformers import GPTJForCausalLM, GPTJConfig
from transformers import CodeGenTokenizer, CodeGenForCausalLM

def cg2gptj(code_model):
    # Load the source CodeGen model and keep its config for the conversion.
    cg_model = CodeGenForCausalLM.from_pretrained(code_model, torch_dtype="auto")
    cg_config = cg_model.config
    # Create empty GPTJ model
    print('Creating empty GPTJ model')
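
The function is cut off after this print. A minimal sketch of the step it announces, assuming the GPT-J config simply mirrors the CodeGen model's dimensions (the field names exist on both configs, but the mapping itself is an assumption, not the author's verbatim code):

    # Assumed continuation: build an empty GPT-J with CodeGen's dimensions.
    gptj_config = GPTJConfig(
        vocab_size=cg_config.vocab_size,
        n_positions=cg_config.n_positions,
        n_embd=cg_config.n_embd,
        n_layer=cg_config.n_layer,
        n_head=cg_config.n_head,
        rotary_dim=cg_config.rotary_dim,
    )
    gptj_model = GPTJForCausalLM(gptj_config)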
# Environment setup: OpenMPI, CUDA 11.7, conda env, cuDNN
module load openmpi cuda/11.7

#CONDA_HOME=/fsx/quentin/miniconda3
CONDA_HOME=/fsx/gpt-neox/conda/envs/neox
#CONDA_HOME=/fsx/gpt-neox/conda/envs/improved-t5

CUDNN_HOME=/fsx/quentin/cudnn-linux-x86_64-8.6.0.163_cuda11-archive
export LD_LIBRARY_PATH=$CUDNN_HOME/lib:$LD_LIBRARY_PATH
export CPATH=$CUDNN_HOME/include:$CPATH
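
The script ends with the cuDNN paths. A typical continuation (assumed, since the original cuts off here) puts the chosen conda environment first on PATH and sanity-checks that PyTorch can see CUDA:

# Assumed continuation: expose the conda env and verify CUDA is usable.
export PATH=$CONDA_HOME/bin:$PATH
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"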
# Check whether a URL's host appears in a dataframe of URLs
import pandas as pd
from urllib.parse import urlparse

# NOTE: the original call passed no path; "urls.csv" is a placeholder filename.
df = pd.read_csv("urls.csv")

def url_matches_dataframe(url: str, df: pd.DataFrame) -> bool:
    # Parse the given URL to get the netloc and hostname
    parsed_url = urlparse(url)
    netloc = parsed_url.netloc
    hostname = parsed_url.hostname
    # Remove "www." from the netloc and hostname
    netloc = (netloc or "").removeprefix("www.")
    hostname = (hostname or "").removeprefix("www.")
    # The original is truncated here; a plausible finish, assuming the
    # dataframe has a "url" column, is to compare stripped hostnames:
    hosts = {(urlparse(u).hostname or "").removeprefix("www.") for u in df["url"].astype(str)}
    return hostname in hosts or netloc in hosts
#!/bin/bash
#SBATCH --job-name="elm"
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=16GB       # Amount of CPU memory
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=8      # Crucial - one task per GPU on each node!
#SBATCH --cpus-per-task=6        # Number of cores per task
#SBATCH --hint=nomultithread     # We get physical cores, not logical
#SBATCH --gres=gpu:8             # Number of GPUs per node
#SBATCH --output=%x_%j.out       # Set this to the dir where you want slurm outs to go
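
The header stops before any launch command. A common body for a multi-node script like this (assumed; `train.py` is a placeholder, not from the original) derives the rendezvous address from Slurm and starts one process per task:

# Assumed continuation: rendezvous on the first allocated node, then launch.
export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_PORT=29500
srun python train.py   # placeholder entry point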
# Sanity-check that deepspeed imports, and print its version
import deepspeed as ds
print(ds.__version__)
#!/bin/bash
#SBATCH --job-name="elm"
#SBATCH --partition=gpu
#SBATCH --mem-per-cpu=16GB       # Amount of CPU memory
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8      # Crucial - one task per GPU on each node!
#SBATCH --cpus-per-task=6        # Number of cores per task
#SBATCH --hint=nomultithread     # We get physical cores, not logical
#SBATCH --gres=gpu:8             # Number of GPUs per node
#SBATCH --output=%x_%j.out       # Set this to the dir where you want slurm outs to go
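
Given the deepspeed import check above, a plausible body for this single-node header (assumed; the script name is a placeholder) is a deepspeed launcher run across the node's 8 GPUs:

# Assumed continuation: launch with the deepspeed CLI on all 8 local GPUs.
# (train.py is a placeholder; script-specific flags would follow it.)
deepspeed --num_gpus 8 train.py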