Skip to content

Instantly share code, notes, and snippets.

View xiaohan2012's full-sized avatar
🍠
eating sweet potatoes

Xiao Han xiaohan2012

🍠
eating sweet potatoes
View GitHub Profile
# pre-commit configuration.
# Hooks written in Python run under python3.11 unless a hook overrides it.
default_language_version:
  python: python3.11
repos:
  # Generic file-hygiene hooks from the official pre-commit-hooks repository,
  # pinned to a fixed tag.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-json
@xiaohan2012
xiaohan2012 / han-macros.sty
Last active October 10, 2024 06:05
Han's LaTeX macros
% han-macros: a personal package of math operators and shorthand macros.
\ProvidesPackage{han-macros}[2023-03-03 Han Macros package]
% Packages this file loads for its macros.
\usepackage{mathtools}
\usepackage{amsmath}
\usepackage{bbm}
\usepackage[dvipsnames]{xcolor}
% \argmax / \argmin: operators printed as "arg max" / "arg min" with a thin
% space; the starred declaration places limits underneath in display math.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
% \nat: blackboard-bold N (presumably the natural numbers); \ensuremath lets
% it be used in both text and math mode.
% NOTE(review): \xspace requires the xspace package and \mathbb requires
% amssymb/amsfonts; neither is loaded in the lines visible here -- confirm
% they are loaded elsewhere in this file or by the including document.
\newcommand{\nat}{\ensuremath{\mathbb N}\xspace}
@xiaohan2012
xiaohan2012 / ray_pbar.py
Created February 1, 2023 20:07
Progress bar in Ray
import ray
from tqdm import tqdm
class RayProgressBar:
    """Helpers for consuming Ray task results as the tasks finish."""

    @staticmethod
    def num_jobs_done_iter(obj_ids):
        """Yield the result of each Ray task as soon as it completes.

        Args:
            obj_ids: Iterable of Ray object references to wait on. The
                iterable is copied, so the caller's collection is not
                consumed or mutated.

        Yields:
            The value returned by ``ray.get`` for each finished reference,
            in completion order. Nothing is yielded for empty input.
        """
        pending = list(obj_ids)
        while pending:
            # ray.wait blocks until at least one reference is ready and
            # returns (ready, still_pending).
            done, pending = ray.wait(pending)
            # Iterate rather than index done[0] so no ready reference is
            # dropped if more than one is ever returned.
            for ref in done:
                yield ray.get(ref)
{
"embeddings": [
{
"tensorName": "20newsgroup",
"tensorShape": [
1727,
100
],
"tensorPath": "https://gist.githubusercontent.com/xiaohan2012/ea60050a0093f86237210762804d066b/raw/16a004a3f95b00d0e416f4f61bca79b663904735/20news-embedding.tsv",
"metadataPath": "https://gist.githubusercontent.com/xiaohan2012/bba8d962a560983b2a4f1e6302f77766/raw/594edaed48724a03c1575fe98a0e974cd8ae8c6f/20newsgroup-labels.tsv"
We can make this file beautiful and searchable if this error is corrected: No tabs found in this TSV file in line 0.
1
0
1
1
1
0
0
0
1
0
We can't make this file beautiful and searchable because it's too large.
0.4290986657142639 0.42358970642089844 0.42887815833091736 0.42253378033638 0.5642207860946655 0.4708421230316162 0.4873978793621063 0.42862436175346375 0.4253680408000946 0.43093568086624146 0.7340912818908691 0.3532145619392395 0.42910027503967285 0.42882195115089417 0.8975470066070557 0.4787288010120392 0.43022769689559937 0.42916831374168396 0.8609343767166138 0.8841681480407715 0.9066706895828247 0.7836756706237793 0.42883408069610596 0.7690004706382751 0.4311878979206085 0.43890127539634705 0.7569071054458618 0.42820173501968384 0.8944039940834045 0.4259122610092163 0.8377361297607422 0.683721125125885 0.42419368028640747 0.42937594652175903 0.4367840886116028 0.8066317439079285 0.9182034730911255 0.5937310457229614 0.49091818928718567 0.4636102318763733 0.43012914061546326 0.8237452507019043 0.9045807719230652 0.4264647662639618 0.42861756682395935 0.6517515182495117 0.845058023929596 0.4262949824333191 0.7197322249412537 0.4352913200855255 0.4283246397972107 0.470052033662796 0.8975110054016113 0.4232
[{"label":"State 0","isSelected":true,"tSNEIteration":342,"tSNEPerplexity":5,"tSNELearningRate":1,"tSNEis3d":false,"pcaComponentDimensions":[0,1,2],"projections":[{"pca-0":-0.1805076003074646,"pca-1":-0.052298735827207565,"pca-2":0.7568523287773132,"pca-3":-0.44091248512268066,"pca-4":-0.30822834372520447,"pca-5":-0.13008351624011993,"pca-6":0.004937216639518738,"pca-7":0.24454794824123383,"pca-8":0.036458518356084824,"pca-9":-0.011632245033979416,"tsne-0":25.64325238381404,"tsne-1":-2.2261678929273576},{"pca-0":-0.33329975605010986,"pca-1":0.5092434287071228,"pca-2":0.14169825613498688,"pca-3":0.3302021920681,"pca-4":0.5076401829719543,"pca-5":-0.2071685791015625,"pca-6":-0.15575245022773743,"pca-7":-0.16170069575309753,"pca-8":0.21544840931892395,"pca-9":0.047406088560819626,"tsne-0":-3.150499961645024,"tsne-1":3.058536067646262},{"pca-0":-0.4729415476322174,"pca-1":-0.227816641330719,"pca-2":-0.5151332020759583,"pca-3":-0.5534241199493408,"pca-4":0.043921008706092834,"pca-5":-0.04174278303980827,"pca-6":-0
{
"embeddings": [
{
"tensorName": "Data science label embedding (from stackexchange)",
"tensorShape": [
328,
48
],
"tensorPath": "https://gist.githubusercontent.com/xiaohan2012/493ae0ac0b813ae0d720cc13315009ea/raw/80dc605d270a366bb3d1282fd2a2a040d2cff4bd/datascience-label-embedding.tsv",
"metadataPath": "https://gist.githubusercontent.com/xiaohan2012/93d7d5048eaf0aa67250fdc30e3a6c72/raw/0d0da39df461b2f8a16b7f8955289ed1361d992b/datascience-label-text.tsv",
We can make this file beautiful and searchable if this error is corrected: No tabs found in this TSV file in line 0.
nltk
computer-vision
missing-data
gensim
smote
classification
word2vec
powerbi
rnn
education
We can make this file beautiful and searchable if this error is corrected: It looks like row 2 should actually have 48 columns, instead of 7 in line 1.
0.8404189348220825 0.7459937930107117 0.6308125257492065 0.873324990272522 0.9206571578979492 0.8535201549530029 0.8021265268325806 0.9472481608390808 0.8903049230575562 0.9364266395568848 0.8126969933509827 0.6442192196846008 0.9481191039085388 0.7041640877723694 0.9348658323287964 0.5755905508995056 0.40711039304733276 0.27529847621917725 0.9251526594161987 0.9246417284011841 0.7352113723754883 0.01757754199206829 0.6355307698249817 0.972328782081604 0.9226069450378418 0.6047204732894897 0.41323211789131165 0.33520519733428955 0.617024302482605 0.9292117953300476 0.9471990466117859 0.7368165254592896 0.8655673861503601 0.9826233983039856 0.819502592086792 0.8250176906585693 0.8147366642951965 0.6649282574653625 0.6875866055488586 0.6668310165405273 0.88420170545578 0.5418125987052917 0.6760655641555786 0.7673236727714539 0.6285435557365417 0.9396171569824219 0.2997831106185913 0.9628713130950928
0.8436189889907837 0.794937014579773 0.75254225730896 0.7011001110076904 0.9641327857971191 0.7552525401115417 0.