That will be easy if you use something like jieba. Example:
import jieba
# text from http://li-xirong.github.io/pub/icmr2016_chinese_caption.pdf
#!/bin/sh
# Copyright 2023 Khalifah K. Shabazz
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the “Software”),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
LEETCODE_USER=leetcode username | |
GITHUB_TOKEN=github token | |
GIST_ID=gist id | |
GIST_FILE=progress.txt |
https://hackernoon.com/top-10-system-design-interview-questions-for-software-engineers-8561290f0444 | |
https://medium.com/@codingfreak/binary-tree-interview-questions-and-practice-problems-439df7e5ea1f | |
https://cspiration.com/leetcodeClassification | |
heap sort | |
https://www.hackerearth.com/practice/algorithms/sorting/heap-sort/tutorial/ | |
https://medium.com/@randerson112358/lets-build-a-min-heap-4d863cac6521 | |
https://stackblitz.com/ |
from typing import List, Tuple, Optional, Union, Any, ContextManager, Callable, overload
import builtins
import math
import pickle

# Stub declaration of the dtype type; the body is deliberately left empty.
class dtype: ...

# Underscore-prefixed alias bound to the same class object as dtype.
# NOTE(review): presumably lets annotations refer to the class where a
# parameter named `dtype` would shadow it — confirm against the rest of the stub.
_dtype = dtype
#!/bin/bash
#
# script to extract ImageNet dataset
# ILSVRC2012_img_train.tar (about 138 GB)
# ILSVRC2012_img_val.tar (about 6.3 GB)
# make sure ILSVRC2012_img_train.tar & ILSVRC2012_img_val.tar in your current directory
#
# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
#
# train/
# -*- coding: utf-8 -*-
#
# Author: Taylor G Smith
#
# Recommender system ranking metrics derived from Spark source for use with
# Python-based recommender libraries (i.e., implicit,
# http://github.com/benfred/implicit/). These metrics are derived from the
# original Spark Scala source code for recommender metrics.
# https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
#!/usr/bin/env python | |
# -*- coding:UTF-8 -*- | |
import torch | |
import torch.nn as nn | |
import torch.nn.init as init | |
def weight_init(m): | |
''' |
# Personal tmux configuration, partly based on
# https://github.com/wbkang/wbk-stow/blob/master/tmux-config/.tmux.conf

# Scrollback: keep up to 50000 lines of history.
set-option -g history-limit 50000

# Keep messages on screen for 4 seconds (the value is given in milliseconds).
set-option -g display-time 4000

# Window numbering: set first window to index 1 (not 0) to map more to the
# keyboard layout.
# NOTE(review): the only option visible at this point is renumber-windows
# (renumber remaining windows when one is closed); the base-index setting the
# note above describes is presumably set elsewhere in the file — confirm.
set-option -g renumber-windows on
import datetime | |
import linecache | |
import os | |
import pynvml3 | |
import torch | |
# Module-level switch; presumably controls whether tensor sizes are printed
# by the profiling code that reads it — confirm against its users.
print_tensor_sizes = True

# Starts out as an empty set; presumably holds the tensor sizes seen on the
# previous profiling step — confirm against the code that updates it.
last_tensor_sizes = set()

# Output file name: the timestamp taken when this module is loaded
# (day-month-year-hour:minute:second) followed by a fixed suffix.
gpu_profile_fn = (
    datetime.datetime.now().strftime('%d-%b-%y-%H:%M:%S')
    + '-gpu_mem_prof.txt'
)