import os
from pypdf import PdfReader
import re
from pathlib import Path

def get_pdf_title(pdf_path):
    """Get PDF title from metadata, fallback to content extraction if needed."""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PdfReader(file)
            # Prefer the title stored in the document metadata
            if pdf_reader.metadata and pdf_reader.metadata.title:
                return pdf_reader.metadata.title.strip()
            # Fallback: first non-empty line of the first page's text
            text = pdf_reader.pages[0].extract_text() or ""
            for line in text.splitlines():
                if line.strip():
                    return line.strip()
    except Exception:
        pass
    return Path(pdf_path).stem  # last resort: use the file name
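A minimal usage sketch, assuming the PDFs sit in a local folder (the folder path and the plain print-out are illustrative, not part of the original gist):
pdf_dir = Path("./pdfs")  # hypothetical folder
for pdf_path in sorted(pdf_dir.glob("*.pdf")):
    print(f"{pdf_path.name}: {get_pdf_title(pdf_path)}")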
import requests
import os
from urllib.parse import urlparse

def convert_to_pdf_url(presentation_url):
    # Remove any trailing parameters or /edit
    base_url = presentation_url.split('/edit')[0].split('?')[0]
    # Add /export/pdf to the end
    return f"{base_url}/export/pdf"
# git clone https://github.com/salesforce/BLIP
# cd BLIP
# touch BLIP_img2caption.py
import os
import json
from PIL import Image
import torch
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
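The imports match the BLIP image-captioning demo; a hedged sketch of the usual next steps, following the demo's pattern rather than the gist's verbatim code (the checkpoint path and input image are assumptions, and models.blip is only importable after cloning the repo as the comments above indicate):
from models.blip import blip_decoder  # provided by the cloned BLIP repo

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
image_size = 384
transform = transforms.Compose([
    transforms.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                         (0.26862954, 0.26130258, 0.27577711)),
])
# Checkpoint path is an assumption; see the BLIP README for released caption weights
model = blip_decoder(pretrained='model_base_capfilt_large.pth', image_size=image_size, vit='base')
model.eval().to(device)

raw_image = Image.open('example.jpg').convert('RGB')  # input image is an assumption
image = transform(raw_image).unsqueeze(0).to(device)
with torch.no_grad():
    caption = model.generate(image, sample=False, num_beams=3, max_length=20, min_length=5)
print(caption[0])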
e96031413 / TW_stock_DCA.py
Created April 28, 2024 04:07
Total return calculation for TW stock assets using a dollar-cost averaging (DCA) strategy
import pandas as pd
from twstock import Stock
import argparse

def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--etf_code", type=str, default="00733",
    )
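A hedged sketch of the total-return idea the description points to: a fixed budget bought at the first close of each month and valued at the latest close (the budget, start date, and grouping rule are assumptions, not the gist's actual logic):
def dca_total_return(etf_code="00733", monthly_budget=10000, start_year=2023, start_month=1):
    stock = Stock(etf_code)
    records = stock.fetch_from(start_year, start_month)         # daily records since start
    df = pd.DataFrame({"date": [r.date for r in records],
                       "close": [r.close for r in records]})
    monthly = df.groupby(df["date"].dt.to_period("M")).first()  # first trading day per month
    shares = (monthly_budget / monthly["close"]).sum()          # shares accumulated each month
    cost = monthly_budget * len(monthly)
    value = shares * df["close"].iloc[-1]                       # mark to the latest close
    return (value - cost) / cost

print(f"DCA total return: {dca_total_return():.2%}")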
e96031413 / stock_backtesting.py
Created April 28, 2024 04:05
Backtesting TW stock with pandas, backtesting, talib, and FinMind
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from FinMind.data import DataLoader
import pandas as pd
import talib
from talib import abstract
## Fetch the data
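A hedged sketch of the pipeline the imports and the comment above imply: pull daily prices with FinMind, rename columns for backtesting.py, and run a simple SMA-crossover strategy (the stock id, date range, SMA periods, cash, and commission are assumptions):
dl = DataLoader()
raw = dl.taiwan_stock_daily(stock_id="0050", start_date="2022-01-01", end_date="2023-12-31")
df = raw.rename(columns={"open": "Open", "max": "High", "min": "Low",
                         "close": "Close", "Trading_Volume": "Volume"})
df.index = pd.to_datetime(raw["date"])

class SmaCross(Strategy):
    def init(self):
        # 5/20-day simple moving averages computed with TA-Lib
        self.sma_fast = self.I(talib.SMA, self.data.Close, 5)
        self.sma_slow = self.I(talib.SMA, self.data.Close, 20)

    def next(self):
        if crossover(self.sma_fast, self.sma_slow):
            self.buy()
        elif crossover(self.sma_slow, self.sma_fast):
            self.position.close()

bt = Backtest(df, SmaCross, cash=100000, commission=0.001425)
print(bt.run())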
e96031413 / audio_summary.py
Created April 28, 2024 04:03
Summarize audio with Whisper and the OpenAI API
import whisper
import openai
import os
openai.api_key = "sk-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
# input audio file name
audio_file = r"C:\Users\User\Desktop\input.mp3"
# load the model and transcribe the audio
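A hedged continuation of the truncated preview: transcribe locally with Whisper, then ask the chat completion endpoint for a summary (the Whisper model size, chat model name, and prompt wording are assumptions):
model = whisper.load_model("base")          # assumed model size
result = model.transcribe(audio_file)
transcript = result["text"]

# Legacy (<1.0) openai SDK call style, matching the openai.api_key usage above
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Summarize the following transcript in a few bullet points."},
        {"role": "user", "content": transcript},
    ],
)
print(response["choices"][0]["message"]["content"])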
import os
import cv2
import numpy as np
from tqdm import tqdm
import argparse
from facenet_pytorch import MTCNN
mtcnn = MTCNN(select_largest=True, min_face_size=64, post_process=False, device='cuda:0')
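A hedged sketch of how a detector configured like this is commonly used: read a frame with OpenCV, convert BGR to RGB, and crop the returned box (the input and output paths are assumptions):
img_bgr = cv2.imread("frame.jpg")
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
boxes, probs = mtcnn.detect(img_rgb)        # boxes is None when no face is found
if boxes is not None:
    x1, y1, x2, y2 = boxes[0].astype(int)   # largest face, since select_largest=True
    cv2.imwrite("face.jpg", img_bgr[y1:y2, x1:x2])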
import re
import os

urls = "https://drive.google.com/file/d/FILEID_1/view?usp=drive_link, https://drive.google.com/file/d/FILEID_2/view?usp=drive_link, https://drive.google.com/file/d/FILEID_3/view?usp=drive_link"
url_list = urls.split(', ')
pat = re.compile(r'https://drive.google.com/file/d/(.*)/view\?usp=drive_link')
for idx, url in enumerate(url_list):
    g = pat.match(url)
    file_id = g.group(1)  # Drive file ID extracted from the share link
    down_url = f'https://drive.google.com/uc?id={file_id}'
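A hedged guess at the step that follows: actually downloading each file. gdown is not imported in the preview above, so its use and the output names are assumptions:
import gdown
for idx, url in enumerate(url_list):
    file_id = pat.match(url).group(1)
    gdown.download(f"https://drive.google.com/uc?id={file_id}", output=f"file_{idx}", quiet=False)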
"""
https://github.com/d246810g2000/YOLOX/blob/main/datasets/train_val_data_split_coco.py
"""
import os
import cv2
import json
import random
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm
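A hedged sketch of the core split step in the referenced script: shuffle the image list and copy a fixed fraction into train/ and val/ folders (the paths and the 0.9 ratio are assumptions; the original also rewrites COCO annotations, which is omitted here):
random.seed(42)
img_dir, out_dir, train_ratio = "images", "datasets", 0.9
images = sorted(f for f in os.listdir(img_dir) if f.lower().endswith((".jpg", ".png")))
random.shuffle(images)
split = int(len(images) * train_ratio)
for subset, names in (("train", images[:split]), ("val", images[split:])):
    dst = os.path.join(out_dir, subset)
    os.makedirs(dst, exist_ok=True)
    for name in tqdm(names, desc=f"copy {subset}"):
        shutil.copy(os.path.join(img_dir, name), dst)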
def resume_train(self, model):
    if self.args.resume:
        logger.info("resume training")
        if self.args.ckpt is None:
            ckpt_file = os.path.join(self.file_name, "latest" + "_ckpt.pth")
        else:
            ckpt_file = self.args.ckpt
        ckpt = torch.load(ckpt_file, map_location=self.device)
        # resume the model/optimizer state dict
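        # Hedged sketch of how the resume step typically continues (paraphrased from the
        # YOLOX trainer, not the gist's verbatim code): restore weights, optimizer state,
        # and the epoch counter, then hand the model back to the caller.
        model.load_state_dict(ckpt["model"])
        self.optimizer.load_state_dict(ckpt["optimizer"])
        self.start_epoch = ckpt.get("start_epoch", 0)
    return model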