ファイルの文字コードを変換するユーティリティであるnkfのドロップインリプレースメントとなるツールを、Pythonで作りたいと思います。
- システムのデフォルトの文字コードを取得する
- システムのデフォルトの改行文字を取得する
- chardetにより文字コードを判定する
- python-iconvにより文字コードを変換する
- このとき、改行文字の変換も行う
#!/usr/bin/env python3 | |
import argparse | |
import sys | |
import os | |
import pandas as pd | |
def csv_to_latex(input_file, output_file = None): | |
# Read csv file |
#!/usr/bin/env python3 | |
# ref: https://stackoverflow.com/questions/2804543/read-subprocess-stdout-line-by-line#2813530 | |
import io | |
import os | |
import subprocess | |
import sys | |
from pygments import highlight |
# The languages.json is obtained from the tokei project | |
# Source: https://github.com/XAMPPRocky/tokei/blob/c8e4d0703252c87b1df45382b365c6bb00769dbe/languages.json | |
from typing import Dict, Counter as CounterType | |
from collections import Counter | |
import json | |
import os | |
import sys |
#!/usr/bin/env python3 | |
# https://chat.openai.com/share/448d1592-749e-49f4-8e44-948b0207d075 | |
def to_markdown(jypyter_notebook_json): | |
cells = jypyter_notebook_json["cells"] | |
for cell in cells: | |
ct = cell["cell_type"] | |
if ct == "markdown": |
# ref https://zenn.dev/syoyo/articles/9a159ee747835a | |
import sys | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
max_length = 512 | |
# ref https://huggingface.co/facebook/nllb-200-distilled-1.3B | |
# The model was trained with input lengths not exceeding 512 tokens, therefore translating longer sequences might result in quality degradation. |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
static char num_fig_buf[4096]; // the size must be > 3 * 1000 | |
unsigned int keta(unsigned int i) | |
{ | |
unsigned int k = 0; | |
while (1) { |
#!/usr/bin/env python3 | |
import argparse | |
import re | |
import sys | |
from typing import Dict, Iterator, List, Optional, Set, Tuple | |
def load_words(file_path: str) -> Set[str]: | |
with open(file_path, 'r') as f: |
# 2023.03.15 fix: change spec of command-line arguments | |
# ref: https://github.com/zushi0516/arxiv_paper2slack/blob/main/paper_arxiv.py | |
# python3 -m pip install arxiv | |
import argparse | |
import re | |
import arxiv |
#!/usr/bin/env bash | |
# Temporary files for storing the list of extensions | |
tempfile1=$(mktemp) | |
tempfile2=$(mktemp) | |
# Save versions of extensions before the extension update process | |
code --list-extensions --show-versions > "$tempfile1" | |
# Print a message to indicate that the extension update process starts |