Skip to content

Instantly share code, notes, and snippets.

View polm's full-sized avatar

Paul O'Leary McCann polm

View GitHub Profile
#!/usr/bin/env python3
# https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html
# http://bytepawn.com/hacker-news-embeddings-with-pytorch.html
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import json
from random import choice, random, shuffle
@polm
polm / dummy.py
Created October 18, 2019 01:16
Simplified version of embedding training
#!/usr/bin/env python3
# https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html
# http://bytepawn.com/hacker-news-embeddings-with-pytorch.html
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import json
from random import choice, random, shuffle
[ 58.140] (WW) Failed to open protocol names file lib/xorg/protocol.txt
[ 59.061]
X.Org X Server 1.20.7
X Protocol Version 11, Revision 0
[ 59.061] Build Operating System: Linux Arch Linux
[ 59.061] Current Operating System: Linux shougeimaru 5.6.5-arch3-1 #1 SMP PREEMPT Sun, 19 Apr 2020 13:14:25 +0000 x86_64
[ 59.061] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-linux root=UUID=9342fe9f-1bc1-42e5-aa8e-b4f7d26ce115 rw quiet
[ 59.061] Build Date: 14 January 2020 07:13:52AM
[ 59.061]
[ 59.062] Current version of pixman: 0.38.4
[ 24.189703] fb0: switching to nouveaufb from VESA VGA
[ 24.190015] Console: switching to colour dummy device 80x25
[ 24.190667] nouveau 0000:02:00.0: NVIDIA GT218 (0a8200b1)
[ 24.335909] nouveau 0000:02:00.0: bios: version 70.18.45.00.00
[ 24.337951] nouveau 0000:02:00.0: fb: 512 MiB DDR3
[ 24.645854] audit: type=1130 audit(1587408617.183:11): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-fsck@dev-disk-by\x2dlabel-shougeimaru\x2dhome comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success'
[ 24.675716] EXT4-fs (sda2): mounting ext3 file system using the ext4 subsystem
[ 24.776930] EXT4-fs (sda2): mounted filesystem with ordered data mode. Opts: data=ordered
[ 25.510645] [TTM] Zone kernel: Available graphics memory: 2015186 KiB
[ 25.510650] [TTM] Initializing pool allocator
# nvidia-xconfig: X configuration file generated by nvidia-xconfig
# nvidia-xconfig: version 304.43 ([email protected]) Sun Aug 19 21:28:54 PDT 2012
# nvidia-settings: X configuration file generated by nvidia-settings
# nvidia-settings: version 260.19.44 ([email protected]) Sun Feb 27 21:50:27 PST 2011
Section "ServerLayout"
Identifier "Layout0"
Screen 0 "Screen0" 0 0
InputDevice "Keyboard0" "CoreKeyboard"
[ 2064.502] (WW) Failed to open protocol names file lib/xorg/protocol.txt
[ 2064.503]
X.Org X Server 1.20.7
X Protocol Version 11, Revision 0
[ 2064.505] Build Operating System: Linux Arch Linux
[ 2064.506] Current Operating System: Linux shougeimaru 5.6.5-arch3-1 #1 SMP PREEMPT Sun, 19 Apr 2020 13:14:25 +0000 x86_64
[ 2064.506] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-linux root=UUID=9342fe9f-1bc1-42e5-aa8e-b4f7d26ce115 rw quiet
[ 2064.508] Build Date: 14 January 2020 07:13:52AM
[ 2064.508]
[ 2064.509] Current version of pixman: 0.38.4
[ 2359.097] (WW) Failed to open protocol names file lib/xorg/protocol.txt
[ 2359.098]
X.Org X Server 1.20.7
X Protocol Version 11, Revision 0
[ 2359.100] Build Operating System: Linux Arch Linux
[ 2359.101] Current Operating System: Linux shougeimaru 5.6.5-arch3-1 #1 SMP PREEMPT Sun, 19 Apr 2020 13:14:25 +0000 x86_64
[ 2359.101] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-linux root=UUID=9342fe9f-1bc1-42e5-aa8e-b4f7d26ce115 rw quiet
[ 2359.103] Build Date: 14 January 2020 07:13:52AM
[ 2359.103]
[ 2359.104] Current version of pixman: 0.38.4
@polm
polm / spacy-convert.py
Created April 30, 2020 09:18
GSD CoNLL spaCy prep script
#!/usr/bin/env python3
"""
Convert GSD conll format to a format the spaCy convert script can use as-is.
There are two main changes:
1. POS tags format is changed slightly.
old: 名詞-普通名詞-一般
24731941 年
15955060 日
13733371 月
7032890 大
6115161 本
5634170 学
5352959 人
4568971 中
4437080 国
4403844 一
@polm
polm / chasen.py
Created July 30, 2020 10:19
mecab-python3で-Ochasenを再現する方法
# Reproduce the `-Ochasen` output layout by passing explicit format strings.
import MeCab
import ipadic

# Node format (-F): surface, reading, base form, POS fields 0-3 joined by "-",
# then feature fields 4 and 5 (conjugation type / form in the IPAdic layout).
_NODE_FORMAT = r' -F "%m\t%f[7]\t%f[6]\t%F-[0,1,2,3]\t%f[4]\t%f[5]\n"'
# Unknown-word format (-U): no reading or base form is available, so the
# surface is repeated in those columns and the last two columns are empty.
_UNK_FORMAT = r' -U "%m\t%m\t%m\t%F-[0,1,2,3]\t\t\n"'
CHASEN_ARGS = _NODE_FORMAT + _UNK_FORMAT

# The ipadic package supplies the dictionary arguments; ours are appended.
tagger = MeCab.Tagger(ipadic.MECAB_ARGS + CHASEN_ARGS)
print(tagger.parse("図書館にいた事がバレた"))
# Output