Created
May 6, 2017 02:00
-
-
Save todays-mitsui/c20a1ac43a7684847c9fd1600463174c to your computer and use it in GitHub Desktop.
回答 - 言語処理100本ノック 2015 - 第2章
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
10. Count the lines

Count the number of lines in the file. Use the wc command to verify.
"""
import codecs


def count_lines(lines):
    """Return the number of items yielded by the iterable *lines*.

    Any iterable works (an open file, a list of strings, a generator);
    it is consumed exactly once and nothing is kept in memory.
    """
    return sum(1 for _ in lines)


if __name__ == "__main__":
    # Open the input in a ``with`` block so the handle is closed as soon as
    # counting finishes instead of being left for the garbage collector.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        print(count_lines(f))
    # => 24
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 10. Count the lines
# Count the number of lines in the file. Use the wc command to verify.

# wc reads the file from stdin, so only the bare count is printed.
wc -l < ./src/hightemp.txt
# => 24
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
11. Replace tabs with spaces

Replace every tab character with a single space character.
Use the sed, tr, or expand command to verify.
"""
import codecs


def tabs_to_spaces(lines):
    """Yield each line of *lines* with every tab replaced by one space.

    Line terminators are left untouched, so the output can be written
    back verbatim.
    """
    for line in lines:
        yield line.replace("\t", " ")


if __name__ == "__main__":
    # ``with`` guarantees the input handle is closed when the loop ends.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        for converted in tabs_to_spaces(f):
            # Each line already ends with its own newline.
            print(converted, end="")
    # =>
    # 高知県 江川崎 41 2013-08-12
    # 埼玉県 熊谷 40.9 2007-08-16
    # ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 11. Replace tabs with spaces
# Replace every tab character with a single space character.
# Use the sed, tr, or expand command to verify.

# tr is used instead of sed 's/\t/ /g': "\t" inside a sed regular expression
# is a GNU extension (other seds treat it as a literal "t"), whereas tr
# understands the "\t" escape portably.
tr '\t' ' ' < ./src/hightemp.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
12. Save column 1 to col1.txt and column 2 to col2.txt

Extract only the first column of each line into col1.txt and only the
second column into col2.txt. Use the cut command to verify.
"""
import codecs
import os


def split_columns(lines, out1, out2):
    """Write column 1 of every line to *out1* and column 2 to *out2*.

    Args:
        lines: iterable of tab-separated text lines.
        out1: writable text stream receiving the first column.
        out2: writable text stream receiving the second column.

    Each output line is terminated with exactly one newline.  A line that
    contains no tab is passed through whole to both outputs, which is what
    ``cut -f1`` / ``cut -f2`` do for lines without a delimiter.
    """
    for line in lines:
        # Drop the terminator first so a two-column line does not carry its
        # newline into the second column (which would double the newline).
        text = line.rstrip("\n")
        cols = text.split("\t")
        second = cols[1] if len(cols) > 1 else text
        out1.write(cols[0] + "\n")
        out2.write(second + "\n")


if __name__ == "__main__":
    # Create the output directory if needed, like the shell version does.
    os.makedirs("./dest", exist_ok=True)
    # All three handles live in one ``with`` so every file is closed, and the
    # source is opened first so a missing input does not truncate the outputs.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as src \
            , codecs.open("./dest/col1.txt", "w", "utf-8") as f1 \
            , codecs.open("./dest/col2.txt", "w", "utf-8") as f2:
        split_columns(src, f1, f2)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 12. Save column 1 to col1.txt and column 2 to col2.txt
# Extract only the first column of each line into col1.txt and only the
# second column into col2.txt. Use the cut command to verify.

# Make sure the output directory exists before redirecting into it.
if [ ! -d dest ]; then
    mkdir dest
fi

# cut reads the file directly; -fN selects the N-th tab-separated field.
cut -f1 ./src/hightemp.txt > dest/col1.txt
cut -f2 ./src/hightemp.txt > dest/col2.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
13. Merge col1.txt and col2.txt

Combine the col1.txt and col2.txt produced in exercise 12 into a text file
in which the first and second columns of the original file are laid out
side by side, separated by a tab. Use the paste command to verify.
"""
import codecs


def merge_columns(left, right, out):
    """Write ``left_value<TAB>right_value`` lines to *out*.

    Args:
        left: iterable of lines supplying the first column.
        right: iterable of lines supplying the second column.
        out: writable text stream receiving the merged lines.

    Only the trailing newline of each input line is removed; any other
    leading or trailing whitespace belongs to the value and is preserved,
    as ``paste`` would preserve it.  Pairing stops at the shorter input.
    """
    for col1, col2 in zip(left, right):
        out.write("{0}\t{1}\n".format(col1.rstrip("\n"), col2.rstrip("\n")))


if __name__ == "__main__":
    with codecs.open("./dest/col1.txt", "r", "utf-8") as rf1 \
            , codecs.open("./dest/col2.txt", "r", "utf-8") as rf2 \
            , codecs.open("./dest/col1+2.txt", "w", "utf-8") as wf:
        merge_columns(rf1, rf2, wf)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 13. Merge col1.txt and col2.txt
# Combine the col1.txt and col2.txt produced in exercise 12 into a text file
# in which the first and second columns of the original file are laid out
# side by side, separated by a tab. Use the paste command to verify.

# Make sure the output directory exists before redirecting into it.
if [ ! -d dest ]; then
    mkdir dest
fi

# paste joins corresponding lines of its operands with a tab.
paste ./dest/col1.txt ./dest/col2.txt > ./dest/col1+2.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
14. Print the first N lines

Receive a natural number N (for example as a command-line argument) and
display only the first N lines of the input. Use the head command to verify.
"""
import sys
import codecs
import itertools

# N is taken from the first command-line argument.
count = int(sys.argv[1])

with codecs.open("./src/hightemp.txt", "r", "utf-8") as source:
    # islice stops reading after ``count`` lines; each line keeps its own
    # terminator, so it is written out without adding another newline.
    leading_lines = itertools.islice(source, count)
    for text in leading_lines:
        sys.stdout.write(text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 14. Print the first N lines
# Receive a natural number N (for example as a command-line argument) and
# display only the first N lines of the input. Use the head command to verify.

# "$1" is quoted so the argument is never word-split or glob-expanded, and
# ${1:?...} aborts with a clear message when N was not supplied.
head -n "${1:?missing argument: number of lines N}" ./src/hightemp.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
15. Print the last N lines

Receive a natural number N (for example as a command-line argument) and
display only the last N lines of the input. Use the tail command to verify.
"""
import sys
import codecs
import itertools
import collections


def tail_lines(lines, count):
    """Return the last *count* items of the iterable *lines* as a list.

    The input is read once and at most *count* items are held at a time.
    When *count* exceeds the number of items, every item is returned
    (like ``tail -n``); a *count* of zero or less yields an empty list.
    """
    # A bounded deque discards the oldest entry as each new one arrives, so
    # what remains after consuming the input is exactly the tail.
    return list(collections.deque(lines, maxlen=max(count, 0)))


if __name__ == "__main__":
    count = int(sys.argv[1])
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        for line in tail_lines(f, count):
            # Each line already ends with its own newline.
            print(line, end="")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 15. Print the last N lines
# Receive a natural number N (for example as a command-line argument) and
# display only the last N lines of the input. Use the tail command to verify.

# "$1" is quoted so the argument is never word-split or glob-expanded, and
# ${1:?...} aborts with a clear message when N was not supplied.
tail -n "${1:?missing argument: number of lines N}" ./src/hightemp.txt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
16. ファイルをN分割する | |
自然数Nをコマンドライン引数などの手段で受け取り,入力のファイルを行単位でN分割せよ.同様の処理をsplitコマンドで実現せよ. | |
""" | |
import sys | |
import codecs | |
def line_counts(max_count, n):
    """Return how many lines each of the *n* chunks should receive.

    The ``max_count`` lines are spread as evenly as possible: the first
    ``max_count % n`` chunks get one extra line, so chunk sizes differ by
    at most one and always sum to ``max_count``.

    Args:
        max_count: total number of lines to distribute.
        n: number of chunks; must be a positive integer.

    Returns:
        A list of ``n`` chunk sizes, largest first.

    Raises:
        ValueError: if ``n`` is zero or negative (previously zero raised
            ZeroDivisionError and negative values silently returned ``[]``).
    """
    if n <= 0:
        raise ValueError("n must be a positive integer, got {0!r}".format(n))
    quo, rem = divmod(max_count, n)
    return [quo + 1] * rem + [quo] * (n - rem)
# Number of output files, taken from the first command-line argument.
n = int(sys.argv[1])

# Read the input once, inside a ``with`` block, so no file handle is leaked
# and the file does not have to be scanned a second time for counting.
with codecs.open("./src/hightemp.txt", "r", "utf-8") as rf:
    lines = rf.readlines()
max_count = len(lines)

# Hand out consecutive slices of the input, one slice per output file
# ./dest/split.<index>.txt, using the sizes computed by line_counts().
start = 0
for i, line_count in enumerate(line_counts(max_count, n)):
    with codecs.open("./dest/split.{0}.txt".format(i), "w", "utf-8") as wf:
        wf.writelines(lines[start:start + line_count])
    start += line_count
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
17. Distinct strings in the first column

Find the kinds of strings in the first column (the set of distinct strings).
Use the sort and uniq commands to verify.
"""
import codecs


def distinct_first_columns(lines):
    """Return the set of distinct first-column values in *lines*.

    Lines are tab-separated.  The trailing newline is removed before
    splitting so a line without any tab contributes its text rather than
    its text plus a newline.
    """
    return {line.rstrip("\n").split("\t")[0] for line in lines}


if __name__ == "__main__":
    # ``with`` guarantees the input handle is closed once the set is built.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        prefs = distinct_first_columns(f)
    print(prefs)
    # =>
    # {
    #   '埼玉県', '千葉県', '群馬県', '山形県', '静岡県', '愛知県',
    #   '高知県', '岐阜県', '山梨県', '愛媛県', '和歌山県', '大阪府'
    # }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 17. Distinct strings in the first column
# Find the kinds of strings in the first column (the set of distinct strings).
# Use the sort and uniq commands to verify.

# cut reads the file directly; uniq only collapses adjacent duplicates,
# which is why the column is sorted first.
cut -f1 ./src/hightemp.txt | sort | uniq
# =>
# 愛知県
# 愛媛県
# 岐阜県
# 群馬県
# 高知県
# 埼玉県
# 山形県
# 山梨県
# 静岡県
# 千葉県
# 大阪府
# 和歌山県
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
18. Sort lines in descending order of the number in the third column

Arrange the lines in reverse order of the numeric value in the third column
(note: reorder the lines without changing their contents).
Use the sort command to verify (for this exercise the result does not have
to match the output of the command).
"""
import codecs


def sort_by_third_column_desc(lines):
    """Return *lines* sorted by the numeric third column, largest first.

    Lines are tab-separated and returned unmodified.  The sort is stable,
    so lines with equal values keep their original relative order.

    Raises:
        IndexError: if a line has fewer than three columns.
        ValueError: if a third column is not a valid number.
    """
    return sorted(
        lines,
        key=lambda line: float(line.split("\t")[2]),
        reverse=True,
    )


if __name__ == "__main__":
    # ``with`` guarantees the input handle is closed once sorting is done.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        sorted_lines = sort_by_third_column_desc(f)
    # Every line carries its own newline; end="" avoids printing an extra
    # blank line after the last one.
    print("".join(sorted_lines), end="")
    # =>
    # 高知県	江川崎	41	2013-08-12
    # 埼玉県	熊谷	40.9	2007-08-16
    # 岐阜県	多治見	40.9	2007-08-16
    # 山形県	山形	40.8	1933-07-25
    # 山梨県	甲府	40.7	2013-08-10
    # ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 18. Sort lines in descending order of the number in the third column
# Arrange the lines in reverse order of the numeric value in the third column
# (note: reorder the lines without changing their contents).
# Use the sort command to verify (for this exercise the result does not have
# to match the output of the command).

# -t TAB : split fields on tab only (by default sort splits on any blank,
#          so a space inside a column would shift the field numbers)
# -k 3,3 : use the third field alone as the key (plain -k3 would extend the
#          key from field 3 to the end of the line)
# -n     : compare the key as a number
# -r     : descending (reverse) order
tab=$(printf '\t')
sort -t "$tab" -k 3,3 -nr ./src/hightemp.txt
# =>
# 高知県	江川崎	41	2013-08-12
# 埼玉県	熊谷	40.9	2007-08-16
# 岐阜県	多治見	40.9	2007-08-16
# 山形県	山形	40.8	1933-07-25
# 山梨県	甲府	40.7	2013-08-10
# ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
19. Count how often each first-column string occurs and order by frequency

Compute the frequency of the first-column string of each line and display
the strings from the most frequent to the least frequent.
Use the cut, uniq, and sort commands to verify.
"""
import codecs
from collections import Counter


def first_column_frequencies(lines):
    """Return ``(value, count)`` pairs for the first column of *lines*.

    Lines are tab-separated.  The pairs are ordered from the most common
    value to the least common, as produced by ``Counter.most_common()``.
    """
    return Counter(line.split("\t")[0] for line in lines).most_common()


if __name__ == "__main__":
    # ``with`` guarantees the input handle is closed once counting is done.
    with codecs.open("./src/hightemp.txt", "r", "utf-8") as f:
        frequencies = first_column_frequencies(f)
    print(frequencies)
    # =>
    # [
    #   ('山形県', 3), ('埼玉県', 3), ('群馬県', 3), ('山梨県', 3), ('岐阜県', 2),
    #   ('愛知県', 2), ('千葉県', 2), ('静岡県', 2), ('愛媛県', 1), ('高知県', 1),
    #   ('大阪府', 1), ('和歌山県', 1)
    # ]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# 19. Count how often each first-column string occurs and order by frequency
# Compute the frequency of the first-column string of each line and display
# the strings from the most frequent to the least frequent.
# Use the cut, uniq, and sort commands to verify.

# uniq -c prefixes each distinct value with its count.  The final sort uses
# -n so the counts are compared as numbers (a plain text comparison only
# works while the padding uniq adds keeps all counts the same width) and
# -r so the largest count comes first.
cut -f1 ./src/hightemp.txt | sort | uniq -c | sort -nr
# =>
#       3 山梨県
#       3 山形県
#       3 埼玉県
#       3 群馬県
#       2 千葉県
#       2 静岡県
#       2 岐阜県
#       2 愛知県
#       1 和歌山県
#       1 大阪府
#       1 高知県
#       1 愛媛県
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment