iamalbert · October 25, 2016 05:57
diff --git a/argparse.py b/argparse.py
 #!/usr/bin/env python3
 import argparse
 import json
 import sys
 import collections
 import itertools

 def main(args):
     """Template entry point: accepts the parsed argument namespace and does nothing yet."""
     return None

 # usage: xx.py [-h] INPUT [OUTPUT]
 if __name__ == '__main__':
     # INPUT is required and opened for reading; OUTPUT is optional, opened for
     # writing, and defaults to "-" (which argparse.FileType maps to stdout).
     parser = argparse.ArgumentParser()
     parser.add_argument('input', type=argparse.FileType('r'))
     parser.add_argument(
         'output',
         nargs="?",
         default="-",
         type=argparse.FileType('w'),
     )

     args = parser.parse_args()
     # Echo the parsed namespace on stderr before handing it to main().
     print(args, file=sys.stderr)

     main(args)
diff --git a/cjk-tokenize-segment.py b/cjk-tokenize-segment.py
 import re

 import jieba

 # Point jieba at the dictionary file data/dict.txt.big.
 jieba.set_dictionary('data/dict.txt.big')

 # Alternatives are tried in this order at each position:
 #   1. a run of characters in the range U+4E00..U+FAFF,
 #   2. a run of ASCII letters and digits,
 #   3. any other single character that is not a space.
 CJK_REGEXP = re.compile(
     '[\u4e00-\ufaff]+'
     '|[A-Za-z0-9]+'
     '|[^ ]'
 )
 def CJK_chunk(s):
     """Split *s* into coarse chunks using ``CJK_REGEXP``.

     Every regex match becomes one chunk with surrounding whitespace stripped.
     Matches that consist only of whitespace (for example a tab, a newline or
     the ideographic space U+3000, all accepted by the pattern's ``[^ ]``
     branch) are dropped rather than returned as empty strings.

     Returns:
         A list of non-empty strings in the order they occur in *s*.
     """
     stripped = (match.strip() for match in CJK_REGEXP.findall(s))
     return [chunk for chunk in stripped if chunk]

 def tokenize(s):
     """Tokenize *s*: chunk it with ``CJK_chunk``, then feed each chunk to
     ``jieba.cut`` and collect every resulting token, in order, into one list.
     """
     return [token for chunk in CJK_chunk(s) for token in jieba.cut(chunk)]
diff --git a/generate-requirement-txt.sh b/generate-requirement-txt.sh
 # NOTE(review): the leading "$" looks like a shell-prompt marker, not part of
 # the command -- drop it if these lines are meant to be executed as a script.

 # Install the pipreqs tool.
 $ pip install pipreqs

 # Run pipreqs against the project directory (this snippet's purpose, per its
 # file name, is generating requirements.txt).
 $ pipreqs /path/to/project
 $ pipreqs --force /path/to/project # when requirements.txt already exists

diff --git a/itertools.py b/itertools.py
 def chunked(iterable, n):
     """Yield consecutive tuples of up to *n* items taken from *iterable*.

     Each tuple holds *n* items except possibly the last one, which holds
     whatever is left over.  Nothing is yielded for an empty input.
     """
     source = iter(iterable)
     # iter(callable, sentinel) keeps calling until an empty tuple comes back,
     # i.e. until the underlying iterator has nothing more to give.
     yield from iter(lambda: tuple(itertools.islice(source, n)), ())

 def windowed(iterable, n, step=1):
     """Yield tuples of *n* consecutive items, advancing *step* items each time.

     Only full windows are produced: a trailing partial window is discarded,
     and nothing is yielded when *n* is less than 1.  When *step* is larger
     than *n*, the items lying between two windows are skipped.

     Raises:
         ValueError: if *step* is less than 1 (raised when iteration starts).
     """
     if step < 1:
         # A non-positive step would never shrink the window, so it would grow
         # without bound and no further window could ever be produced.
         raise ValueError("step must be >= 1")
     window = ()
     to_skip = 0  # items still to discard before the next window may start
     for ele in iterable:
         if to_skip:
             to_skip -= 1
             continue
         window += (ele,)
         if len(window) == n:
             yield window
             window = window[step:]
             # With step > n the slice above is already empty; the remaining
             # step - n items have to be dropped from the input as well.
             to_skip = max(step - n, 0)
diff --git a/json.py b/json.py

 class CustomEncoder(json.JSONEncoder):
     """JSON encoder that serializes ``set`` and ``frozenset`` values as lists.

     Select it with ``cls=CustomEncoder`` when calling ``json.dumps``.  The
     order of the emitted list follows the set's own iteration order.
     """

     def default(self, obj):
         """Return a JSON-serializable stand-in for *obj*.

         Sets and frozensets become lists; every other type is delegated to
         ``json.JSONEncoder.default``, which raises ``TypeError``.
         """
         if isinstance(obj, (set, frozenset)):
             return list(obj)
         return super().default(obj)

 # Usage example: the encoder class is passed to json.dumps through cls=.
 json.dumps({1, 2, 3, 4, 5}, cls=CustomEncoder)
	#!/usr/bin/env python3
	import argparse
	import json
	import sys
	import collections
	import itertools

	def main(args):
	    """Template entry point: accepts the parsed argument namespace and does nothing yet."""
	    pass

	# usage: xx.py [-h] INPUT [OUTPUT]
	if __name__ == '__main__':
	    # INPUT is required and opened for reading; OUTPUT is optional, opened for
	    # writing, and defaults to "-" (which argparse.FileType maps to stdout).
	    ap = argparse.ArgumentParser()
	    ap.add_argument('input', type=argparse.FileType('r'))
	    ap.add_argument('output', type=argparse.FileType('w'),
	                    default="-", nargs="?")

	    args = ap.parse_args()
	    # Echo the parsed namespace on stderr before handing it to main().
	    print(args, file=sys.stderr)

	    main(args)
	import re

	import jieba

	# Point jieba at the dictionary file data/dict.txt.big.
	jieba.set_dictionary('data/dict.txt.big')

	# Alternatives are tried in this order at each position:
	#   1. a run of characters in the range U+4E00..U+FAFF,
	#   2. a run of ASCII letters and digits,
	#   3. any other single character that is not a space.
	# The separators must be bare "|" characters: written as "\|" they match a
	# literal pipe and the three alternatives collapse into one sequence.
	CJK_REGEXP = re.compile(
	    '[\u4e00-\ufaff]+'
	    '|[A-Za-z0-9]+'
	    '|[^ ]'
	)
	def CJK_chunk(s):
	    """Split *s* into coarse chunks using ``CJK_REGEXP``.

	    Every regex match becomes one chunk with surrounding whitespace stripped.
	    Matches that consist only of whitespace (for example a tab, a newline or
	    the ideographic space U+3000, all accepted by the pattern's ``[^ ]``
	    branch) are dropped rather than returned as empty strings.

	    Returns:
	        A list of non-empty strings in the order they occur in *s*.
	    """
	    stripped = (match.strip() for match in CJK_REGEXP.findall(s))
	    return [chunk for chunk in stripped if chunk]

	def tokenize(s):
	    """Tokenize *s*: chunk it with ``CJK_chunk``, then feed each chunk to
	    ``jieba.cut`` and collect every resulting token, in order, into one list.
	    """
	    ret = []
	    for chunk in CJK_chunk(s):
	        ret.extend(jieba.cut(chunk))
	    return ret
	# NOTE(review): the leading "$" looks like a shell-prompt marker, not part of
	# the command -- drop it if these lines are meant to be executed as a script.

	# Install the pipreqs tool.
	$ pip install pipreqs

	# Run pipreqs against the project directory.
	$ pipreqs /path/to/project
	$ pipreqs --force /path/to/project # when requirements.txt already exists
	def chunked(iterable, n):
	    """Yield consecutive tuples of up to *n* items taken from *iterable*.

	    Each tuple holds *n* items except possibly the last one, which holds
	    whatever is left over.  Nothing is yielded for an empty input.
	    """
	    it = iter(iterable)
	    while True:
	        c = tuple(itertools.islice(it, n))
	        if not c:
	            # An empty slice means the iterator is exhausted.
	            return
	        yield c

	def windowed(iterable, n, step=1):
	    """Yield tuples of *n* consecutive items, advancing *step* items each time.

	    Only full windows are produced: a trailing partial window is discarded,
	    and nothing is yielded when *n* is less than 1.  When *step* is larger
	    than *n*, the items lying between two windows are skipped.

	    Raises:
	        ValueError: if *step* is less than 1 (raised when iteration starts).
	    """
	    if step < 1:
	        # A non-positive step would never shrink the window, so it would grow
	        # without bound and no further window could ever be produced.
	        raise ValueError("step must be >= 1")
	    window = ()
	    to_skip = 0  # items still to discard before the next window may start
	    for ele in iterable:
	        if to_skip:
	            to_skip -= 1
	            continue
	        window += (ele,)
	        if len(window) == n:
	            yield window
	            window = window[step:]
	            # With step > n the slice above is already empty; the remaining
	            # step - n items have to be dropped from the input as well.
	            to_skip = max(step - n, 0)

	class CustomEncoder(json.JSONEncoder):
	    """JSON encoder that serializes ``set`` and ``frozenset`` values as lists.

	    Select it with ``cls=CustomEncoder`` when calling ``json.dumps``.  The
	    order of the emitted list follows the set's own iteration order.
	    """

	    def default(self, obj):
	        """Return a JSON-serializable stand-in for *obj*.

	        Sets and frozensets become lists; every other type is delegated to
	        ``json.JSONEncoder.default``, which raises ``TypeError``.
	        """
	        if isinstance(obj, (set, frozenset)):
	            return list(obj)
	        return super().default(obj)

	# Usage example: the encoder class is passed to json.dumps through cls=.
	json.dumps({1, 2, 3, 4, 5}, cls=CustomEncoder)