Created
January 29, 2022 09:40
-
-
Save temberature/5df2e085d69aaf3e0e39afd5fd674b20 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import jieba | |
import jieba.analyse | |
from optparse import OptionParser | |
import win32clipboard | |
import re | |
import glob | |
import os | |
def content2links(content, topK=5):
    """Return the top TextRank keywords of *content* as wiki-style links.

    Args:
        content: Text to extract keywords from.
        topK: Maximum number of keywords to request from TextRank.
            Defaults to 5, the count that was previously hard-coded.

    Returns:
        The keywords, each wrapped as ``[[keyword]]`` and joined with
        commas; an empty string when TextRank yields no keywords.
    """
    tags = jieba.analyse.textrank(content, topK=topK)
    return ','.join('[[' + tag + ']]' for tag in tags)
def contentAddlinks(content, topK=5):
    """Prepend a line of keyword links to *content*.

    Args:
        content: Text to extract keywords from and to prefix.
        topK: Maximum number of keywords to request from TextRank.
            Defaults to 5, the count that was previously hard-coded.

    Returns:
        A new string consisting of the comma-separated ``[[keyword]]``
        links, a newline, and then the original content unchanged.
    """
    tags = jieba.analyse.textrank(content, topK=topK)
    links = ','.join('[[' + tag + ']]' for tag in tags)
    return links + '\n' + content
def fileAddlinks(filename):
    """Rewrite *filename* in place with a keyword-link line prepended.

    The file is read as UTF-8, passed through ``contentAddlinks``, echoed
    to stdout, and then written back over the original contents.

    Args:
        filename: Path of the text file to update.
    """
    # The context manager guarantees the handle is closed even when
    # reading, keyword extraction or writing raises.
    with open(filename, 'r+', encoding='UTF-8') as f:
        content = contentAddlinks(f.read())
        print(content)
        # Overwrite from the start; truncate drops any leftover bytes
        # beyond the end of the newly written text.
        f.seek(0)
        f.write(content)
        f.truncate()
# Command-line entry: with no positional argument the clipboard text is
# replaced by its keyword links; with a directory every Markdown file under
# it is updated; with a single file only that file is updated.
USAGE = "usage: python extract_tags.py [file name] -k [top k]"

parser = OptionParser(USAGE)
parser.add_option("-k", dest="topK")
opt, args = parser.parse_args()

# NOTE(review): topK is parsed but not forwarded to the helpers, which are
# called with their single-argument signatures here.
topK = 5 if opt.topK is None else int(opt.topK)

# Index args only after checking that a path was supplied; reading args[0]
# unconditionally raised IndexError and made the clipboard branch unreachable.
path = args[0] if args else None

if path is None:
    win32clipboard.OpenClipboard()
    try:
        content = win32clipboard.GetClipboardData()
        links = content2links(content)
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardText(links)
    finally:
        # Always release the clipboard so other applications are not
        # locked out when extraction fails.
        win32clipboard.CloseClipboard()
elif os.path.isdir(path):
    print("\nIt is a directory")
    # '**' must be its own path component for the recursive search to work,
    # so build the pattern with os.path.join instead of string concatenation.
    pattern = os.path.join(path, '**', '*.md')
    for filename in glob.iglob(pattern, recursive=True):
        fileAddlinks(filename)
elif os.path.isfile(path):
    print("\nIt is a normal file")
    fileAddlinks(path)
else:
    print('unknown')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment