Created
March 18, 2013 05:44
-
-
Save zhuqling/5185274 to your computer and use it in GitHub Desktop.
敏感关键字匹配在线产品查找
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import collections

# Find online product listings whose titles contain sensitive keywords.

# Maximum number of title characters shown per result row when printing.
title_width = 60

# Positional indexes into a Listing tuple (same order as its fields).
FIELD_ITEMID = 0
FIELD_SKU = 1
FIELD_TITLE = 2

# One parsed row of the listing report.
Listing = collections.namedtuple('Listing', "ItemID SKU Title")
def parser_listing(line):
    """Split one fixed-width report line into a ``Listing``.

    The line is sliced by character position:

    * characters 0-11   -> item id (kept verbatim, not stripped)
    * characters 13-149 -> SKU, surrounding whitespace removed
    * characters 150-   -> title, whitespace removed and lower-cased

    Character 12 is not read (presumably a column separator in the
    report format -- confirm against the report layout).

    Args:
        line: A raw text line from the listing report.

    Returns:
        A ``Listing(ItemID, SKU, Title)``, or ``None`` when ``line`` is
        ``None`` or the empty string.
    """
    if not line:
        return None

    item_id = line[:12]
    sku = line[13:150].strip()
    # Lower-cased so later keyword comparison is case-insensitive.
    title = line[150:].strip().lower()
    return Listing(item_id, sku, title)
def matched(keyword, title):
    """Return True when ``keyword`` occurs in ``title`` next to a delimiter.

    A plain substring hit is not enough: the keyword must be directly
    followed by one of ``' '``, ``'-'``, ``', '``, ``'.'``, ``'/'``, ``';'``
    or directly preceded by one of ``' '``, ``'-'``, ``'.'``, ``'/'``, ``';'``.
    A delimiter on either side is sufficient. A title that consists of the
    keyword alone also counts as a match.

    Args:
        keyword: The keyword to look for. An empty keyword never matches.
        title: The text to search. The comparison is case-sensitive, so
            callers should normalise the case of both arguments first.

    Returns:
        True if the keyword is found as described above, otherwise False.
    """
    # Only the arguments are consulted here; no module-level state is read.
    if not keyword or keyword not in title:
        return False

    # The whole title is the keyword: there is no adjacent delimiter to
    # find, but it is still a match.
    if title == keyword:
        return True

    delimiters_after = (' ', '-', ', ', '.', '/', ';')
    delimiters_before = (' ', '-', '.', '/', ';')
    return (
        any(keyword + delimiter in title for delimiter in delimiters_after)
        or any(delimiter + keyword in title for delimiter in delimiters_before)
    )
FILENAME_KEYWORDS = 'C:/Users/zhuqling/Desktop/senstive_keywords.rpt'
FILENAME_LISTING = 'C:/Users/zhuqling/Desktop/listing.rpt'

# Load the keywords: one per line, trimmed and lower-cased.
# The emptiness check runs AFTER stripping so that blank lines (which still
# contain a newline when read) are not counted as keywords.
keywords = []
with open(FILENAME_KEYWORDS, encoding='utf8') as keyword_file:
    for raw_keyword in keyword_file:
        cleaned_keyword = raw_keyword.strip().lower()
        if cleaned_keyword:
            keywords.append(cleaned_keyword)
print("关键字数目:{0}".format(len(keywords)))

# Parse every non-blank line of the listing report.
lists = []
with open(FILENAME_LISTING, encoding='utf8') as listing_file:
    for lineOfListing in listing_file:
        if not lineOfListing.strip():
            continue
        listing = parser_listing(lineOfListing)
        # parser_listing may return None; never store that.
        if listing is not None:
            lists.append(listing)
print("在线产品数目:{0}".format(len(lists)))

# Test every listing title against every keyword and print one row per hit.
print("匹配结果:")
match_count = 0
for listing in lists:
    title = listing[FIELD_TITLE]
    # Shorten long titles for display; computed once per listing rather than
    # once per keyword hit.
    if len(title) <= title_width:
        shown_title = title
    else:
        shown_title = title[:title_width - 3] + "..."
    # The loop variable name is kept stable on purpose: it is a module-level
    # name that other code in this file may read.
    for lineOfKeyword in keywords:
        if matched(lineOfKeyword, title):
            print("{0:<20} {1:>14} {2:<24} {3:<{tw}}".format(
                lineOfKeyword,
                listing[FIELD_ITEMID],
                listing[FIELD_SKU],
                shown_title,
                tw=title_width))
            match_count += 1
print("匹配数目:{0}".format(match_count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment