Skip to content

Instantly share code, notes, and snippets.

@ai2010
Forked from Azure-rong/adj adv v feature.py
Created March 6, 2017 23:09
Show Gist options
  • Save ai2010/bf6e3e2894d51e7baa620bd6ada49259 to your computer and use it in GitHub Desktop.
Save ai2010/bf6e3e2894d51e7baa620bd6ada49259 to your computer and use it in GitHub Desktop.
Feature extraction: number of adjectives, adverbs and verbs in each review
#! /usr/bin/env python2.7
#coding=utf-8
"""
Count the adjectives, adverbs and verbs in each review.
This module extracts the per-review adjective, adverb and verb counts as features.
"""
import textprocessing as tp
# Count the adjectives, adverbs and verbs in each review
def count_adj_adv(dataset):
    """Count the 'a', 'd' and 'v' part-of-speech tags in every review.

    Each review is tagged with ``tp.postagger(review, 'list')``; the second
    element of every tagged item is taken as its tag. Per this module's
    description the three tags stand for adjective, adverb and verb.

    Returns:
        A list with one ``(a_count, d_count, v_count)`` tuple per review,
        in the same order as ``dataset``.
    """
    per_review_counts = []
    for review in dataset:
        # Collect the tags once, then count each tag of interest.
        tags = [tagged[1] for tagged in tp.postagger(review, 'list')]
        per_review_counts.append(
            (tags.count('a'), tags.count('d'), tags.count('v'))
        )
    return per_review_counts
# Store features
def store_adj_adv_v_num_feature(filepath, sheetnum, colnum, data, storepath):
    """Compute the per-review tag counts and write them to a text file.

    The reviews are loaded with ``tp.seg_fil_excel(filepath, sheetnum, colnum)``,
    counted with ``count_adj_adv`` and written to ``storepath`` as one line per
    review holding the three counts separated by single spaces.

    Args:
        filepath, sheetnum, colnum: passed straight to ``tp.seg_fil_excel``.
        data: kept only for backward compatibility; the value passed in is
            never read, because the reviews are always reloaded from
            ``filepath``.
        storepath: path of the output file; opened in 'w' mode, so existing
            content is overwritten.
    """
    reviews = tp.seg_fil_excel(filepath, sheetnum, colnum)
    counts = count_adj_adv(reviews)
    # The context manager closes the file even if a write raises.
    with open(storepath, 'w') as out:
        for adj_num, adv_num, verb_num in counts:
            out.write('%s %s %s\n' % (adj_num, adv_num, verb_num))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment