Created
August 4, 2014 07:04
-
-
Save ytera/aac4b032a35ce2e79f54 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Capacities of the fixed-size buffers and tables used by the scanner. */
enum {
    BUF_LEN = 128,     /* bytes per word / compound buffer, including the NUL */
    SPECIAL_MAX = 128, /* maximum number of special words loaded */
    CACHE_SIZE = 100   /* number of recently printed compounds remembered */
};

/* Mutable state carried from one word of the input text to the next. */
struct scan_state {
    char compound[BUF_LEN];          /* proper-noun compound being built, tokens separated by ' ' */
    char cache[CACHE_SIZE][BUF_LEN]; /* ring buffer of compounds already printed */
    int cache_pos;                   /* next cache slot to overwrite */
    int at_head;                     /* non-zero while the next word starts a sentence */
    int in_compound;                 /* non-zero while `compound` holds at least one token */
    int trailing_of;                 /* non-zero while `compound` currently ends in "of" */
    size_t of_mark;                  /* length of `compound` before the trailing "of" run */
    int total_words;                 /* number of non-empty words seen */
    int proper_nouns;                /* number of compounds printed */
};

/* Append `src` and one space to `dst` (capacity `cap`); truncates instead of overflowing. */
static void append_token(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (used < cap) {
        snprintf(dst + used, cap - used, "%s ", src);
    }
}

/*
 * Read up to `max_rows` lines from `path` into `table`, one word per row,
 * with the line terminator removed.
 * Returns the number of rows stored, or -1 if the file cannot be opened.
 */
static int load_special_words(const char *path, char table[][BUF_LEN], int max_rows)
{
    FILE *fp = fopen(path, "r");
    int count = 0;

    if (fp == NULL) {
        return -1;
    }
    while (count < max_rows && fgets(table[count], BUF_LEN, fp) != NULL) {
        /* Cut at the line terminator only if one is present, so a final
         * line without a newline keeps its last character. */
        table[count][strcspn(table[count], "\r\n")] = '\0';
        count++;
    }
    fclose(fp);
    return count;
}

/* Return non-zero if `word` equals one of the first `count` rows of `table`. */
static int is_special_word(char table[][BUF_LEN], int count, const char *word)
{
    for (int i = 0; i < count; i++) {
        if (strcmp(word, table[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Return non-zero if `word` was previously printed as a single-word compound. */
static int is_cached(struct scan_state *st, const char *word)
{
    char key[BUF_LEN];

    /* Cached compounds keep their trailing space, so compare against "word ". */
    snprintf(key, sizeof key, "%s ", word);
    for (int i = 0; i < CACHE_SIZE; i++) {
        if (strcmp(key, st->cache[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

/* Print the pending compound, remember it in the cache and reset the compound state. */
static void flush_compound(struct scan_state *st)
{
    if (st->trailing_of) {
        /* The trailing "of" was not followed by a proper noun: drop it. */
        st->compound[st->of_mark] = '\0';
    }
    printf("%s\n", st->compound);
    st->proper_nouns++;
    strcpy(st->cache[st->cache_pos], st->compound); /* both buffers are BUF_LEN bytes */
    st->cache_pos = (st->cache_pos + 1) % CACHE_SIZE;
    st->compound[0] = '\0';
    st->in_compound = 0;
    st->trailing_of = 0;
}

/*
 * Classify one word and the delimiter that ended it.
 *
 * `word` may be empty when two delimiters are adjacent; an empty word still
 * ends a pending compound, which is what makes ", " separate two names.
 * `delim` is the character that terminated the word, or EOF at end of input.
 */
static void handle_word(struct scan_state *st, const char *word, int delim,
                        char special[][BUF_LEN], int num_special)
{
    int flush = 0;

    if (word[0] != '\0') {
        st->total_words++;
    }

    if (isupper((unsigned char)word[0])) {
        if (!st->at_head) {
            /* Capitalised word inside a sentence. */
            if (!is_special_word(special, num_special, word)) {
                /* Not a listed special word: treat it as part of a proper-noun compound. */
                append_token(st->compound, sizeof st->compound, word);
                st->in_compound = 1;
                st->trailing_of = 0;
            } else if (st->in_compound) {
                /* A listed special word ends the compound collected so far. */
                flush = 1;
            }
        } else if (is_cached(st, word)) {
            /* Sentence-initial capital: accept it only if it was already seen as a proper noun. */
            append_token(st->compound, sizeof st->compound, word);
            st->in_compound = 1;
            st->trailing_of = 0;
        }
    } else if (st->in_compound) {
        if (strcmp(word, "of") == 0) {
            /* "of" may join two proper nouns; keep it provisionally and
             * remember where it starts so it can be removed on flush. */
            if (!st->trailing_of) {
                st->of_mark = strlen(st->compound);
            }
            append_token(st->compound, sizeof st->compound, word);
            st->trailing_of = 1;
        } else {
            flush = 1;
        }
    }

    if (word[0] != '\0') {
        /* Any real word, capitalised or not, means we are past the sentence head. */
        st->at_head = 0;
    }

    if (delim == '.' || delim == '!' || delim == '?') {
        /* The sentence ends here, so the next word is sentence-initial. */
        st->at_head = 1;
        if (st->in_compound) {
            flush = 1;
        }
    }

    if (flush) {
        flush_compound(st);
    }
}

/*
 * Extract proper-noun compounds from the text file named by argv[1].
 *
 * Words listed in "special_word.txt" (one per line) are never treated as
 * proper nouns. Each detected compound is printed on its own line, followed
 * by a summary line with the word count and the number of compounds.
 * Returns 0 on success; exits with EXIT_FAILURE if the arguments are missing
 * or either file cannot be opened.
 */
int main(int argc, char *argv[])
{
    char special[SPECIAL_MAX][BUF_LEN];
    char word[BUF_LEN] = "";
    size_t word_len = 0;
    struct scan_state st = {0};
    FILE *fp;
    int num_special;
    int c;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <text file>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    num_special = load_special_words("special_word.txt", special, SPECIAL_MAX);
    if (num_special < 0) {
        printf("\n special_word file open error!!\n");
        exit(EXIT_FAILURE);
    }

    if ((fp = fopen(argv[1], "r")) == NULL) {
        printf("\n %s file open error!!\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    st.at_head = 1; /* the first word of the file starts a sentence */

    while ((c = getc(fp)) != EOF) {
        if (isalnum(c)) {
            /* Still inside a word; characters beyond the buffer capacity are dropped. */
            if (word_len + 1 < sizeof word) {
                word[word_len++] = (char)c;
                word[word_len] = '\0';
            }
            continue;
        }
        /* A non-alphanumeric character terminates the current word. */
        handle_word(&st, word, c, special, num_special);
        word_len = 0;
        word[0] = '\0';
    }

    /* The input may end without a trailing delimiter or sentence terminator:
     * process the last word and print any compound still pending. */
    if (word_len > 0) {
        handle_word(&st, word, EOF, special, num_special);
    }
    if (st.in_compound) {
        flush_compound(&st);
    }

    fclose(fp);
    printf("End of file. (all,proper noun) = (%d,%d)\n", st.total_words, st.proper_nouns);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment