ytera · August 4, 2014 07:04
diff --git a/pattern_match.c b/pattern_match.c
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>

 /*
  * Extracts candidate proper nouns from the text file named by argv[1] and
  * prints one candidate per line, followed by a summary line.
  *
  * A candidate is a run of capitalized tokens, optionally joined by the word
  * "of".  Capitalized tokens listed in "special_word.txt" (one per line) are
  * never treated as proper nouns.  A capitalized token at the start of a
  * sentence is accepted only when it matches an entry of a 100-slot cache of
  * previously printed candidates (slots are overwritten cyclically).
  *
  * Every non-alphanumeric character ends the current token, so empty tokens
  * (e.g. between ", " or two blanks) are counted in `all` and also terminate
  * a pending candidate.  A token still being read when EOF is reached is not
  * processed.
  *
  * Over-long tokens and candidates are truncated to fit their 128-byte
  * buffers instead of overflowing them.
  *
  * Returns 0 on success; exits with EXIT_FAILURE when the file argument is
  * missing or either input file cannot be opened.
  */
 int main(int argc, char *argv[]){

 	FILE *fp;
 	int  i=0,j,c,num_sp,head=1,num=0,ptr=0,flag_c=0,flag_of=0,flag_p=0,all=0;
 	size_t of_pos = 0;                 /* length of C_word before the last "of " was appended */
 	char word[128], C_word[128];
 	char sp[128][128];                 /* special words (never proper nouns) */
 	char ca[128][128] = {{0}};         /* cache of printed candidates; zeroed so unused slots are "" */

 	if (argc < 2) { /* argv[1] would be NULL: refuse instead of passing it to fopen */
 		printf("\n usage: %s <text file>\n",
 		       (argc > 0 && argv[0] != NULL) ? argv[0] : "pattern_match");
 		exit(EXIT_FAILURE);
 	}

 	if ((fp = fopen("special_word.txt", "r")) == NULL) { /* load the special-word list */
 		printf("\n special_word file open error!!\n");
 		exit(EXIT_FAILURE);
 	}

 	/* Read at most as many lines as sp has rows. */
 	while (i < (int)(sizeof sp / sizeof sp[0]) && fgets(sp[i], sizeof sp[i], fp) != NULL) {
 		sp[i][strcspn(sp[i], "\r\n")] = '\0';  /* drop the line ending only if one is present */
 		i++;
 	}
 	num_sp = i; /* number of special words */
 	fclose(fp);

 	if ((fp = fopen(argv[1], "r")) == NULL) { /* open the text to scan */
 		printf("\n %s file open error!!\n",argv[1]);
 		exit(EXIT_FAILURE);
 	}

 	word[0] = '\0';
 	C_word[0] = '\0';

 	while ((c = getc(fp)) != EOF) {  /* read one character at a time */

 		if(isalnum(c)){  /* inside a token */

 			size_t n = strlen(word);
 			if (n < sizeof word - 2) {  /* keep room for a trailing ' ' and the NUL */
 				word[n++] = (char)c;
 				word[n] = '\0';
 			}

 		} else {  /* a token (possibly empty) just ended */
 			all++;
 			if(isupper((unsigned char)word[0])){  /* token starts with a capital */

 				if(head==0){ /* not at the start of a sentence */

 					int flag_sp = 0;  /* set when the token is a special word */
 					for(i=0;i<num_sp;i++){
 						if(strcmp(word,sp[i]) == 0){
 							flag_sp = 1;
 							break;
 						}
 					}

 					if(flag_sp == 0){ /* proper noun: extend the pending candidate */
 						size_t n = strlen(C_word);
 						snprintf(C_word + n, sizeof C_word - n, "%s ", word);
 						flag_c = 1;
 						flag_of = 0;
 					}else if(flag_c == 1){  /* special word ends a pending candidate */
 						flag_p = 1;
 					}

 				} else {  /* start of a sentence: accept only cached candidates */
 					strcat(word," ");  /* cannot overflow: word holds at most sizeof word - 2 chars */
 					for(j=0;j<100;j++){
 						if(strcmp(word,ca[j])==0){  /* known proper noun: extend the candidate */
 							/* NOTE(review): word already ends in ' ', so this stores a double
 							 * space, exactly as the original code did. */
 							size_t n = strlen(C_word);
 							snprintf(C_word + n, sizeof C_word - n, "%s ", word);
 							flag_c = 1;
 							flag_of = 0;
 							break;
 						}
 					}
 					head = 0;
 				}

 			}else if(flag_c == 1){ /* non-capitalized token while a candidate is pending */

 				if(strcmp(word,"of") == 0){ /* exactly "of": tentatively keep the candidate going */

 					of_pos = strlen(C_word);
 					snprintf(C_word + of_pos, sizeof C_word - of_pos, "%s ", word);
 					flag_of = 1;

 				}else{
 					flag_p = 1;
 				}
 			}

 			if(c == '.' || c == '!' ||c == '?'){   /* sentence ends: next token is sentence-initial */
 				head = 1;
 				if(flag_c == 1){ 	/* flush any pending candidate */
 					flag_p = 1;
 				}
 			}

 			if(flag_p == 1){ /* print, cache and reset the pending candidate */
 				if(flag_of == 1){  /* trailing "of " was not part of the name: cut it off */
 					C_word[of_pos] = '\0';
 				}
 				printf("%s\n",C_word);
 				num++;
 				strcpy(ca[ptr],C_word);  /* same buffer size on both sides */
 				ptr++;
 				if(ptr==100)ptr=0;
 				C_word[0] = '\0';
 				flag_p = 0;
 				flag_c = 0;
 				flag_of = 0;
 			}
 			word[0] = '\0';
 		}
 	}

 	fclose(fp);
 	printf("End of file. (all,proper noun) = (%d,%d)\n",all,num);
 	return 0;
 }
	#include <stdlib.h>
	#include <stdio.h>
	#include <string.h>
	#include <ctype.h>

	/*
	 * Scans the text file named by argv[1] for candidate proper nouns and
	 * prints one per line, then a summary line with the token and candidate
	 * counts.
	 *
	 * A candidate is a run of capitalized tokens, optionally joined by the
	 * word "of".  Capitalized tokens found in "special_word.txt" (one per
	 * line) are excluded.  At the start of a sentence a capitalized token is
	 * accepted only when it equals an entry of a 100-slot cache of earlier
	 * printed candidates; the cache is overwritten cyclically.
	 *
	 * Any non-alphanumeric character terminates the current token, so empty
	 * tokens are counted in `all` and also end a pending candidate.  A token
	 * that is still open when EOF is reached is not processed.
	 *
	 * Tokens and candidates that would exceed their 128-byte buffers are
	 * truncated.
	 *
	 * Returns 0 on success; exits with EXIT_FAILURE when the file argument
	 * is missing or an input file cannot be opened.
	 */
	int main(int argc, char *argv[]){

		FILE *fp;
		int i=0,j,c,num_sp,head=1,num=0,ptr=0,flag_c=0,flag_of=0,flag_p=0,all=0;
		size_t of_pos = 0;              /* C_word length before the last "of " was appended */
		char word[128], C_word[128];
		char sp[128][128];              /* special words (never proper nouns) */
		char ca[128][128] = {{0}};      /* candidate cache; zeroed so unused slots compare as "" */

		if (argc < 2) { /* without this, argv[1] is NULL and fopen/printf misbehave */
			printf("\n usage: %s <text file>\n",
			       (argc > 0 && argv[0] != NULL) ? argv[0] : "pattern_match");
			exit(EXIT_FAILURE);
		}

		if ((fp = fopen("special_word.txt", "r")) == NULL) { /* load the special-word list */
			printf("\n special_word file open error!!\n");
			exit(EXIT_FAILURE);
		}

		/* Never read more lines than sp has rows. */
		while (i < (int)(sizeof sp / sizeof sp[0]) && fgets(sp[i], sizeof sp[i], fp) != NULL) {
			sp[i][strcspn(sp[i], "\r\n")] = '\0'; /* strip the line ending only when present */
			i++;
		}
		num_sp = i; /* number of special words */
		fclose(fp);

		if ((fp = fopen(argv[1], "r")) == NULL) { /* open the text to scan */
			printf("\n %s file open error!!\n",argv[1]);
			exit(EXIT_FAILURE);
		}

		word[0] = '\0';
		C_word[0] = '\0';

		while ((c = getc(fp)) != EOF) { /* read one character at a time */

			if(isalnum(c)){ /* inside a token */

				size_t n = strlen(word);
				if (n < sizeof word - 2) { /* reserve space for a trailing ' ' and the NUL */
					word[n++] = (char)c;
					word[n] = '\0';
				}

			} else { /* a token (possibly empty) just ended */
				all++;
				if(isupper((unsigned char)word[0])){ /* token starts with a capital */

					if(head==0){ /* not at the start of a sentence */

						int flag_sp = 0; /* set when the token is a special word */
						for(i=0;i<num_sp;i++){
							if(strcmp(word,sp[i]) == 0){
								flag_sp = 1;
								break;
							}
						}

						if(flag_sp == 0){ /* proper noun: extend the pending candidate */
							size_t n = strlen(C_word);
							snprintf(C_word + n, sizeof C_word - n, "%s ", word);
							flag_c = 1;
							flag_of = 0;
						}else if(flag_c == 1){ /* special word ends a pending candidate */
							flag_p = 1;
						}

					} else { /* start of a sentence: accept only cached candidates */
						strcat(word," "); /* cannot overflow: word holds at most sizeof word - 2 chars */
						for(j=0;j<100;j++){
							if(strcmp(word,ca[j])==0){ /* known proper noun: extend the candidate */
								/* NOTE(review): word already ends in ' ', so a double space is
								 * stored here, matching the original behavior. */
								size_t n = strlen(C_word);
								snprintf(C_word + n, sizeof C_word - n, "%s ", word);
								flag_c = 1;
								flag_of = 0;
								break;
							}
						}
						head = 0;
					}

				}else if(flag_c == 1){ /* non-capitalized token while a candidate is pending */

					if(strcmp(word,"of") == 0){ /* exactly "of": tentatively continue the candidate */

						of_pos = strlen(C_word);
						snprintf(C_word + of_pos, sizeof C_word - of_pos, "%s ", word);
						flag_of = 1;

					}else{
						flag_p = 1;
					}
				}

				if(c == '.' || c == '!' || c == '?'){ /* sentence ends: next token is sentence-initial */
					head = 1;
					if(flag_c == 1){ /* flush any pending candidate */
						flag_p = 1;
					}
				}

				if(flag_p == 1){ /* print, cache and reset the pending candidate */
					if(flag_of == 1){ /* trailing "of " was not part of the name: cut it off */
						C_word[of_pos] = '\0';
					}
					printf("%s\n",C_word);
					num++;
					strcpy(ca[ptr],C_word); /* same buffer size on both sides */
					ptr++;
					if(ptr==100)ptr=0;
					C_word[0] = '\0';
					flag_p = 0;
					flag_c = 0;
					flag_of = 0;
				}
				word[0] = '\0';
			}
		}

		fclose(fp);
		printf("End of file. (all,proper noun) = (%d,%d)\n",all,num);
		return 0;
	}