SaitoAtsushi · November 21, 2015 10:22
diff --git a/word2vec-mingw32.patch b/word2vec-mingw32.patch
 Index: word2phrase.c
 ===================================================================
 --- word2phrase.c	(リビジョン 42)
 +++ word2phrase.c	(作業コピー)
 @@ -16,8 +16,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 -#include <pthread.h>
 +#include <inttypes.h>
 
 +#define PRIdLLD "%"PRId64
 #define MAX_STRING 60
 
 const int vocab_hash_size = 500000000; // Maximum 500M entries in the vocabulary
 @@ -176,7 +177,7 @@
     } else start = 0;
     train_words++;
     if ((debug_mode > 1) && (train_words % 100000 == 0)) {
 -      printf("Words processed: %lldK     Vocab size: %lldK  %c", train_words / 1000, vocab_size / 1000, 13);
 +      printf("Words processed: " PRIdLLD "K     Vocab size: " PRIdLLD "K  %c", train_words / 1000, vocab_size / 1000, 13);
       fflush(stdout);
     }
     i = SearchVocab(word);
 @@ -197,8 +198,8 @@
   }
   SortVocab();
   if (debug_mode > 0) {
 -    printf("\nVocab size (unigrams + bigrams): %lld\n", vocab_size);
 -    printf("Words in train file: %lld\n", train_words);
 +    printf("\nVocab size (unigrams + bigrams): " PRIdLLD "\n", vocab_size);
 +    printf("Words in train file: " PRIdLLD "\n", train_words);
   }
   fclose(fin);
 }
 @@ -223,7 +224,7 @@
     }
     cn++;
     if ((debug_mode > 1) && (cn % 100000 == 0)) {
 -      printf("Words written: %lldK%c", cn / 1000, 13);
 +      printf("Words written: " PRIdLLD "K%c", cn / 1000, 13);
       fflush(stdout);
     }
     oov = 0;
 Index: word-analogy.c
 ===================================================================
 --- word-analogy.c	(リビジョン 42)
 +++ word-analogy.c	(作業コピー)
 @@ -16,7 +16,10 @@
 #include <string.h>
 #include <math.h>
 #include <malloc.h>
 +#include <inttypes.h>
 
 +#define PRIdLLD "%"PRId64
 +
 const long long max_size = 2000;         // max length of strings
 const long long N = 40;                  // number of closest words that will be shown
 const long long max_w = 50;              // max length of vocabulary entries
 @@ -28,7 +31,6 @@
   char file_name[max_size], st[100][max_size];
   float dist, len, bestd[N], vec[max_size];
   long long words, size, a, b, c, d, cn, bi[100];
 -  char ch;
   float *M;
   char *vocab;
   if (argc < 2) {
 @@ -41,12 +43,12 @@
     printf("Input file not found\n");
     return -1;
   }
 -  fscanf(f, "%lld", &words);
 -  fscanf(f, "%lld", &size);
 +  fscanf(f, PRIdLLD, &words);
 +  fscanf(f, PRIdLLD, &size);
   vocab = (char *)malloc((long long)words * max_w * sizeof(char));
   M = (float *)malloc((long long)words * (long long)size * sizeof(float));
   if (M == NULL) {
 -    printf("Cannot allocate memory: %lld MB    %lld  %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
 +    printf("Cannot allocate memory: " PRIdLLD " MB    " PRIdLLD "  " PRIdLLD "\n", (long long)words * size * sizeof(float) / 1048576, words, size);
     return -1;
   }
   for (b = 0; b < words; b++) {
 @@ -68,6 +70,7 @@
     for (a = 0; a < N; a++) bestd[a] = 0;
     for (a = 0; a < N; a++) bestw[a][0] = 0;
     printf("Enter three words (EXIT to break): ");
 +    fflush(stdout);
     a = 0;
     while (1) {
       st1[a] = fgetc(stdin);
 @@ -95,14 +98,14 @@
     }
     cn++;
     if (cn < 3) {
 -      printf("Only %lld words were entered.. three words are needed at the input to perform the calculation\n", cn);
 +      printf("Only " PRIdLLD " words were entered.. three words are needed at the input to perform the calculation\n", cn);
       continue;
     }
     for (a = 0; a < cn; a++) {
       for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
       if (b == words) b = 0;
       bi[a] = b;
 -      printf("\nWord: %s  Position in vocabulary: %lld\n", st[a], bi[a]);
 +      printf("\nWord: %s  Position in vocabulary: " PRIdLLD "\n", st[a], bi[a]);
       if (b == 0) {
         printf("Out of dictionary word!\n");
         break;
 Index: compute-accuracy.c
 ===================================================================
 --- compute-accuracy.c	(リビジョン 42)
 +++ compute-accuracy.c	(作業コピー)
 @@ -18,7 +18,10 @@
 #include <math.h>
 #include <malloc.h>
 #include <ctype.h>
 +#include <inttypes.h>
 
 +#define PRIdLLD "%"PRId64
 +
 const long long max_size = 2000;         // max length of strings
 const long long N = 1;                   // number of closest words
 const long long max_w = 50;              // max length of vocabulary entries
 @@ -26,7 +29,7 @@
 int main(int argc, char **argv)
 {
   FILE *f;
 -  char st1[max_size], st2[max_size], st3[max_size], st4[max_size], bestw[N][max_size], file_name[max_size], ch;
 +  char st1[max_size], st2[max_size], st3[max_size], st4[max_size], bestw[N][max_size], file_name[max_size];
   float dist, len, bestd[N], vec[max_size];
   long long words, size, a, b, c, d, b1, b2, b3, threshold = 0;
   float *M;
 @@ -43,13 +46,13 @@
     printf("Input file not found\n");
     return -1;
   }
 -  fscanf(f, "%lld", &words);
 +  fscanf(f, PRIdLLD, &words);
   if (threshold) if (words > threshold) words = threshold;
 -  fscanf(f, "%lld", &size);
 +  fscanf(f, PRIdLLD, &size);
   vocab = (char *)malloc(words * max_w * sizeof(char));
   M = (float *)malloc(words * size * sizeof(float));
   if (M == NULL) {
 -    printf("Cannot allocate memory: %lld MB\n", words * size * sizeof(float) / 1048576);
 +    printf("Cannot allocate memory: " PRIdLLD " MB\n", words * size * sizeof(float) / 1048576);
     return -1;
   }
   for (b = 0; b < words; b++) {
 Index: distance.c
 ===================================================================
 --- distance.c	(リビジョン 42)
 +++ distance.c	(作業コピー)
 @@ -16,7 +16,10 @@
 #include <string.h>
 #include <math.h>
 #include <malloc.h>
 +#include <inttypes.h>
 
 +#define PRIdLLD "%"PRId64
 +
 const long long max_size = 2000;         // max length of strings
 const long long N = 40;                  // number of closest words that will be shown
 const long long max_w = 50;              // max length of vocabulary entries
 @@ -28,7 +31,6 @@
   char file_name[max_size], st[100][max_size];
   float dist, len, bestd[N], vec[max_size];
   long long words, size, a, b, c, d, cn, bi[100];
 -  char ch;
   float *M;
   char *vocab;
   if (argc < 2) {
 @@ -41,13 +43,13 @@
     printf("Input file not found\n");
     return -1;
   }
 -  fscanf(f, "%lld", &words);
 -  fscanf(f, "%lld", &size);
 +  fscanf(f, PRIdLLD, &words);
 +  fscanf(f, PRIdLLD, &size);
   vocab = (char *)malloc((long long)words * max_w * sizeof(char));
   for (a = 0; a < N; a++) bestw[a] = (char *)malloc(max_size * sizeof(char));
   M = (float *)malloc((long long)words * (long long)size * sizeof(float));
   if (M == NULL) {
 -    printf("Cannot allocate memory: %lld MB    %lld  %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
 +    printf("Cannot allocate memory: " PRIdLLD " MB    " PRIdLLD "  " PRIdLLD "\n", (long long)words * size * sizeof(float) / 1048576, words, size);
     return -1;
   }
   for (b = 0; b < words; b++) {
 @@ -69,6 +71,7 @@
     for (a = 0; a < N; a++) bestd[a] = 0;
     for (a = 0; a < N; a++) bestw[a][0] = 0;
     printf("Enter word or sentence (EXIT to break): ");
 +    fflush(stdout);
     a = 0;
     while (1) {
       st1[a] = fgetc(stdin);
 @@ -99,7 +102,7 @@
       for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
       if (b == words) b = -1;
       bi[a] = b;
 -      printf("\nWord: %s  Position in vocabulary: %lld\n", st[a], bi[a]);
 +      printf("\nWord: %s  Position in vocabulary: " PRIdLLD "\n", st[a], bi[a]);
       if (b == -1) {
         printf("Out of dictionary word!\n");
         break;
 Index: word2vec.c
 ===================================================================
 --- word2vec.c	(リビジョン 42)
 +++ word2vec.c	(作業コピー)
 @@ -16,7 +16,11 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 -#include <pthread.h>
 +#include <time.h>
 +#include <inttypes.h>
 +#include <windows.h>
 +#include <process.h>
 +#include <malloc.h>
 
 #define MAX_STRING 100
 #define EXP_TABLE_SIZE 1000
 @@ -24,6 +28,8 @@
 #define MAX_SENTENCE_LENGTH 1000
 #define MAX_CODE_LENGTH 40
 
 +#define PRIdLLD "%"PRId64
 +
 const int vocab_hash_size = 30000000;  // Maximum 30 * 0.7 = 21M words in the vocabulary
 
 typedef float real;                    // Precision of float numbers
 @@ -317,13 +323,13 @@
     ReadWord(word, fin);
     if (feof(fin)) break;
     a = AddWordToVocab(word);
 -    fscanf(fin, "%lld%c", &vocab[a].cn, &c);
 +    fscanf(fin, PRIdLLD"%c", &vocab[a].cn, &c);
     i++;
   }
   SortVocab();
   if (debug_mode > 0) {
 -    printf("Vocab size: %lld\n", vocab_size);
 -    printf("Words in train file: %lld\n", train_words);
 +    printf("Vocab size: " PRIdLLD "\n", vocab_size);
 +    printf("Words in train file: " PRIdLLD "\n", train_words);
   }
   fin = fopen(train_file, "rb");
   if (fin == NULL) {
 @@ -338,16 +344,17 @@
 void InitNet() {
   long long a, b;
   unsigned long long next_random = 1;
 -  a = posix_memalign((void **)&syn0, 128, (long long)vocab_size * layer1_size * sizeof(real));
 +  syn0 = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
 +
   if (syn0 == NULL) {printf("Memory allocation failed\n"); exit(1);}
   if (hs) {
 -    a = posix_memalign((void **)&syn1, 128, (long long)vocab_size * layer1_size * sizeof(real));
 +    syn1 = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
     if (syn1 == NULL) {printf("Memory allocation failed\n"); exit(1);}
     for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
      syn1[a * layer1_size + b] = 0;
   }
   if (negative>0) {
 -    a = posix_memalign((void **)&syn1neg, 128, (long long)vocab_size * layer1_size * sizeof(real));
 +    syn1neg = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
     if (syn1neg == NULL) {printf("Memory allocation failed\n"); exit(1);}
     for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
      syn1neg[a * layer1_size + b] = 0;
 @@ -359,11 +366,12 @@
   CreateBinaryTree();
 }
 
 -void *TrainModelThread(void *id) {
 +__stdcall unsigned int TrainModelThread(void *arg) {
 +  long id = (long) arg;
   long long a, b, d, cw, word, last_word, sentence_length = 0, sentence_position = 0;
   long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
   long long l1, l2, c, target, label, local_iter = iter;
 -  unsigned long long next_random = (long long)id;
 +  unsigned long long next_random = (long long) id;
   real f, g;
   clock_t now;
   real *neu1 = (real *)calloc(layer1_size, sizeof(real));
 @@ -538,13 +546,14 @@
   fclose(fi);
   free(neu1);
   free(neu1e);
 -  pthread_exit(NULL);
 +  _endthreadex(0);
 +  return 0; /* unreachable */
 }
 
 void TrainModel() {
   long a, b, c, d;
   FILE *fo;
 -  pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
 +  HANDLE *pt = malloc(num_threads * sizeof(HANDLE));
   printf("Starting training using file %s\n", train_file);
   starting_alpha = alpha;
   if (read_vocab_file[0] != 0) ReadVocab(); else LearnVocabFromTrainFile();
 @@ -553,12 +562,14 @@
   InitNet();
   if (negative > 0) InitUnigramTable();
   start = clock();
 -  for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
 -  for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
 +  for (a = 0; a < num_threads; a++)
 +    pt[a] = (HANDLE) _beginthreadex(NULL, 0, TrainModelThread, (void *)a, 0, NULL); 
 +  WaitForMultipleObjects(num_threads, pt, TRUE, INFINITE);
 +  for (a = 0; a < num_threads; a++) CloseHandle(pt[a]);
   fo = fopen(output_file, "wb");
   if (classes == 0) {
     // Save the word vectors
 -    fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
 +    fprintf(fo, PRIdLLD " " PRIdLLD "\n", vocab_size, layer1_size);
     for (a = 0; a < vocab_size; a++) {
       fprintf(fo, "%s ", vocab[a].word);
       if (binary) for (b = 0; b < layer1_size; b++) fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
 Index: makefile
 ===================================================================
 --- makefile	(リビジョン 42)
 +++ makefile	(作業コピー)
 @@ -1,6 +1,7 @@
 CC = gcc
 #Using -Ofast instead of -O3 might result in faster code, but is supported only by newer GCC versions
 -CFLAGS = -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 +CFLAGS = -lm -std=c99 -O3 -march=native -Wall -funroll-loops -Wno-unused-result
 +EXT = .exe
 
 all: word2vec word2phrase distance word-analogy compute-accuracy
 
 @@ -17,4 +18,4 @@
 	chmod +x *.sh
 
 clean:
 -	rm -rf word2vec word2phrase distance word-analogy compute-accuracy
 \ No newline at end of file
 +	rm -rf word2vec$(EXT) word2phrase$(EXT) distance$(EXT) word-analogy$(EXT) compute-accuracy$(EXT)
	Index: word2phrase.c
	===================================================================
	--- word2phrase.c (リビジョン 42)
	+++ word2phrase.c (作業コピー)
	@@ -16,8 +16,9 @@
	#include <stdlib.h>
	#include <string.h>
	#include <math.h>
	-#include <pthread.h>
	+#include <inttypes.h>

	+#define PRIdLLD "%"PRId64
	#define MAX_STRING 60

	const int vocab_hash_size = 500000000; // Maximum 500M entries in the vocabulary
	@@ -176,7 +177,7 @@
	} else start = 0;
	train_words++;
	if ((debug_mode > 1) && (train_words % 100000 == 0)) {
	- printf("Words processed: %lldK Vocab size: %lldK %c", train_words / 1000, vocab_size / 1000, 13);
	+ printf("Words processed: " PRIdLLD "K Vocab size: " PRIdLLD "K %c", train_words / 1000, vocab_size / 1000, 13);
	fflush(stdout);
	}
	i = SearchVocab(word);
	@@ -197,8 +198,8 @@
	}
	SortVocab();
	if (debug_mode > 0) {
	- printf("\nVocab size (unigrams + bigrams): %lld\n", vocab_size);
	- printf("Words in train file: %lld\n", train_words);
	+ printf("\nVocab size (unigrams + bigrams): " PRIdLLD "\n", vocab_size);
	+ printf("Words in train file: " PRIdLLD "\n", train_words);
	}
	fclose(fin);
	}
	@@ -223,7 +224,7 @@
	}
	cn++;
	if ((debug_mode > 1) && (cn % 100000 == 0)) {
	- printf("Words written: %lldK%c", cn / 1000, 13);
	+ printf("Words written: " PRIdLLD "K%c", cn / 1000, 13);
	fflush(stdout);
	}
	oov = 0;
	Index: word-analogy.c
	===================================================================
	--- word-analogy.c (リビジョン 42)
	+++ word-analogy.c (作業コピー)
	@@ -16,7 +16,10 @@
	#include <string.h>
	#include <math.h>
	#include <malloc.h>
	+#include <inttypes.h>

	+#define PRIdLLD "%"PRId64
	+
	const long long max_size = 2000; // max length of strings
	const long long N = 40; // number of closest words that will be shown
	const long long max_w = 50; // max length of vocabulary entries
	@@ -28,7 +31,6 @@
	char file_name[max_size], st[100][max_size];
	float dist, len, bestd[N], vec[max_size];
	long long words, size, a, b, c, d, cn, bi[100];
	- char ch;
	float *M;
	char *vocab;
	if (argc < 2) {
	@@ -41,12 +43,12 @@
	printf("Input file not found\n");
	return -1;
	}
	- fscanf(f, "%lld", &words);
	- fscanf(f, "%lld", &size);
	+ fscanf(f, PRIdLLD, &words);
	+ fscanf(f, PRIdLLD, &size);
	vocab = (char *)malloc((long long)words * max_w * sizeof(char));
	M = (float *)malloc((long long)words * (long long)size * sizeof(float));
	if (M == NULL) {
	- printf("Cannot allocate memory: %lld MB %lld %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
	+ printf("Cannot allocate memory: " PRIdLLD " MB " PRIdLLD " " PRIdLLD "\n", (long long)words * size * sizeof(float) / 1048576, words, size);
	return -1;
	}
	for (b = 0; b < words; b++) {
	@@ -68,6 +70,7 @@
	for (a = 0; a < N; a++) bestd[a] = 0;
	for (a = 0; a < N; a++) bestw[a][0] = 0;
	printf("Enter three words (EXIT to break): ");
	+ fflush(stdout);
	a = 0;
	while (1) {
	st1[a] = fgetc(stdin);
	@@ -95,14 +98,14 @@
	}
	cn++;
	if (cn < 3) {
	- printf("Only %lld words were entered.. three words are needed at the input to perform the calculation\n", cn);
	+ printf("Only " PRIdLLD " words were entered.. three words are needed at the input to perform the calculation\n", cn);
	continue;
	}
	for (a = 0; a < cn; a++) {
	for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
	if (b == words) b = 0;
	bi[a] = b;
	- printf("\nWord: %s Position in vocabulary: %lld\n", st[a], bi[a]);
	+ printf("\nWord: %s Position in vocabulary: " PRIdLLD "\n", st[a], bi[a]);
	if (b == 0) {
	printf("Out of dictionary word!\n");
	break;
	Index: compute-accuracy.c
	===================================================================
	--- compute-accuracy.c (リビジョン 42)
	+++ compute-accuracy.c (作業コピー)
	@@ -18,7 +18,10 @@
	#include <math.h>
	#include <malloc.h>
	#include <ctype.h>
	+#include <inttypes.h>

	+#define PRIdLLD "%"PRId64
	+
	const long long max_size = 2000; // max length of strings
	const long long N = 1; // number of closest words
	const long long max_w = 50; // max length of vocabulary entries
	@@ -26,7 +29,7 @@
	int main(int argc, char **argv)
	{
	FILE *f;
	- char st1[max_size], st2[max_size], st3[max_size], st4[max_size], bestw[N][max_size], file_name[max_size], ch;
	+ char st1[max_size], st2[max_size], st3[max_size], st4[max_size], bestw[N][max_size], file_name[max_size];
	float dist, len, bestd[N], vec[max_size];
	long long words, size, a, b, c, d, b1, b2, b3, threshold = 0;
	float *M;
	@@ -43,13 +46,13 @@
	printf("Input file not found\n");
	return -1;
	}
	- fscanf(f, "%lld", &words);
	+ fscanf(f, PRIdLLD, &words);
	if (threshold) if (words > threshold) words = threshold;
	- fscanf(f, "%lld", &size);
	+ fscanf(f, PRIdLLD, &size);
	vocab = (char *)malloc(words * max_w * sizeof(char));
	M = (float *)malloc(words * size * sizeof(float));
	if (M == NULL) {
	- printf("Cannot allocate memory: %lld MB\n", words * size * sizeof(float) / 1048576);
	+ printf("Cannot allocate memory: " PRIdLLD " MB\n", words * size * sizeof(float) / 1048576);
	return -1;
	}
	for (b = 0; b < words; b++) {
	Index: distance.c
	===================================================================
	--- distance.c (リビジョン 42)
	+++ distance.c (作業コピー)
	@@ -16,7 +16,10 @@
	#include <string.h>
	#include <math.h>
	#include <malloc.h>
	+#include <inttypes.h>

	+#define PRIdLLD "%"PRId64
	+
	const long long max_size = 2000; // max length of strings
	const long long N = 40; // number of closest words that will be shown
	const long long max_w = 50; // max length of vocabulary entries
	@@ -28,7 +31,6 @@
	char file_name[max_size], st[100][max_size];
	float dist, len, bestd[N], vec[max_size];
	long long words, size, a, b, c, d, cn, bi[100];
	- char ch;
	float *M;
	char *vocab;
	if (argc < 2) {
	@@ -41,13 +43,13 @@
	printf("Input file not found\n");
	return -1;
	}
	- fscanf(f, "%lld", &words);
	- fscanf(f, "%lld", &size);
	+ fscanf(f, PRIdLLD, &words);
	+ fscanf(f, PRIdLLD, &size);
	vocab = (char *)malloc((long long)words * max_w * sizeof(char));
	for (a = 0; a < N; a++) bestw[a] = (char *)malloc(max_size * sizeof(char));
	M = (float *)malloc((long long)words * (long long)size * sizeof(float));
	if (M == NULL) {
	- printf("Cannot allocate memory: %lld MB %lld %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
	+ printf("Cannot allocate memory: " PRIdLLD " MB " PRIdLLD " " PRIdLLD "\n", (long long)words * size * sizeof(float) / 1048576, words, size);
	return -1;
	}
	for (b = 0; b < words; b++) {
	@@ -69,6 +71,7 @@
	for (a = 0; a < N; a++) bestd[a] = 0;
	for (a = 0; a < N; a++) bestw[a][0] = 0;
	printf("Enter word or sentence (EXIT to break): ");
	+ fflush(stdout);
	a = 0;
	while (1) {
	st1[a] = fgetc(stdin);
	@@ -99,7 +102,7 @@
	for (b = 0; b < words; b++) if (!strcmp(&vocab[b * max_w], st[a])) break;
	if (b == words) b = -1;
	bi[a] = b;
	- printf("\nWord: %s Position in vocabulary: %lld\n", st[a], bi[a]);
	+ printf("\nWord: %s Position in vocabulary: " PRIdLLD "\n", st[a], bi[a]);
	if (b == -1) {
	printf("Out of dictionary word!\n");
	break;
	Index: word2vec.c
	===================================================================
	--- word2vec.c (リビジョン 42)
	+++ word2vec.c (作業コピー)
	@@ -16,7 +16,11 @@
	#include <stdlib.h>
	#include <string.h>
	#include <math.h>
	-#include <pthread.h>
	+#include <time.h>
	+#include <inttypes.h>
	+#include <windows.h>
	+#include <process.h>
	+#include <malloc.h>

	#define MAX_STRING 100
	#define EXP_TABLE_SIZE 1000
	@@ -24,6 +28,8 @@
	#define MAX_SENTENCE_LENGTH 1000
	#define MAX_CODE_LENGTH 40

	+#define PRIdLLD "%"PRId64
	+
	const int vocab_hash_size = 30000000; // Maximum 30 * 0.7 = 21M words in the vocabulary

	typedef float real; // Precision of float numbers
	@@ -317,13 +323,13 @@
	ReadWord(word, fin);
	if (feof(fin)) break;
	a = AddWordToVocab(word);
	- fscanf(fin, "%lld%c", &vocab[a].cn, &c);
	+ fscanf(fin, PRIdLLD"%c", &vocab[a].cn, &c);
	i++;
	}
	SortVocab();
	if (debug_mode > 0) {
	- printf("Vocab size: %lld\n", vocab_size);
	- printf("Words in train file: %lld\n", train_words);
	+ printf("Vocab size: " PRIdLLD "\n", vocab_size);
	+ printf("Words in train file: " PRIdLLD "\n", train_words);
	}
	fin = fopen(train_file, "rb");
	if (fin == NULL) {
	@@ -338,16 +344,17 @@
	void InitNet() {
	long long a, b;
	unsigned long long next_random = 1;
	- a = posix_memalign((void **)&syn0, 128, (long long)vocab_size * layer1_size * sizeof(real));
	+ syn0 = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
	+
	if (syn0 == NULL) {printf("Memory allocation failed\n"); exit(1);}
	if (hs) {
	- a = posix_memalign((void **)&syn1, 128, (long long)vocab_size * layer1_size * sizeof(real));
	+ syn1 = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
	if (syn1 == NULL) {printf("Memory allocation failed\n"); exit(1);}
	for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
	syn1[a * layer1_size + b] = 0;
	}
	if (negative>0) {
	- a = posix_memalign((void **)&syn1neg, 128, (long long)vocab_size * layer1_size * sizeof(real));
	+ syn1neg = __mingw_aligned_malloc((long long)vocab_size * layer1_size * sizeof(real), 128);
	if (syn1neg == NULL) {printf("Memory allocation failed\n"); exit(1);}
	for (a = 0; a < vocab_size; a++) for (b = 0; b < layer1_size; b++)
	syn1neg[a * layer1_size + b] = 0;
	@@ -359,11 +366,12 @@
	CreateBinaryTree();
	}

	-void *TrainModelThread(void *id) {
	+__stdcall unsigned int TrainModelThread(void *arg) {
	+ long id = (long) arg;
	long long a, b, d, cw, word, last_word, sentence_length = 0, sentence_position = 0;
	long long word_count = 0, last_word_count = 0, sen[MAX_SENTENCE_LENGTH + 1];
	long long l1, l2, c, target, label, local_iter = iter;
	- unsigned long long next_random = (long long)id;
	+ unsigned long long next_random = (long long) id;
	real f, g;
	clock_t now;
	real *neu1 = (real *)calloc(layer1_size, sizeof(real));
	@@ -538,13 +546,14 @@
	fclose(fi);
	free(neu1);
	free(neu1e);
	- pthread_exit(NULL);
	+ _endthreadex(0);
	+ return 0; /* unreachable */
	}

	void TrainModel() {
	long a, b, c, d;
	FILE *fo;
	- pthread_t *pt = (pthread_t *)malloc(num_threads * sizeof(pthread_t));
	+ HANDLE *pt = malloc(num_threads * sizeof(HANDLE));
	printf("Starting training using file %s\n", train_file);
	starting_alpha = alpha;
	if (read_vocab_file[0] != 0) ReadVocab(); else LearnVocabFromTrainFile();
	@@ -553,12 +562,14 @@
	InitNet();
	if (negative > 0) InitUnigramTable();
	start = clock();
	- for (a = 0; a < num_threads; a++) pthread_create(&pt[a], NULL, TrainModelThread, (void *)a);
	- for (a = 0; a < num_threads; a++) pthread_join(pt[a], NULL);
	+ for (a = 0; a < num_threads; a++)
	+ pt[a] = (HANDLE) _beginthreadex(NULL, 0, TrainModelThread, (void *)a, 0, NULL);
	+ WaitForMultipleObjects(num_threads, pt, TRUE, INFINITE);
	+ for (a = 0; a < num_threads; a++) CloseHandle(pt[a]);
	fo = fopen(output_file, "wb");
	if (classes == 0) {
	// Save the word vectors
	- fprintf(fo, "%lld %lld\n", vocab_size, layer1_size);
	+ fprintf(fo, PRIdLLD " " PRIdLLD "\n", vocab_size, layer1_size);
	for (a = 0; a < vocab_size; a++) {
	fprintf(fo, "%s ", vocab[a].word);
	if (binary) for (b = 0; b < layer1_size; b++) fwrite(&syn0[a * layer1_size + b], sizeof(real), 1, fo);
	Index: makefile
	===================================================================
	--- makefile (リビジョン 42)
	+++ makefile (作業コピー)
	@@ -1,6 +1,7 @@
	CC = gcc
	#Using -Ofast instead of -O3 might result in faster code, but is supported only by newer GCC versions
	-CFLAGS = -lm -pthread -O3 -march=native -Wall -funroll-loops -Wno-unused-result
	+CFLAGS = -lm -std=c99 -O3 -march=native -Wall -funroll-loops -Wno-unused-result
	+EXT = .exe

	all: word2vec word2phrase distance word-analogy compute-accuracy

	@@ -17,4 +18,4 @@
	chmod +x *.sh

	clean:
	- rm -rf word2vec word2phrase distance word-analogy compute-accuracy
	\ No newline at end of file
	+ rm -rf word2vec$(EXT) word2phrase$(EXT) distance$(EXT) word-analogy$(EXT) compute-accuracy$(EXT)