maidis · February 11, 2019 12:31
diff --git a/kelime-frekansi.cpp b/kelime-frekansi.cpp
 #include <iostream>
 #include <algorithm>
 #include <fstream>
 #include <string>
 #include <unordered_map>
 #include <set>
 #include <functional>

 /**
  * Counts how often each whitespace-separated token occurs in "olanlar.txt"
  * and prints the tokens to stdout, most frequent first, one per line in the
  * form "<token> :: <count>".
  *
  * @return 0 on success, 1 if the input file could not be opened.
  */
 int main()
 {
     // Read-only stream: std::fstream defaults to in|out and therefore fails
     // to open files the process may read but not write.
     std::ifstream text("olanlar.txt");
     if (!text)
     {
         std::cerr << "olanlar.txt could not be opened" << '\n';
         return 1;
     }

     std::unordered_map<std::string, int> frekans;

     // Test the extraction itself. Checking the stream state before reading
     // would run the body once more after the last token and count a phantom
     // empty word.
     std::string word;
     while (text >> word)
         ++frekans[word];

     // https://thispointer.com/how-to-sort-a-map-by-value-in-c/
     using Entry = std::pair<std::string, int>;

     // Orders entries by descending count; parameters are taken by reference
     // so that every comparison does not copy two strings.
     auto compFunctor = [](const Entry& elem1, const Entry& elem2)
     {
         return elem1.second > elem2.second;
     };

     // A multiset is required here: the comparator treats words with equal
     // counts as equivalent, and a plain set would keep only one of them.
     std::multiset<Entry, decltype(compFunctor)> setOfWords(
         frekans.begin(), frekans.end(), compFunctor);

     for (const Entry& element : setOfWords)
         std::cout << element.first << " :: " << element.second << '\n';

     return 0;
 }
diff --git a/nutuk-frekans.txt b/nutuk-frekans.txt
 bey :: 2359
 paşa :: 2278
 millet :: 1751
 milliye :: 1157
 hükümet :: 1090
 efendi :: 1043
 istanbul :: 918
 meclis :: 914
 kendi :: 831
 karşı :: 701
 suret :: 677
 vaziyet :: 647
 devlet :: 632
 ordu :: 629
 telgraf :: 624
 ben :: 619
 sivas :: 618
 bütün :: 618
 ali :: 614
 hareket :: 605
 tarih :: 601
 memleket :: 566
 kumandan :: 563
 kuvvet :: 560
 kemal :: 558
 arz :: 552
 fırka :: 539
 kabul :: 539
 büyük :: 530
 kongre :: 524
 kumanda :: 520
 ankara :: 513
 hazret :: 507
 cemiyet :: 506
 zaman :: 493
 mustafa :: 493
 cevap :: 492
 kolordu :: 415
 şifre :: 413
 vatan :: 406
 vekil :: 390
 hak :: 390
 heyet :: 389
 rauf :: 386
 idare :: 380
 umumi :: 380
 nazar :: 379
 evvel :: 372
 vesika :: 365
 husus :: 364
 mesele :: 363
 cephe :: 359
 taraf :: 359
 çalış :: 359
 maksat :: 357
 hukuk :: 355
 ingiliz :: 353
 reis :: 348
 rica :: 346
 vali :: 346
 vazife :: 341
 lüzum :: 340
 erzurum :: 335
 düşman :: 334
 ara :: 334
 nazır :: 331
 namına :: 326
 yalnız :: 326
 teşkil :: 317
 gönder :: 316
 işgal :: 312
 baş :: 310
 emir :: 309
 milli :: 309
 anadol :: 307
 kabine :: 306
 aynı :: 306
 nokta :: 305
 teklif :: 305
 teşkilat :: 304
 mütalaa :: 289
 intihap :: 284
 mühim :: 280
 mebus :: 280
 icap :: 274
 temsili :: 271
 riyaset :: 271
 söz :: 269
 zevat :: 263
 esas :: 260
 netice :: 260
 fikir :: 255
 dikkat :: 251
 türk :: 246
 ismet :: 241
 talep :: 240
 takip :: 239
 karar :: 238
 devam :: 237
 yeni :: 235
 umumiye :: 233
 cemal :: 229
 vilayet :: 229
 tayin :: 228
 ferit :: 223
 doğru :: 223
 murahhas :: 216
 hilafet :: 212
 cumhuriyet :: 211
 taarruz :: 211
 itilaf :: 208
 tatbik :: 207
 dahiliye :: 204
 temas :: 202
 havali :: 202
 arzu :: 202
 istiklal :: 202
 ilan :: 201
 telakki :: 200
 sebep :: 199
 tamamen :: 199
 efendim :: 199
 türki :: 198
 trakya :: 197
 arkadaş :: 195
 itimat :: 195
diff --git a/StemmingAndLemmatization.java b/StemmingAndLemmatization.java
 package zemberek.examples.morphology;

 import zemberek.core.logging.Log;
 import zemberek.morphology.TurkishMorphology;
 import zemberek.morphology.analysis.SingleAnalysis;
 import zemberek.morphology.analysis.WordAnalysis;

 import java.util.Scanner;
 import java.io.File;
 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.FileNotFoundException;


 /**
  * Lemmatizes every token of a text file with Zemberek's Turkish morphology.
  *
  * <p>Each line of the input file is split on whitespace and trailing punctuation is removed
  * from every token. Tokens of at most two characters are skipped. For the remaining tokens
  * the first lemma of the first analysis is logged and appended to {@code olanlar.txt};
  * tokens for which no lemma is available are printed to stdout and appended to
  * {@code olmayanlar.txt}.
  */
 public class StemmingAndLemmatization {

     /** Input text, one or more whitespace-separated tokens per line. */
     private static final String INPUT_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/text.txt";

     /** Receives the tokens the analyzer could not resolve to a lemma. */
     private static final String UNRECOGNIZED_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olmayanlar.txt";

     /** Receives one lemma per analyzed token. */
     private static final String LEMMA_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olanlar.txt";

     /**
      * Runs the lemmatization over {@link #INPUT_PATH}.
      *
      * @param args unused
      */
     public static void main(String[] args) {
         TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

         // The input is opened first so that a missing input file does not truncate the
         // output files; try-with-resources closes everything on every exit path.
         try (Scanner lines = new Scanner(new File(INPUT_PATH));
              BufferedWriter writer = new BufferedWriter(new FileWriter(UNRECOGNIZED_PATH));
              BufferedWriter writer2 = new BufferedWriter(new FileWriter(LEMMA_PATH))) {

             while (lines.hasNextLine()) {
                 try (Scanner tokens = new Scanner(lines.nextLine())) {
                     while (tokens.hasNext()) {
                         // Drop punctuation (and surrounding whitespace) at the end of the token.
                         String s = tokens.next().replaceAll("\\s*\\p{Punct}+\\s*$", "");
                         if (s.length() <= 2) {
                             continue;
                         }

                         String lemma = firstLemma(morphology, s);
                         if (lemma == null) {
                             System.out.println(s);
                             writer.write(s + '\n');
                         } else {
                             Log.info(lemma);
                             writer2.write(lemma + '\n');
                         }
                     }
                 }
             }
         } catch (java.io.IOException ioe) {
             // Also covers FileNotFoundException for a missing input file; report the failure
             // instead of silently producing incomplete output.
             ioe.printStackTrace();
         }
     }

     /**
      * Returns the first lemma of the first analysis of {@code token}.
      *
      * @param morphology analyzer to query
      * @param token      word to analyze
      * @return the lemma, or {@code null} when the analyzer yields no analysis or no lemma
      */
     private static String firstLemma(TurkishMorphology morphology, String token) {
         WordAnalysis results = morphology.analyze(token);
         if (results.getAnalysisResults().isEmpty()) {
             return null;
         }
         SingleAnalysis result = results.getAnalysisResults().get(0);
         java.util.List<?> lemmas = result.getLemmas();
         return lemmas.isEmpty() ? null : lemmas.get(0).toString();
     }

 }
	#include <iostream>
	#include <algorithm>
	#include <fstream>
	#include <string>
	#include <unordered_map>
	#include <set>
	#include <functional>

	/**
	 * Counts how often each whitespace-separated token occurs in "olanlar.txt"
	 * and prints the tokens to stdout, most frequent first, one per line in the
	 * form "<token> :: <count>".
	 *
	 * @return 0 on success, 1 if the input file could not be opened.
	 */
	int main()
	{
	    // Read-only stream: std::fstream defaults to in|out and therefore fails
	    // to open files the process may read but not write.
	    std::ifstream text("olanlar.txt");
	    if (!text)
	    {
	        std::cerr << "olanlar.txt could not be opened" << '\n';
	        return 1;
	    }

	    std::unordered_map<std::string, int> frekans;

	    // Test the extraction itself. Checking the stream state before reading
	    // would run the body once more after the last token and count a phantom
	    // empty word.
	    std::string word;
	    while (text >> word)
	        ++frekans[word];

	    // https://thispointer.com/how-to-sort-a-map-by-value-in-c/
	    using Entry = std::pair<std::string, int>;

	    // Orders entries by descending count; parameters are taken by reference
	    // so that every comparison does not copy two strings.
	    auto compFunctor = [](const Entry& elem1, const Entry& elem2)
	    {
	        return elem1.second > elem2.second;
	    };

	    // A multiset is required here: the comparator treats words with equal
	    // counts as equivalent, and a plain set would keep only one of them.
	    std::multiset<Entry, decltype(compFunctor)> setOfWords(
	        frekans.begin(), frekans.end(), compFunctor);

	    for (const Entry& element : setOfWords)
	        std::cout << element.first << " :: " << element.second << '\n';

	    return 0;
	}
	bey :: 2359
	paşa :: 2278
	millet :: 1751
	milliye :: 1157
	hükümet :: 1090
	efendi :: 1043
	istanbul :: 918
	meclis :: 914
	kendi :: 831
	karşı :: 701
	suret :: 677
	vaziyet :: 647
	devlet :: 632
	ordu :: 629
	telgraf :: 624
	ben :: 619
	sivas :: 618
	bütün :: 618
	ali :: 614
	hareket :: 605
	tarih :: 601
	memleket :: 566
	kumandan :: 563
	kuvvet :: 560
	kemal :: 558
	arz :: 552
	fırka :: 539
	kabul :: 539
	büyük :: 530
	kongre :: 524
	kumanda :: 520
	ankara :: 513
	hazret :: 507
	cemiyet :: 506
	zaman :: 493
	mustafa :: 493
	cevap :: 492
	kolordu :: 415
	şifre :: 413
	vatan :: 406
	vekil :: 390
	hak :: 390
	heyet :: 389
	rauf :: 386
	idare :: 380
	umumi :: 380
	nazar :: 379
	evvel :: 372
	vesika :: 365
	husus :: 364
	mesele :: 363
	cephe :: 359
	taraf :: 359
	çalış :: 359
	maksat :: 357
	hukuk :: 355
	ingiliz :: 353
	reis :: 348
	rica :: 346
	vali :: 346
	vazife :: 341
	lüzum :: 340
	erzurum :: 335
	düşman :: 334
	ara :: 334
	nazır :: 331
	namına :: 326
	yalnız :: 326
	teşkil :: 317
	gönder :: 316
	işgal :: 312
	baş :: 310
	emir :: 309
	milli :: 309
	anadol :: 307
	kabine :: 306
	aynı :: 306
	nokta :: 305
	teklif :: 305
	teşkilat :: 304
	mütalaa :: 289
	intihap :: 284
	mühim :: 280
	mebus :: 280
	icap :: 274
	temsili :: 271
	riyaset :: 271
	söz :: 269
	zevat :: 263
	esas :: 260
	netice :: 260
	fikir :: 255
	dikkat :: 251
	türk :: 246
	ismet :: 241
	talep :: 240
	takip :: 239
	karar :: 238
	devam :: 237
	yeni :: 235
	umumiye :: 233
	cemal :: 229
	vilayet :: 229
	tayin :: 228
	ferit :: 223
	doğru :: 223
	murahhas :: 216
	hilafet :: 212
	cumhuriyet :: 211
	taarruz :: 211
	itilaf :: 208
	tatbik :: 207
	dahiliye :: 204
	temas :: 202
	havali :: 202
	arzu :: 202
	istiklal :: 202
	ilan :: 201
	telakki :: 200
	sebep :: 199
	tamamen :: 199
	efendim :: 199
	türki :: 198
	trakya :: 197
	arkadaş :: 195
	itimat :: 195
	package zemberek.examples.morphology;

	import zemberek.core.logging.Log;
	import zemberek.morphology.TurkishMorphology;
	import zemberek.morphology.analysis.SingleAnalysis;
	import zemberek.morphology.analysis.WordAnalysis;

	import java.util.Scanner;
	import java.io.File;
	import java.io.BufferedWriter;
	import java.io.FileWriter;
	import java.io.FileNotFoundException;


	/**
	 * Lemmatizes every token of a text file with Zemberek's Turkish morphology.
	 *
	 * <p>Each line of the input file is split on whitespace and trailing punctuation is removed
	 * from every token. Tokens of at most two characters are skipped. For the remaining tokens
	 * the first lemma of the first analysis is logged and appended to {@code olanlar.txt};
	 * tokens for which no lemma is available are printed to stdout and appended to
	 * {@code olmayanlar.txt}.
	 */
	public class StemmingAndLemmatization {

	    /** Input text, one or more whitespace-separated tokens per line. */
	    private static final String INPUT_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/text.txt";

	    /** Receives the tokens the analyzer could not resolve to a lemma. */
	    private static final String UNRECOGNIZED_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olmayanlar.txt";

	    /** Receives one lemma per analyzed token. */
	    private static final String LEMMA_PATH = "/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olanlar.txt";

	    /**
	     * Runs the lemmatization over {@link #INPUT_PATH}.
	     *
	     * @param args unused
	     */
	    public static void main(String[] args) {
	        TurkishMorphology morphology = TurkishMorphology.createWithDefaults();

	        // The input is opened first so that a missing input file does not truncate the
	        // output files; try-with-resources closes everything on every exit path.
	        try (Scanner lines = new Scanner(new File(INPUT_PATH));
	             BufferedWriter writer = new BufferedWriter(new FileWriter(UNRECOGNIZED_PATH));
	             BufferedWriter writer2 = new BufferedWriter(new FileWriter(LEMMA_PATH))) {

	            while (lines.hasNextLine()) {
	                try (Scanner tokens = new Scanner(lines.nextLine())) {
	                    while (tokens.hasNext()) {
	                        // Drop punctuation at the end of the token. The whitespace parts
	                        // must be optional (\\s*): a Scanner token never contains
	                        // whitespace, so mandatory \\s would make the pattern never match.
	                        String s = tokens.next().replaceAll("\\s*\\p{Punct}+\\s*$", "");
	                        if (s.length() <= 2) {
	                            continue;
	                        }

	                        String lemma = firstLemma(morphology, s);
	                        if (lemma == null) {
	                            System.out.println(s);
	                            writer.write(s + '\n');
	                        } else {
	                            Log.info(lemma);
	                            writer2.write(lemma + '\n');
	                        }
	                    }
	                }
	            }
	        } catch (java.io.IOException ioe) {
	            // Also covers FileNotFoundException for a missing input file; report the failure
	            // instead of silently producing incomplete output.
	            ioe.printStackTrace();
	        }
	    }

	    /**
	     * Returns the first lemma of the first analysis of {@code token}.
	     *
	     * @param morphology analyzer to query
	     * @param token      word to analyze
	     * @return the lemma, or {@code null} when the analyzer yields no analysis or no lemma
	     */
	    private static String firstLemma(TurkishMorphology morphology, String token) {
	        WordAnalysis results = morphology.analyze(token);
	        if (results.getAnalysisResults().isEmpty()) {
	            return null;
	        }
	        SingleAnalysis result = results.getAnalysisResults().get(0);
	        java.util.List<?> lemmas = result.getLemmas();
	        return lemmas.isEmpty() ? null : lemmas.get(0).toString();
	    }

	}