Created
December 2, 2018 15:29
-
-
Save bowbowbow/4b78edb6b2721ce952b8742f9f5d4de6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard-library helpers: JSON parsing, regular expressions, date parsing.
import json, re, datetime
# Tabular data and numeric arrays.
import pandas as pd
import numpy as np
# NLTK provides sentence/word tokenisation, POS tagging and stemming.
import nltk
from nltk.stem.snowball import SnowballStemmer
# Both objects are built once at import time and shared by the tokenizers below.
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to be
# downloaded beforehand (nltk.download('stopwords')) - confirm.
stopwords = nltk.corpus.stopwords.words('english')
stemmer = SnowballStemmer("english")
# scikit-learn pieces used to assemble the feature pipeline in clustering().
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances_argmin_min, pairwise
def data_load(data_dir='./data', num_files=8):
    """Load the Korea Herald JSON dumps into one DataFrame.

    Reads ``koreaherald_1517_<i>.json`` for ``i`` in ``range(num_files)`` from
    ``data_dir``. Each file is parsed as a JSON object of the form
    ``{column: {doc_id: value}}`` and must contain at least the ``' body'``,
    ``' time'`` and ``'title'`` columns (note the leading spaces).

    Three things are derived per document before the frame is built:

    * ``year``      - calendar year parsed from ``' time'``.
    * ``timestamp`` - ``(month * 30 + day) / 100.0``, a coarse day-of-year
      value used later as a numeric feature.
    * ``title``     - the original title with bracketed tags such as
      ``[Graphic News]`` removed.

    Args:
        data_dir: Directory holding the JSON files. Defaults to ``'./data'``.
        num_files: Number of consecutively numbered files to read.
            Defaults to ``8``.

    Returns:
        The per-file frames concatenated with ``pd.concat`` (the per-file
        doc-id index is kept, so index labels may repeat across files).
        An empty DataFrame when ``num_files`` is 0.
    """
    frames = []
    for i in range(num_files):
        path = '{}/koreaherald_1517_{}.json'.format(data_dir, i)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        data['year'] = {}
        data['timestamp'] = {}
        for doc_id in data[' body']:
            published = datetime.datetime.strptime(
                data[' time'][doc_id], '%Y-%m-%d %H:%M:%S')
            data['year'][doc_id] = published.year
            data['timestamp'][doc_id] = (published.month * 30 + published.day) / 100.0
            # str.replace() treats its argument literally, so the previous
            # .replace(r"\[.*\]", "") never matched anything. Use a real
            # regex; non-greedy so two tags in one title do not swallow the
            # text between them.
            data['title'][doc_id] = re.sub(
                r"\[.*?\]", "", data['title'][doc_id]).strip()

        # columns: [' author' ' body' ' description' ' section' ' time' 'title']
        frames.append(pd.DataFrame.from_dict(data))

    if not frames:
        # pd.concat([]) raises ValueError; give callers an empty frame instead.
        return pd.DataFrame()
    return pd.concat(frames)
def chunking(text):
    """Return the noun-phrase chunks found in ``text``.

    The text is split into sentences, each sentence is word-tokenised
    (keeping purely alphabetic tokens only), POS-tagged, and parsed with the
    chunk grammar ``NP: {<DT>?<JJ>?<NN.*>*}``. Every ``NP`` subtree becomes
    one string whose words are joined with underscores.

    Args:
        text: Raw document text.

    Returns:
        A list of underscore-joined chunk strings, in document order.
    """
    np_parser = nltk.RegexpParser("NP: {<DT>?<JJ>?<NN.*>*}")
    phrases = []
    for sentence in nltk.sent_tokenize(text):
        tokens = [tok for tok in nltk.word_tokenize(sentence) if tok.isalpha()]
        parsed = np_parser.parse(nltk.pos_tag(tokens))
        for subtree in parsed.subtrees():
            if subtree.label() != "NP":
                continue
            # leaves() yields (word, tag) pairs; only the words are kept.
            phrases.append('_'.join(token for token, _tag in subtree.leaves()))
    return phrases
def get_proppers(text):
    """Collect the proper nouns that occur in ``text``.

    Each sentence is word-tokenised (alphabetic tokens only) and POS-tagged;
    tokens tagged ``NNP`` or ``NNPS`` are returned in order of appearance,
    duplicates included.

    Args:
        text: Raw document text.

    Returns:
        A list of proper-noun tokens.
    """
    proper_tags = ('NNP', 'NNPS')
    found = []
    for sentence in nltk.sent_tokenize(text):
        tokens = [tok for tok in nltk.word_tokenize(sentence) if tok.isalpha()]
        found.extend(tok for tok, tag in nltk.pos_tag(tokens) if tag in proper_tags)
    return found
def tokenize_and_stem(text):
    """Tokenise ``text`` and return the stems of its content words.

    Sentences are split with NLTK, tokens that are not purely alphabetic are
    dropped, English stop words are removed, and each remaining token is
    reduced to its stem with the module-level Snowball ``stemmer``.

    Args:
        text: Raw document text.

    Returns:
        A list of stems, in document order (duplicates kept).
    """
    stems = []
    for sent in nltk.sent_tokenize(text):
        for word in nltk.word_tokenize(sent):
            if not word.isalpha():
                continue
            # Compare in lowercase: the stop-word list holds lowercase
            # entries, so a sentence-initial "The" used to slip through the
            # filter and come back out of the stemmer as "the".
            if word.lower() in stopwords:
                continue
            stems.append(stemmer.stem(word))
    return stems
class TextSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that selects a single column (as a Series) by key."""

    def __init__(self, key):
        self.key = key

    def fit(self, x, y=None):
        return self

    def transform(self, data_dict):
        return data_dict[self.key]


class NumberSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that selects a single column as a one-column frame."""

    def __init__(self, key):
        self.key = key

    def fit(self, x, y=None):
        return self

    def transform(self, data_dict):
        # Double brackets keep the result 2-D, as downstream encoders expect.
        return data_dict[[self.key]]


def _build_vectorizer():
    """Assemble the weighted FeatureUnion that turns articles into vectors."""
    return FeatureUnion(
        transformer_list=[
            ('propernouns', Pipeline([
                ('selector', TextSelector(key=' body')),
                # lowercase=False is essential here: TfidfVectorizer lowercases
                # the text *before* calling the tokenizer by default, which
                # strips the capitalisation get_proppers' POS tagging relies on.
                ('tfidf', TfidfVectorizer(tokenizer=get_proppers,
                                          lowercase=False,
                                          ngram_range=(1, 2))),
            ])),
            ('title', Pipeline([
                ('selector', TextSelector(key='title')),
                ('tfidf', TfidfVectorizer(max_df=300, tokenizer=tokenize_and_stem,
                                          ngram_range=(1, 2))),
            ])),
            ('body', Pipeline([
                ('selector', TextSelector(key=' body')),
                ('tfidf', TfidfVectorizer(max_df=350, tokenizer=tokenize_and_stem,
                                          ngram_range=(1, 2))),
            ])),
            ('author', Pipeline([
                ('selector', NumberSelector(key=' author')),
                ('onehot', OneHotEncoder(categories='auto')),
            ])),
            ('section', Pipeline([
                ('selector', NumberSelector(key=' section')),
                ('onehot', OneHotEncoder(categories='auto')),
            ])),
            ('timestamp', Pipeline([
                ('selector', NumberSelector(key='timestamp')),
            ])),
        ],
        # Relative weight of each component inside the FeatureUnion.
        transformer_weights={
            'section': 1.2,
            'propernouns': 2.0,
            'title': 1.0,
            'body': 1.0,
            'timestamp': 0.5,
            'author': 0.3,
        },
    )


def clustering(year, n_clusters=20, random_state=None):
    """Cluster one year of articles with k-means and print a summary.

    Loads all articles via ``data_load()``, keeps those whose ``year`` column
    equals ``year``, label-encodes ``' author'`` and ``' section'``, builds
    the combined feature matrix and fits ``KMeans``. For the ten largest
    clusters it prints the positional index and title of the five articles
    closest to the cluster centre.

    Args:
        year: Calendar year to keep (compared against the ``year`` column).
        n_clusters: Number of k-means clusters. Defaults to ``20``; reduced
            to the number of articles if fewer are available.
        random_state: Optional seed forwarded to ``KMeans`` for reproducible
            runs. ``None`` (default) keeps the unseeded behaviour.

    Returns:
        None. Results are written to stdout.
    """
    import collections
    from sklearn.cluster import KMeans

    print('year :', year)
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the loaded data (SettingWithCopyWarning).
    df = data_load()
    df = df[df.year == year].copy()
    if df.empty:
        print('no articles found for year', year)
        return

    for column in (' author', ' section'):
        df[column] = LabelEncoder().fit_transform(df[column])

    X = _build_vectorizer().fit_transform(df)

    # KMeans refuses n_clusters > n_samples.
    n_clusters = min(n_clusters, len(df))
    model = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=100,
                   n_init=1, random_state=random_state)
    model.fit(X)
    df['cluster'] = model.labels_

    sizes = sorted(collections.Counter(model.labels_).items(),
                   key=lambda item: item[1], reverse=True)

    closest, _ = pairwise_distances_argmin_min(model.cluster_centers_, X)
    print('closest :', closest)

    # Distance of every article to every centre; computed once, not per cluster.
    distances = model.transform(X)
    for cluster_id, _size in sizes[:10]:
        print('cluster {}'.format(cluster_id))
        # Stable sort keeps the original tie order (lowest row index first).
        nearest = np.argsort(distances[:, cluster_id], kind='stable')[:5]
        for doc_id in nearest:
            doc_id = int(doc_id)
            print(doc_id, ', title :', df['title'].iloc[doc_id])
# Script entry point: cluster the 2017 articles when run directly.
if __name__ == '__main__':
    clustering(year=2017)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
2017
counts : [(12, 924), (7, 830), (6, 773), (0, 744), (8, 647), (10, 633), (1, 607), (2, 579), (11, 569), (13, 433), (18, 427), (4, 398), (15, 265), (19, 227), (14, 219), (5, 216), (3, 213), (17, 208), (9, 183), (16, 31)]
cluster 12
2456 , title : [Graphic News] A day in Seoul in numbers
555 , title : Top 10 national news stories
459 , title : Prosecutors seek arrest warrants for two Park aides over bribery
528 , title : Prosecutors detain two Park aides over bribery allegations
314 , title : Prosecutors seek arrest warrant for ex-MBC chief for aiding in state's media control
cluster 7
5145 , title : [Graphic News] Election in numbers
6862 , title : Acting president, provincial gov. virtually tied in presidential poll
7200 , title : Support grows for opposition presidential hopefuls, poll shows
6171 , title : Democratic Party begins polling to elect presidential candidate
7039 , title : Main opposition warns of impeaching acting president if no probe extension
cluster 6
6108 , title : [Graphic News] Salvage operation procedure
6219 , title : [Graphic News] Former President Park Geun-hye‘s grilling by the prosecution
6603 , title : [Graphic News] Four possible scenarios
2456 , title : [Graphic News] A day in Seoul in numbers
555 , title : Top 10 national news stories
cluster 0
6219 , title : [Graphic News] Former President Park Geun-hye‘s grilling by the prosecution
6603 , title : [Graphic News] Four possible scenarios
6108 , title : [Graphic News] Salvage operation procedure
9109 , title : 70% of Korea’s fine dust particles come from China: study
7057 , title : Jailed Samsung heir questioned again in corruption probe
cluster 8
2162 , title : Chronology of North Korea's missile, rocket launches
1985 , title : S. Korea, US agree to most powerful sanctions over N. Korea nuke test
2113 , title : NK confirms intermediate-range missile launch, hints at more tests
5224 , title : S. Korea, US military chiefs to meet amid NK missile threats
5248 , title : EU to hold ‘special’ session on North Korea next week
cluster 10
7812 , title : [Graphic News] US forces in South Korea
8084 , title : [Graphic News] Military strength of the two Koreas
5926 , title : US naval commander to get Korean name
3604 , title : THAAD deployment was ‘transparent’: US military
3977 , title : S. Korea, US defense ministers agree to closely coordinate against NK threats
cluster 1
5145 , title : [Graphic News] Election in numbers
1690 , title : Parliament calls SMEs minister nominee 'unfit' in hearing report
2027 , title : Cheong Wa Dae proposes forming consultation body with ruling, opposition parties
2193 , title : Cheong Wa Dae finds documents from former govt. related to cultural blacklist
4044 , title : Commission to conduct first opinion survey to determine fate of nuclear reactors
cluster 2
562 , title : Top nuke envoys of S. Korea, China to meet this week to discuss NK issue
646 , title : N. Korea says nuclear test threat should be taken 'literally'
173 , title : US envoy on N. Korean nukes to visit S. Korea this week
338 , title : S. Korean foreign minister calls for peaceful, diplomatic resolution to NK issue
512 , title : S. Korean envoy heads to China for talks on NK nukes
cluster 11
5437 , title : Moon's approval rating slips to 72.5%
3043 , title : Moon's approval rating slightly slips but still at record high level
3230 , title : Moon's rating further gains amid growing N. Korea tension
5225 , title : Moon's approval rating declines amid N. Korean risks
5531 , title : Moon's approval rating rises to 74.4%
cluster 13
4802 , title : UN chief condemns N. Korea's missile test
4817 , title : Experts say NK missile test to reduce Moon's options in pursuit of dialogue
4535 , title : S. Korea reaffirms firm response to NK nukes, but openness to civilian exchanges
4542 , title : China calls for new talks with North Korea on nukes
4161 , title : NK rejects S. Korean aid provider's inter-Korean exchanges, citing sanctions
cluster 18
7269 , title : Spy agency confirms murder of NK leader's half brother
7063 , title : NK diplomat, airline staff named as suspects in Kim's murder: Malaysia
6894 , title : Ex-NK envoy to UN visits Malaysia over Kim's death
7004 , title : VX nerve agent found on dead half brother of NK leader
7107 , title : NK leader's paranoia may have led to killing of half brother: defector
cluster 4
5145 , title : [Graphic News] Election in numbers
6108 , title : [Graphic News] Salvage operation procedure
6219 , title : [Graphic News] Former President Park Geun-hye‘s grilling by the prosecution
3642 , title : Foreign teachers left stranded by crackdown on visa violations
3530 , title : Five elite schools may face loss of special status
cluster 15
8748 , title : Abe instructs envoy to request S. Korea implement deal on comfort women: report
8339 , title : China's foreign ministry warns against worsening tension after N. Korea's failed missile launch
8700 , title : Trump says all options on table in dealing with N. Korea
8469 , title : Pence to send clear message over N. Korea, THAAD during Seoul visit: official
8428 , title : Korea to attend IHO meeting in Monaco to promote use of East Sea
cluster 19
5481 , title : Korea, China to hold talks on EEZ next week
5473 , title : UN Diplomat: $1 billion in N. Korea exports would be banned
5474 , title : UN set to vote on new N. Korea sanctions
4256 , title : S. Korea, China hold annual public diplomacy forum amid THAAD row
5929 , title : Senior diplomats of S. Korea, US, Japan to meet over policy, N. Korea
cluster 14
3360 , title : [Herald Interview] ‘Canada wholehearted on global leadership amid 150th anniversary’
3753 , title : [Herald Interview] Ethnic Koreans sow seeds of success in Kazakhstan
5982 , title : [Herald Interview] ‘Indonesia hub-and-spoke anchor of ASEAN economy’
3752 , title : Astana Expo harbinger of future energy politics
3958 , title : [Herald Interview] ‘Swiss direct democracy enfranchises citizens’
cluster 5
3233 , title : US rejects 'freeze-for-freeze' proposal from China, Russia over NK
3235 , title : Victor Cha: China should be made to pay NK for constraints on nuclear program
3579 , title : Human rights group urges China not to repatriate 5 NK defectors
5641 , title : Trump very ‘disappointed in China’ for doing nothing to stop N. Korea‘s provocations
3298 , title : Missile test deals blow to Moon’s NK initiative
cluster 3
4004 , title : Conservatives gear up to elect new leaders
3970 , title : [News Focus] Moon's extra budget speech fails to win over opposition
3088 , title : [News Focus] Bareun Party charts new conservative course
5933 , title : Assembly passes pending bills, fails to handle budget
4265 , title : Main opposition party refuses to cooperate with government
cluster 17
5217 , title : Record number of overseas Koreans cast ballots in presidential election
4218 , title : 4 in 10 older workers want post-retirement jobs: poll
4117 , title : Korea to take more responsibility for dementia patients
4347 , title : Number of China-bound travelers drops 42% on THAAD spat: data
4428 , title : Suspected vessel hijacking false alarm, crew confirmed safe
cluster 9
6527 , title : Tillerson to visit Korea this week
6232 , title : Top diplomats of S. Korea, Vietnam discuss cooperation on NK threats
6961 , title : S. Korea, US, Japan to discuss N. Korea's assassination during trilateral talks: official
6608 , title : S. Korea, US to discuss N. Korean nuclear threat: spokesman
7202 , title : Top diplomats from Korea, Japan set for talks amid heightened tensions
cluster 16
6618 , title : CCTV installed to protect sex slave statue
4534 , title : Korea's childbirths on steady decline in March
6434 , title : Participants in state nonsmoking program increase in 2016
6067 , title : How the ferry Sewol sank and what it means
6258 , title : [From the Scene] Final preparations underway to lift sunken Sewol ferry
2016
counts : [(3, 954), (8, 853), (1, 802), (0, 754), (10, 627), (4, 579), (7, 469), (6, 340), (2, 334), (16, 276), (12, 261), (14, 247), (9, 224), (18, 193), (5, 167), (13, 151), (11, 130), (19, 47), (17, 44), (15, 33)]
cluster 3
5621 , title : Bulgaria reaffirms support for denuclearization, vows to fully implement N. Korea sanctions
1840 , title : Hungry N. Korean soldiers committing various crimes: report
6930 , title : South Korea, China agree to fully implement new U.N. sanctions on North Korea
5033 , title : South Korea welcomes new U.S. sanctions on North Korea
6582 , title : South Korea to unveil its own sanctions on North Korea this week
cluster 8
5171 , title : Labor union leader gets five-year jail term for violent rallies
5996 , title : Nearly 1,400 pregnant women, families cancel trips on Zika virus fears
4927 , title : Korea reports first Zika virus-infected patient
4901 , title : Korea to toughen monitoring on Zika virus
5035 , title : Court upholds suspended jail term for man who threatened to blow up plane
cluster 1
5171 , title : Labor union leader gets five-year jail term for violent rallies
5545 , title : Former senior prosecutor indicted over lobbying scandal
5327 , title : 22 men arrested five years after gang rape
5483 , title : Senior prosecutor arrested over bribe-taking
2962 , title : Google Korea CEO to stand trial over toxic sterilizers
cluster 0
1840 , title : Hungry N. Korean soldiers committing various crimes: report
5621 , title : Bulgaria reaffirms support for denuclearization, vows to fully implement N. Korea sanctions
1096 , title : NK leader observes missile launches, calls for stronger nuke force
1355 , title : NK leader calls SLBM launch success, boasts of nuke
1073 , title : Number of NK defectors up 15% on-year in first 8 months of this year
cluster 10
5171 , title : Labor union leader gets five-year jail term for violent rallies
111 , title : Prosecutors broadening probe into Choi Soon-sil scandal
2784 , title : 2 ex-presidential aides to be questioned over Choi Soon-sil scandal
115 , title : Prosecutors raid banks related to Choi Soon-sil
119 , title : Prosecutors place Choi Soon-sil under emergency detention after questioning
cluster 4
3173 , title : Defense ministry counters health, diplomatic concerns over THAAD deployment
5842 , title : THAAD to 'dramatically' enhance capacity to counter NK threat: defense chief
5626 , title : Senate defense budget bill fails to include amendment calling for THAAD deployment in S. Korea
3939 , title : Park strongly defends THAAD deployment decision
4473 , title : Korea, U.S. reach decision to deploy THAAD defense system in Korea
cluster 7
2368 , title : [Graphic News] What Park should do?
101 , title : ‘Park should own up’
5693 , title : U.N. chief remains ahead of other presidential hopefuls: survey
2332 , title : Park Geun-hye impeached
2588 , title : Parliament to vote on Park's impeachment early in Dec.: Democratic Party
cluster 6
5693 , title : U.N. chief remains ahead of other presidential hopefuls: survey
101 , title : ‘Park should own up’
4103 , title : Feud-ridden Saenuri to elect floor leader in May
3605 , title : Park to meet with new floor leaders of 3 political parties
4111 , title : Justice minister vows to crack down on election law violations
cluster 2
4234 , title : Sri Lanka, paradise for the restless
4232 , title : Sri Lanka, the land of Ceylon tea
4256 , title : [HERALD INTERVIEW] ‘Political solution key to global humanitarian crises’
4414 , title : India partners Korea at maiden maritime summit
4411 , title : Norwegian PM to visit Korea
cluster 16
1069 , title : Park, Abe hold summit over NK provocations, bilateral issues
1095 , title : Park, Obama to hold summit amid NK provocations
909 , title : Forum ponders constructive path for Korea, Japan
1084 , title : Park arrives in Laos to attend ASEAN, EAS summits
1121 , title : Turkey honors patriotism on 94th Victory Day
cluster 12
4928 , title : No. 2 diplomats from U.S., S. Korea, Japan to hold talks next week
5158 , title : Swiss president to visit S. Korea next week
5465 , title : S. Korea set to hold top commanders' meeting over N.K. missile
5326 , title : Korea, U.S. to discuss cooperation at cybersecurity meeting
5373 , title : S. Korea's vice defense minister visits Cambodia, Laos this week
cluster 14
3346 , title : Saenuri bigwigs to meet on feud
3149 , title : Constitutional Court rejects Saenuri challenge to Assembly Quorom
5723 , title : [Newsmaker] Ahn Cheol-soo hit by political double whammy
3103 , title : Park vetoes hearing bill
5610 , title : Constitutional revision talk resurfaces
cluster 9
3058 , title : Trump raps Obama for visiting Hiroshima visit without noting Japan's aggression
3059 , title : Trump renews calls for allies to pay up for U.S. protection
2925 , title : Clinton underscores 'power of allies,' warns of dangers of Trump
3168 , title : Obama says U.S., Japan agree on strengthening defense
3332 , title : Clinton says Trump's statement on N.K. shows he's not qualified to be president
cluster 18
4251 , title : Only permanent residents to be able to apply for naturalization in S. Korea
2093 , title : Court again rejects Korean A-bomb victims' suit against gov't
3602 , title : Korea to control e-cigarettes in antismoking policy
3632 , title : No. of visitors on Jeju tops 5 mln
2118 , title : Survey shows signs of improving S. Korea-Japan relations
cluster 5
5773 , title : Former opposition party leader elected as new speaker
4002 , title : Minjoo remains ahead of Saenuri in polls
4602 , title : [ELECTION 2016] Time runs out for opposition unity
3291 , title : Debate grows over political realignment
4238 , title : Opposition elects troubled by election lawsuits
cluster 13
3876 , title : N.K.'s missile tests aimed at highlighting accomplishments before party congress: Seoul
5418 , title : Latest test reveals N. Korean missile capable of flying 3,500 km: military
3957 , title : China unlikely to send delegation to N. Korea‘s rare party congress
5234 , title : South Korea's U.N. ambassador calls for stringent implementation of sanctions on North Korea
5337 , title : China submits implementation plan on sanctions against N. Korea
cluster 11
5492 , title : Opposition lawmakers propose bill banning texting after work hours
3784 , title : Minister apologizes for official's 'dogs and pigs' remarks
3684 , title : [Newsmaker] Official apologizes for ‘dogs and pigs’ remarks
2009 , title : 3 ministries, Oxy to face parliamentary on-site probe
3417 , title : Opposition parties, civic groups allign to reform chaebol
cluster 19
5504 , title : N. Korea fires off 2 Musudan IRBM missiles
5506 , title : N. Korea botches fifth Musudan missile test-launch
3610 , title : N.K. adopts decision to elect its leader as ruling party's chairman
4939 , title : U.S. imposes sanctions on N. Korean leader over human rights abuses
3410 , title : North Korea built up personality cult of Kim Jong-un ahead of key congress
cluster 17
5259 , title : Korean Navy's first-generation submarines decommissioned
2100 , title : Park visits major hub for ICT startups
2008 , title : 10 mln customer data leaked from online shopping site
5434 , title : S. Korea approves building of 2 more nuclear reactors
5563 , title : Two Saenuri leaders take fence-mending step
cluster 15
1859 , title : Canada warns 'extreme risks' for travelers wanting to visit N. Korea
1217 , title : Asiatic black bear from N. Korea on 1st animal exchange program dies
1193 , title : NK executed vice premier in July
4814 , title : Detained U.S. citizen admits his crime of espionage: N. Korea
5890 , title : Speculation grows over North Korea's missile test
2015
counts : [(19, 733), (3, 645), (7, 628), (11, 611), (1, 605), (6, 570), (4, 552), (0, 460), (13, 423), (2, 291), (12, 289), (9, 223), (15, 215), (8, 202), (17, 167), (5, 157), (16, 151), (14, 142), (10, 66), (18, 26)]
cluster 19
5004 , title : Families of Sewol victims call for swift salvage of sunken ferry
5093 , title : Prosecution appeals guilty verdict of Korean Air heiress
5187 , title : Ex-parliament chief gets suspended jail term for harassment
4667 , title : Prosecutors raid POSCO E&C over alleged slush funds
5170 , title : Ex-Navy chief indicted for alleged graft
cluster 3
7136 , title : Opposition party unveils own proposal for pension reform
6670 , title : Ruling party chief willing to cooperate with corruption scandal probe
4397 , title : Park calls for compromise on labor, pension reforms
6692 , title : Park orders thorough probe into bribery scandal
6614 , title : Opposition party to consider impeaching embattled PM
cluster 7
3919 , title : No long-range rocket seen at North Korea's launch site yet: diplomatic source
3913 , title : N. Korea vows to launch rocket
4266 , title : S. Korea's Red Cross proposes talks with North on Sept. 7 for family reunions
3783 , title : N. Korea shows no signs of nuclear test preparations: 38 North
3903 , title : Koreas exchange rough lists of candidates for family reunions
cluster 11
2168 , title : S. Korea reports no new MERS cases for 10th day
2148 , title : S. Korea reports no new MERS cases for 11th day
2205 , title : S. Korea reports no new MERS cases for 8th day
2190 , title : S. Korea reports no new MERS cases for 9th day
2108 , title : S. Korea reports no new MERS cases for 14th day
cluster 1
815 , title : Police arrest 51 at massive protest rally
1121 , title : Korean indicted for flag burning during protest rally
1154 , title : Prosecutors to indict former president's brother
1122 , title : Civic group member arrested over alleged security law breach
882 , title : Former POSCO chief indicted over alleged corruption
cluster 6
2948 , title : S. Korea, Japan to meet over wartime sex slaves
2950 , title : FM Yun considers visiting Japan
3004 , title : Japanese scholars urge Abe to offer apology for history
2753 , title : FM Yun set for fence-mending trip to Japan
2619 , title : Any solution to sex slave issue should satisfy victims: FM Yun
cluster 4
4735 , title : N. Korea unlikely to conduct nuclear or long-range missile test: 38 North
4478 , title : N. Korean leader guides air force drill: KCNA
4934 , title : China urges restraint as N. Korea fires short-range missiles
4939 , title : N. Korea threatens missile attack on anti-Pyongyang leaflets
4673 , title : N. Korea spurns S. Korea's call for talks on Gaesong wages
cluster 0
6224 , title : [Graphic News] Comparison of two Koreas’ military strength
2442 , title : U.S. Army holds public hearings on proposal to permanently deploy THAAD to Guam
2311 , title : Price, favorable terms to be basics of S. Korea's defense
2315 , title : Military officer accused of leaking military info to China
2612 , title : U.S. strategic commander vows strong deterrence against N. Korea
cluster 13
1222 , title : Park's approval rating edges down amid history textbook row
3802 , title : Ex-head of opposition party proposes ways to reform party
1186 , title : Park's approval rating edges down amid history textbook row
3770 , title : Park's approval rating edges down amid factional feud
2777 , title : Park's approval rating edges up as factional feud subsides
cluster 2
4295 , title : S. Korean students urge Abe to apologize for sexual slavery
4243 , title : S. Korea urges Japan to take action over sex slave issue
4338 , title : China says foreign leaders will attend Sept. 3 military parade
4108 , title : Deputy FM to visit Iran for policy talks
4296 , title : U.S. says respects S. Korea's decision to attend Chinese military parade
cluster 12
4528 , title : S. Korea, China, Japan may discuss AIIB in Seoul: Beijing
4538 , title : S. Korea 'positively' mulling joining China-led bank: source
4621 , title : Seoul, Tokyo to hold sex slavery talks
4531 , title : Seoul says no decision yet on joining China-led bank
4546 , title : U.S. says it's up to S. Korea to decide on AIIB
cluster 9
3111 , title : Experts say S. Korea can contain MERS with quarantine
2952 , title : MERS feared to dent consumption, growth: foreign investors
2971 , title : Supporters of sexual minorities hold event in Seoul
3096 , title : S. Korea forms MERS task force on national image
3472 , title : Unemployment among youth with no job experience hits 12-year high
cluster 15
3875 , title : Parliamentary committee passes lawmaker's expulsion motion over alleged sexual assault
4260 , title : Sex trade businesses caught by police nearly triple in 2 years
4258 , title : NPAD may oust ex-vice spokesman for praising N.K. leader
4257 , title : President Park's approval rating reaches near 50%
4256 , title : Seoul proposes Sept. 7 talks for family reunions
cluster 8
3213 , title : Georgia celebrates national independence
2506 , title : Danish ambassador discusses N.K. visit
2663 , title : Turkey promotes unexplored treasures
3033 , title : Investing in Philippines opens door to ASEAN
3034 , title : Jordan celebrates 69th anniversary of independence
cluster 17
2627 , title : Four elite schools fail assessments
2474 , title : Education Ministry snubs call to allow tuition fee hikes
3186 , title : [Herald Interview] ‘High payoffs both driving factor, risk in education’
2084 , title : Seoul private school stripped of elite status
3185 , title : [Herald Interview] Norway stresses state-funded education
cluster 5
277 , title : Korea, Japan hold talks on 'comfort women' issue
330 , title : Seoul, Tokyo to hold talks on 'comfort women' issue
75 , title : Korea, Japan strike deal on 'comfort women'
79 , title : Korea, Japan FMs to meet over sex slave issue
450 , title : Park, leaders of Visegrad Group urge N. Korea to halt nuclear program
cluster 16
2361 , title : No. of foreign residents more than triples over 10 years
2825 , title : S. Korea reports two more MERS deaths, 5 new cases
2053 , title : [Herald Interview] Only the beginning for Korea’s migrant workers’ labor movement
2449 , title : Korea eyes 37% emissions cut
1925 , title : Crimes by seniors on the rise
cluster 14
1743 , title : China 'cynical' about N. Korea's behavior: source
4191 , title : N. Korea's Choe Ryong-hae leaves for Beijing to attend military parade: report
1948 , title : N. Korean economic conditions appear to be improving: CRS report
2878 , title : Kim Jong-un offers condolences to China ship sinking victims: report
4146 , title : S. Korea voices regret over N. Korea's flak against Park
cluster 10
2458 , title : Dual-income families rise amid economic slump in 2014
5688 , title : S. Korea opens int'l bidding for recovery of sunken ferry
3416 , title : U.S. rejects N. Korea's nuclear warhead miniaturization claims
6814 , title : Household loan growth picks up in Feb.
1916 , title : U.S. intelligence sees possibility of N.K. provocation around October: S. Korean lawmaker
cluster 18
619 , title : Koreas to hold inter-Korean meeting this week
705 , title : Koreas to hold inter-Korean talks next week
344 , title : Koreas to hold high-level talks this week
577 , title : Koreas agree to hold high-level talks next month
3243 , title : Koreas to conduct pest control on pine trees at NK mountain next week