Created
June 24, 2015 17:42
-
-
Save aladagemre/a44da0cc29dd2621b4a4 to your computer and use it in GitHub Desktop.
TDK'dan bir küme kelimenin eş anlamlılarını çeken betik
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# Fetches synonyms from the TDK dictionary.
# Input is read from words.txt, which lists one word per line.
# For each word, its synonyms are looked up and written to meanings.txt,
# one line per input word.
import codecs

import requests
from bs4 import BeautifulSoup

# Endpoint that receives the synonym search form.
SYNONYM_URL = 'http://www.tdk.gov.tr/index.php?option=com_esanlamlar&arama=esanlam'

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


def fetch_synonyms(keyword):
    """Return the synonym text the TDK site reports for *keyword*.

    Posts *keyword* as the ``keyword`` form field and extracts the text of the
    single ``<td class="meaning">`` cell from the response.

    Raises:
        AssertionError: if the response does not contain exactly one
            ``td.meaning`` cell.
        requests.RequestException: on network failure or timeout.
    """
    response = requests.post(
        SYNONYM_URL,
        data={'keyword': keyword},
        timeout=REQUEST_TIMEOUT,
    )
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(response.text, "html.parser")
    results = soup.find_all('td', class_="meaning")
    # Raised explicitly instead of using `assert`, which is stripped under -O;
    # the exception type and message are kept unchanged.
    if len(results) != 1:
        raise AssertionError("Not one result: %s" % results)
    return results[0].getText()


# Read the input as UTF-8 (the words are Turkish) and close the file promptly.
with codecs.open("words.txt", "r", encoding="utf-8") as wordfile:
    wordlist = wordfile.read().splitlines()

# The context manager closes the output file even if a lookup fails midway.
with codecs.open("meanings.txt", "w", encoding="utf-8") as meaninglist:
    for keyword in wordlist:
        # Drop surrounding whitespace (e.g. a stray "\r") and skip blank lines.
        keyword = keyword.strip()
        if not keyword:
            continue
        meaninglist.write(fetch_synonyms(keyword) + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment