Last active
September 27, 2025 03:19
-
-
Save cyb3rsalih/238117f458ecdcd92e2d310655d6e965 to your computer and use it in GitHub Desktop.
Find Turkish Newspaper for a Date; - Updated. The old site now has a paywall. I found a free alternative.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Example usage python3 gaste.py "23 Nisan 1935" | |
| import concurrent.futures | |
| import sys | |
| import urllib.parse | |
| import requests | |
# Newspaper identifiers as they appear in the archive's URL paths.
# Spaces are already percent-encoded (%20); order is the order in which
# the newspapers are probed.
gazeteler = [
    "aksam",
    "acik%20soz",
    "anadolu",
    "aravelk",
    "aydin",
    "beyoglu",
    "borsa",
    "bugun(siyasi,iktisadi,ictimai,gundelik%20gazete)/bugun(siyasi,iktisadi,ictimai,gundelik%20gazete)",
    "cumhuriyet",
    "dogu",
    "en%20son%20dakika",
    "en%20son%20havadis",
    "haber(aksam%20postasi)",
    "hakikat",
    "hakimiyeti%20milliye",
    "hakkin%20sesi",
    "halkin%20dili",
    "halkin%20sesi",
    "ikdam%20(halk%20gazetesi)",
    "ikdam(cumhuriyet%20icin,%20halk%20icin)",
    "ikdam(sabah%20postasi)",
    "izmir%20postasi",
    "jamanak",
    "kurun",
    "milliyet",
    "munakasa",
    "piyasa%20cetveli",
    "savas",
    "son%20dakika",
    "son%20posta",
    "son%20telgraf",
    "son%20saat",
    "tan",
    "tasviri%20efkar",
    "turk%20sozu",
    "turk%20dili",
    "turkische%20post",
    "ulus",
    "ulus%20sesi",
    "ulusal%20birlik",
    "vakit",
    "vatan",
    "yarin",
    "yeni%20asir",
    "yeni%20mersin",
    "yeni%20sabah",
    "yenigun",
    "yeniyol",
]
# Mapping of Turkish month names to the lowercase ASCII form used in the URL.
turkish_months = {
    "Ocak": "ocak",
    "Şubat": "subat",
    "Mart": "mart",
    "Nisan": "nisan",
    "Mayıs": "mayis",
    "Haziran": "haziran",
    "Temmuz": "temmuz",
    "Ağustos": "agustos",
    "Eylül": "eylul",
    "Ekim": "ekim",
    "Kasım": "kasim",
    "Aralık": "aralik",
}

# Folds Turkish-specific letters to ASCII so a month name can be matched
# regardless of case or diacritics. Both dotless "ı" and dotted capital "İ"
# map to "i" because str.lower() alone does not turn them into ASCII "i".
_TURKISH_TO_ASCII = str.maketrans({
    "\u00e7": "c",  # ç
    "\u00c7": "c",  # Ç
    "\u011f": "g",  # ğ
    "\u011e": "g",  # Ğ
    "\u0131": "i",  # ı
    "\u0130": "i",  # İ
    "\u00f6": "o",  # ö
    "\u00d6": "o",  # Ö
    "\u015f": "s",  # ş
    "\u015e": "s",  # Ş
    "\u00fc": "u",  # ü
    "\u00dc": "u",  # Ü
})

# Example of a URL built from a parsed date:
# https://nek.istanbul.edu.tr/ekos/GAZETE/izmir%20postasi/izmir%20postasi_1934/izmir%20postasi_1934_temmuz_/izmir%20postasi_1934_temmuz_2_.pdf


def parse_turkish_date(date_str):
    """Parse a "day month year" string whose month is a Turkish month name.

    The month is looked up in ``turkish_months`` first. If that fails, the
    name is folded to lowercase ASCII and matched against the URL forms, so
    inputs such as "nisan", "KASIM" or "subat" are accepted as well.

    Args:
        date_str: Whitespace-separated date, e.g. "23 Nisan 1935".

    Returns:
        A dict with ``'day'`` (int), ``'month'`` (lowercase ASCII month name)
        and ``'year'`` (the year token, unchanged, as a string).

    Raises:
        ValueError: If the input does not have exactly three parts, the month
            name is not recognised, or the day is not an integer.
    """
    parts = date_str.split()
    if len(parts) != 3:
        raise ValueError(
            f"Expected a date in 'day month year' form, got: {date_str!r}"
        )
    day, month_name, year = parts

    month_lower = turkish_months.get(month_name)
    if month_lower is None:
        # Fall back to a case- and diacritic-insensitive match.
        folded = month_name.translate(_TURKISH_TO_ASCII).lower()
        if folded in turkish_months.values():
            month_lower = folded
    if not month_lower:
        raise ValueError(f"Invalid Turkish month name: {month_name}")

    return {
        'day': int(day),  # int() also drops leading zeros ("05" -> 5)
        'month': month_lower,
        'year': year,
    }
def get_date():
    """Return the parsed target date.

    The first command-line argument is used when one is given; otherwise the
    user is prompted on standard input. The raw text is handed to
    ``parse_turkish_date`` and its result is returned unchanged.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        raw_date = cli_args[0]
    else:
        raw_date = input("Gün Ay Yıl şeklinde tarih giriniz: ")
    return parse_turkish_date(raw_date)
| # Resolve the target date once at startup; fetch_gazete reads this module-level value. | |
| tarih = get_date() | |
def build_gazete_url(gazete, date):
    """Return the archive PDF URL for newspaper *gazete* on *date*.

    Args:
        gazete: Newspaper identifier, either raw ("izmir postasi") or already
            percent-encoded ("izmir%20postasi").
        date: Mapping with ``'day'``, ``'month'`` and ``'year'`` entries.

    Returns:
        The URL as a string.
    """
    # Decode first, then encode: quoting an already-encoded name would turn
    # "%20" into "%2520" and the resulting URL could never match. Parentheses
    # and commas are kept literal, as they appear in the identifiers.
    gazete_encoded = urllib.parse.quote(
        urllib.parse.unquote(gazete), safe="/(),"
    )
    year, month, day = date['year'], date['month'], date['day']
    # NOTE(review): the double slash after the first path segment is kept
    # exactly as in the original URL template - confirm the server needs it.
    return (
        f"https://nek.istanbul.edu.tr/ekos/GAZETE/{gazete_encoded}//"
        f"{gazete_encoded}_{year}/"
        f"{gazete_encoded}_{year}_{month}_/"
        f"{gazete_encoded}_{year}_{month}_{day}_.pdf"
    )


def fetch_gazete(gazete):
    """Probe the archive for *gazete* on the global date and print a hit.

    Sends a HEAD request (3-second timeout) for the PDF URL built from the
    module-level ``tarih``. The URL is printed only when the server answers
    with status 200; any other status or a network error is ignored so one
    unreachable newspaper does not affect the others.
    """
    url = build_gazete_url(gazete, tarih)
    try:
        r = requests.head(url, timeout=3)
    except requests.RequestException:
        # Best-effort probe: timeouts and connection errors count as a miss.
        return
    if r.status_code == 200:
        print(url)
# Number of worker threads probing the archive concurrently; adjust as needed.
pool_size = 10

with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as executor:
    # Queue one probe per newspaper; leaving the `with` block waits for all
    # of them to finish.
    for gazete in gazeteler:
        executor.submit(fetch_gazete, gazete)

print("Bitti")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment