Last active
October 22, 2024 07:13
-
-
Save tolgakurtuluss/97cdb0db5ac38beea7555fc3cfdf84d8 to your computer and use it in GitHub Desktop.
Tarım ve Orman Bakanlığı - Taklit veya Tağşiş Yapılan Gıdalar Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This script fetches data from the 'T.C. Tarım ve Orman Bakanlığı - Guvenilir Gida' website using the requests library,
processes the response, and converts it into a Pandas DataFrame.

Requirements:
- requests
- pandas

Usage:
1. Install the required libraries if you haven't already:
       pip install requests pandas
2. Run the script to fetch and display the data.

Date: [22.10.2024]
"""
| import requests | |
| import json | |
| import pandas as pd | |
# Define the headers for the HTTP request.
# The set mirrors what a desktop Chrome 129 browser sends for an XMLHttpRequest
# issued from the site's own listing page (see 'Referer' and 'User-Agent').
# NOTE(review): it is not visible from this script which of these headers the
# server actually requires; they are all passed through unchanged.
headers = {
    # Content negotiation and caching
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # The payload is sent as a URL-encoded form body
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'DNT': '1',
    # Same-origin context of the request
    'Origin': 'https://guvenilirgida.tarimorman.gov.tr',
    'Pragma': 'no-cache',
    'Referer': 'https://guvenilirgida.tarimorman.gov.tr/GuvenilirGida/gkd/TaklitVeyaTagsis?siteYayinDurumu=True',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    # Browser identification
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua': '"Google Chrome";v="129", "Not=A?Brand";v="8", "Chromium";v="129"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
# Define the data payload for the POST request.
#
# The form body consists of:
#   * 'draw', a request counter field,
#   * one group of six 'columns[i][...]' fields per result column,
#   * ordering, paging and global-search fields,
#   * site-specific 'KamuoyuDuyuruAra.*' filter fields.
# NOTE(review): the field layout looks like the DataTables server-side
# processing protocol (the endpoint is named 'DataTablesList') — confirm.

# Result columns, in the order they are indexed in the 'columns[i]' fields.
_COLUMN_NAMES = (
    'DuyuruTarihi',
    'FirmaAdi',
    'Marka',
    'UrunAdi',
    'Uygunsuzluk',
    'PartiSeriNo',
    'FirmaIlce',
    'FirmaIl',
    'UrunGrupAdi',
)


def _column_fields(index, name):
    """Return the six form fields that describe one result column.

    Args:
        index: Zero-based position of the column in the request.
        name: Column name, sent as both the 'data' and 'name' field.

    Returns:
        A dict of form-field name to value, in the order the fields are sent.
    """
    prefix = f'columns[{index}]'
    return {
        f'{prefix}[data]': name,
        f'{prefix}[name]': name,
        f'{prefix}[searchable]': 'true',
        f'{prefix}[orderable]': 'true',
        f'{prefix}[search][value]': '',
        f'{prefix}[search][regex]': 'false',
    }


data = {
    'draw': '1',
    # Column descriptors, generated so that adding or removing a column is a
    # one-line change in _COLUMN_NAMES.
    **{
        field: value
        for index, name in enumerate(_COLUMN_NAMES)
        for field, value in _column_fields(index, name).items()
    },
    # Sort by column 0 (DuyuruTarihi), descending.
    'order[0][column]': '0',
    'order[0][dir]': 'desc',
    # Paging window: offset 0, 1000 rows.
    # NOTE(review): presumably rows beyond the first 1000 are not returned by
    # a single request — confirm whether paging is needed.
    'start': '0',
    'length': '1000',
    # No global search filter.
    'search[value]': '',
    'search[regex]': 'false',
    # Site-specific filters.
    # NOTE(review): '11' presumably selects the "Taklit veya Tağşiş" category
    # — confirm against the site's filter form.
    'KamuoyuDuyuruAra.IdariYaptirimYasalDayanakIdler': '11',
    'KamuoyuDuyuruAra.IdariYaptirimYasalDayanakId': '',
    'SiteYayinDurumu': 'True',
    'KamuoyuDuyuruAra.DuyuruTarihi': '',
    '_KamuoyuDuyuruAra_UrunGrupId': '',
    'KamuoyuDuyuruAra.UrunGrupId': '',
    # Capitalised 'Order' keys are distinct form fields from 'order' above and
    # are sent in addition to them.
    'Order[0][column]': 'DuyuruTarihi',
    'Order[0][dir]': 'desc',
}
# Send the POST request to fetch data.
response = requests.post(
    'https://guvenilirgida.tarimorman.gov.tr/GuvenilirGida/GKD/DataTablesList',
    headers=headers,
    data=data,
    # Without an explicit timeout, requests may wait on the server forever.
    timeout=30,
)
# Stop on an HTTP error status instead of trying to parse an error page as JSON.
response.raise_for_status()

# Load the response data into a DataFrame.
# The rows are read from the 'data' key of the JSON body; they are bound to a
# separate name so the request payload in `data` is not overwritten.
records = response.json()['data']
df = pd.DataFrame(records)

# Display the DataFrame
print(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment