Created
September 26, 2025 23:40
-
-
Save jbinfo/dcc07559dd4a31a400dd20c91ccfd19a to your computer and use it in GitHub Desktop.
Prayer Times Scraper for Morocco - Extracts Islamic prayer times from official Ministry of Habous website (habous.gov.ma) and outputs CSV format compatible with Mawaqit App for TV. Supports multiple cities, handles Arabic text, and provides clean formatted data (Fajr, Sunrise, Dhuhr, Asr, Maghrib, Isha).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import scrapy | |
| import csv | |
| import re | |
class PrayerTimesSpider(scrapy.Spider):
    """Scrape daily prayer times from habous.gov.ma and export one CSV per city.

    Each start URL is the Ministry of Habous monthly schedule page for one
    city (selected by the ``ville`` query parameter). Results are accumulated
    in ``self.prayer_data`` and written out when the spider closes.
    """

    name = 'prayer_times'
    start_urls = [
        'https://habous.gov.ma/prieres/horaire_hijri_2.php?ville=58',  # Casablanca city
        'https://habous.gov.ma/prieres/horaire_hijri_2.php?ville=142'  # Agdz city
    ]

    def __init__(self, *args, **kwargs):
        # Forward args to Spider.__init__ so Scrapy's spider-argument
        # mechanism (e.g. `-a name=value`) keeps working.
        super().__init__(*args, **kwargs)
        # Maps city_id -> {'city_name': str, 'prayer_times': list[dict]}.
        self.prayer_data = {}

    def parse(self, response):
        """Parse one city page: extract valid rows from the #horaire table."""
        # Extract city ID from the URL's ville= query parameter.
        match = re.search(r'ville=(\d+)', response.url)
        if match is None:
            # Unexpected URL shape — skip rather than crash the crawl.
            self.logger.warning("No ville id found in URL: %s", response.url)
            return
        city_id = match.group(1)
        city_name = self.extract_city_name(response, city_id)

        prayer_times = []
        # All data rows in the horaire table (position()>1 skips the header).
        rows = response.xpath('//table[@id="horaire"]//tr[position()>1]')
        for row in rows:
            # Collect non-empty, stripped cell texts for this row.
            cells = [cell.strip() for cell in row.xpath('./td/text()').getall()
                     if cell.strip()]
            # Need at least 9 columns based on the table structure.
            if len(cells) < 9:
                continue
            # Skip rows with special text like "حسب نتيجة المراقبة"
            if any('حسب' in cell or 'المراقبة' in cell for cell in cells):
                continue
            # Gregorian day-of-month is in the 3rd column; skip non-numeric.
            date = cells[2]
            if not re.match(r'^\d+$', date):
                continue
            # Table structure:
            # 0: Day name, 1: Hijri date, 2: Gregorian date,
            # 3: Fajr, 4: Sunrise, 5: Dhuhr, 6: Asr, 7: Maghrib, 8: Isha
            prayer_row = {
                'Date': date,
                'Fajr': self.clean_time(cells[3]),
                'Sunrise': self.clean_time(cells[4]),
                'Dhuhr': self.clean_time(cells[5]),
                'Asr': self.clean_time(cells[6]),
                'Maghrib': self.clean_time(cells[7]),
                'Isha': self.clean_time(cells[8])
            }
            # Only keep rows whose essential times parse as HH:MM.
            if self.is_valid_prayer_row(prayer_row):
                prayer_times.append(prayer_row)

        self.prayer_data[city_id] = {
            'city_name': city_name,
            'prayer_times': prayer_times
        }
        self.logger.info(f"Scraped {len(prayer_times)} prayer times for {city_name}")

    def extract_city_name(self, response, city_id):
        """Extract city name from selected option in ville select box."""
        city_name = response.xpath('//select[@name="ville"]/option[@selected]/text()').get()
        # Fall back to a synthetic name so the output filename stays usable.
        return city_name.strip() if city_name else f"City_{city_id}"

    def clean_time(self, time_text):
        """Normalize a time string to zero-padded HH:MM; pass through otherwise."""
        time_text = time_text.strip()
        # Remove any embedded whitespace (e.g. "6 : 30" -> "6:30").
        time_text = re.sub(r'\s+', '', time_text)
        if re.match(r'^\d{1,2}:\d{2}$', time_text):
            # Ensure a 2-digit hour so downstream sorting/format is uniform.
            hour, minute = time_text.split(':')
            return f"{hour.zfill(2)}:{minute}"
        return time_text

    def is_valid_prayer_row(self, row):
        """Return True if the essential prayers all have HH:MM times."""
        time_pattern = r'^\d{2}:\d{2}$'
        essential = ['Fajr', 'Dhuhr', 'Maghrib']
        return all(re.match(time_pattern, row.get(prayer, '')) for prayer in essential)

    def closed(self, reason):
        """Write one CSV per scraped city when the spider finishes."""
        for city_id, data in self.prayer_data.items():
            # Spaces in city names would make awkward filenames.
            filename = f"prayer_times_{data['city_name'].replace(' ', '_')}_{city_id}.csv"
            self.write_csv(filename, data['prayer_times'])
            # Report the actual file written (was a broken placeholder).
            print(f"Created: {filename}")

    def write_csv(self, filename, prayer_times):
        """Write prayer times to CSV (no file is created for empty data)."""
        if not prayer_times:
            return
        # utf-8 keeps any residual Arabic text intact; newline='' is the
        # csv-module requirement to avoid blank lines on Windows.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(
                f,
                fieldnames=['Date', 'Fajr', 'Sunrise', 'Dhuhr', 'Asr', 'Maghrib', 'Isha'])
            writer.writeheader()
            writer.writerows(prayer_times)
# Simple runner: crawl all configured cities with a standalone process.
if __name__ == '__main__':
    from scrapy.crawler import CrawlerProcess

    crawler_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'ROBOTSTXT_OBEY': False,
        'DOWNLOAD_DELAY': 1,
    }
    runner = CrawlerProcess(crawler_settings)
    runner.crawl(PrayerTimesSpider)
    runner.start()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Installation & Usage
Prerequisites
# Install Python dependencies
pip install scrapy

Quick Start
Output
prayer_times_CityName_58.csv and prayer_times_CityName_142.csv

Date,Fajr,Sunrise,Dhuhr,Asr,Maghrib,Isha

Complete List of Available Cities
Customization
To scrape different cities, modify the start_urls list.

Popular Cities Quick Reference
Notes
Troubleshooting
If you encounter SSL errors:
For permission issues on macOS/Linux: