Created
September 26, 2025 23:40
-
-
Save jbinfo/dcc07559dd4a31a400dd20c91ccfd19a to your computer and use it in GitHub Desktop.
Prayer Times Scraper for Morocco - Extracts Islamic prayer times from official Ministry of Habous website (habous.gov.ma) and outputs CSV format compatible with Mawaqit App for TV. Supports multiple cities, handles Arabic text, and provides clean formatted data (Fajr, Sunrise, Dhuhr, Asr, Maghrib, Isha).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import scrapy | |
| import csv | |
| import re | |
class PrayerTimesSpider(scrapy.Spider):
    """Scrape daily prayer times from habous.gov.ma and export one CSV per city.

    Each start URL is the Ministry of Habous monthly schedule page for one
    city (selected by the ``ville`` query parameter). Results are accumulated
    in ``self.prayer_data`` and written out when the spider closes.
    """

    name = 'prayer_times'
    start_urls = [
        'https://habous.gov.ma/prieres/horaire_hijri_2.php?ville=58',  # Casablanca city
        'https://habous.gov.ma/prieres/horaire_hijri_2.php?ville=142'  # Agdz city
    ]

    def __init__(self, *args, **kwargs):
        # Forward args to Spider.__init__ so Scrapy's spider-argument
        # mechanism (e.g. `-a name=value`) keeps working.
        super().__init__(*args, **kwargs)
        # Maps city_id -> {'city_name': str, 'prayer_times': list[dict]}.
        self.prayer_data = {}

    def parse(self, response):
        """Parse one city page: extract valid rows from the #horaire table."""
        # Extract city ID from the URL's ville= query parameter.
        match = re.search(r'ville=(\d+)', response.url)
        if match is None:
            # Unexpected URL shape — skip rather than crash the crawl.
            self.logger.warning("No ville id found in URL: %s", response.url)
            return
        city_id = match.group(1)
        city_name = self.extract_city_name(response, city_id)

        prayer_times = []
        # All data rows in the horaire table (position()>1 skips the header).
        rows = response.xpath('//table[@id="horaire"]//tr[position()>1]')
        for row in rows:
            # Collect non-empty, stripped cell texts for this row.
            cells = [cell.strip() for cell in row.xpath('./td/text()').getall()
                     if cell.strip()]
            # Need at least 9 columns based on the table structure.
            if len(cells) < 9:
                continue
            # Skip rows with special text like "حسب نتيجة المراقبة"
            if any('حسب' in cell or 'المراقبة' in cell for cell in cells):
                continue
            # Gregorian day-of-month is in the 3rd column; skip non-numeric.
            date = cells[2]
            if not re.match(r'^\d+$', date):
                continue
            # Table structure:
            # 0: Day name, 1: Hijri date, 2: Gregorian date,
            # 3: Fajr, 4: Sunrise, 5: Dhuhr, 6: Asr, 7: Maghrib, 8: Isha
            prayer_row = {
                'Date': date,
                'Fajr': self.clean_time(cells[3]),
                'Sunrise': self.clean_time(cells[4]),
                'Dhuhr': self.clean_time(cells[5]),
                'Asr': self.clean_time(cells[6]),
                'Maghrib': self.clean_time(cells[7]),
                'Isha': self.clean_time(cells[8])
            }
            # Only keep rows whose essential times parse as HH:MM.
            if self.is_valid_prayer_row(prayer_row):
                prayer_times.append(prayer_row)

        self.prayer_data[city_id] = {
            'city_name': city_name,
            'prayer_times': prayer_times
        }
        self.logger.info(f"Scraped {len(prayer_times)} prayer times for {city_name}")

    def extract_city_name(self, response, city_id):
        """Extract city name from selected option in ville select box."""
        city_name = response.xpath('//select[@name="ville"]/option[@selected]/text()').get()
        # Fall back to a synthetic name so the output filename stays usable.
        return city_name.strip() if city_name else f"City_{city_id}"

    def clean_time(self, time_text):
        """Normalize a time string to zero-padded HH:MM; pass through otherwise."""
        time_text = time_text.strip()
        # Remove any embedded whitespace (e.g. "6 : 30" -> "6:30").
        time_text = re.sub(r'\s+', '', time_text)
        if re.match(r'^\d{1,2}:\d{2}$', time_text):
            # Ensure a 2-digit hour so downstream sorting/format is uniform.
            hour, minute = time_text.split(':')
            return f"{hour.zfill(2)}:{minute}"
        return time_text

    def is_valid_prayer_row(self, row):
        """Return True if the essential prayers all have HH:MM times."""
        time_pattern = r'^\d{2}:\d{2}$'
        essential = ['Fajr', 'Dhuhr', 'Maghrib']
        return all(re.match(time_pattern, row.get(prayer, '')) for prayer in essential)

    def closed(self, reason):
        """Write one CSV per scraped city when the spider finishes."""
        for city_id, data in self.prayer_data.items():
            # Spaces in city names would make awkward filenames.
            filename = f"prayer_times_{data['city_name'].replace(' ', '_')}_{city_id}.csv"
            self.write_csv(filename, data['prayer_times'])
            # Report the actual file written (was a broken placeholder).
            print(f"Created: {filename}")

    def write_csv(self, filename, prayer_times):
        """Write prayer times to CSV (no file is created for empty data)."""
        if not prayer_times:
            return
        # utf-8 keeps any residual Arabic text intact; newline='' is the
        # csv-module requirement to avoid blank lines on Windows.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(
                f,
                fieldnames=['Date', 'Fajr', 'Sunrise', 'Dhuhr', 'Asr', 'Maghrib', 'Isha'])
            writer.writeheader()
            writer.writerows(prayer_times)
# Simple runner: crawl all configured cities with a standalone process.
if __name__ == '__main__':
    from scrapy.crawler import CrawlerProcess

    crawler_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'ROBOTSTXT_OBEY': False,
        'DOWNLOAD_DELAY': 1,
    }
    runner = CrawlerProcess(crawler_settings)
    runner.crawl(PrayerTimesSpider)
    runner.start()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Installation & Usage
Prerequisites
# Install Python dependencies
pip install scrapy

Quick Start
Output
prayer_times_CityName_58.csv and prayer_times_CityName_142.csv

Date,Fajr,Sunrise,Dhuhr,Asr,Maghrib,Isha

Complete List of Available Cities
Customization
To scrape different cities, modify the start_urls list.

Popular Cities Quick Reference
Notes
Troubleshooting
If you encounter SSL errors:
For permission issues on macOS/Linux: