Better coolpc (the original site's UX is horrible): a scraper that fetches coolpc.com.tw's evaluate page, parses the component `<select>` menus, and exports the options as JSON and Markdown.
```python
import requests
import os
import argparse
import json
import re
from datetime import datetime
from bs4 import BeautifulSoup


def fetch_or_load_html(url, cache_file='coolpc_cache.html', force_refresh=False):
    """Fetch HTML from URL or load from cache based on settings."""
    try:
        # Check if cache exists and should be used
        if not force_refresh and os.path.exists(cache_file):
            cache_time = datetime.fromtimestamp(os.path.getmtime(cache_file))
            print(f"Debug: Found cache file from {cache_time}")
            with open(cache_file, 'r', encoding='cp950', errors='replace') as f:
                return f.read()

        # Fetch from URL
        print(f"Debug: Fetching fresh copy from {url}")
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Explicitly set CP950 encoding (the site serves Big5/CP950)
        response.encoding = 'cp950'

        # Save to cache using CP950 encoding
        with open(cache_file, 'w', encoding='cp950', errors='replace') as f:
            f.write(response.text)
        print(f"Debug: Saved new cache file at {datetime.now()}")
        return response.text

    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        if os.path.exists(cache_file):
            print("Debug: Using existing cache file due to fetch error")
            with open(cache_file, 'r', encoding='cp950', errors='replace') as f:
                return f.read()
        raise


def parse_option_text(text):
    """Parse the option text into title, price, and popularity."""
    title = ""
    price = "N/A"
    popularity = ""

    # Extract price
    price_match = re.search(r'\$\d+,?\d*', text)
    if price_match:
        price = price_match.group(0)

    # Get popularity indicators
    popularity = '★' if '★' in text else ''
    popularity = '♥' if '♥' in text else popularity

    # Get title
    if price_match:
        title = text[:price_match.start()].strip(' ,')
    else:
        title = text.strip()

    return title, price, popularity


def extract_select_options(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    tbody = soup.find('tbody', id='tbdy')
    if not tbody:
        print("Debug: tbody with id 'tbdy' not found")
        return None, None

    json_results = []
    md_content = []  # Store markdown content

    selects = tbody.find_all('select')
    print(f"Debug: Found {len(selects)} select elements\n")

    for select_num, select in enumerate(selects, 1):
        select_data = {
            'select_num': select_num,
            'name': select.get('name', 'N/A'),
            'groups': []
        }

        # Add to markdown
        md_content.append(f"\n## Select #{select_num} (name: {select.get('name', 'N/A')})")

        optgroups = select.find_all('optgroup')
        if optgroups:
            for optgroup in optgroups:
                current_category = optgroup.get('label', 'No Label')
                group_data = {
                    'title': current_category,
                    'options': []
                }

                # Add to markdown
                md_content.append(f"\n### {current_category}")
                md_content.append("\n| Title | Price | Popularity |")
                md_content.append("|-------|-------|------------|")

                options = optgroup.find_all('option')
                for option in options:
                    if not option.get('disabled'):  # Skip disabled options
                        title, price, popularity = parse_option_text(option.get_text(strip=True))

                        # Add to JSON
                        option_data = {
                            'title': title,
                            'price': price,
                            'popularity': popularity
                        }
                        group_data['options'].append(option_data)

                        # Add to markdown
                        md_content.append(f"| {title} | {price} | {popularity} |")

                select_data['groups'].append(group_data)
                md_content.append("")  # Add blank line after table

        json_results.append(select_data)

    return json_results, "\n".join(md_content)


def save_json_with_encoding(data, filename):
    """Save JSON with proper encoding handling."""
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except UnicodeEncodeError:
        # Fallback to CP950 if UTF-8 fails
        with open(filename, 'w', encoding='cp950', errors='replace') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)


def save_markdown(content, filename):
    """Save markdown content with proper encoding."""
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)
    except UnicodeEncodeError:
        with open(filename, 'w', encoding='cp950', errors='replace') as f:
            f.write(content)


def main():
    parser = argparse.ArgumentParser(description='Parse COOLPC select options')
    parser.add_argument('--refresh', action='store_true',
                        help='Force refresh cache and fetch new data')
    parser.add_argument('--url', default="https://www.coolpc.com.tw/evaluate.php",
                        help='URL to fetch (default: COOLPC evaluate page)')
    parser.add_argument('--cache', default="coolpc_cache.html",
                        help='Cache file path (default: coolpc_cache.html)')
    parser.add_argument('--json', default="coolpc_options.json",
                        help='Output JSON file path (default: coolpc_options.json)')
    parser.add_argument('--md', default="coolpc_options.md",
                        help='Output Markdown file path (default: coolpc_options.md)')
    args = parser.parse_args()

    try:
        # Show cache status
        if os.path.exists(args.cache) and not args.refresh:
            cache_time = datetime.fromtimestamp(os.path.getmtime(args.cache))
            print(f"Using cached data from: {cache_time}")
        elif args.refresh:
            print("Forcing cache refresh...")
        else:
            print("No cache found, will fetch fresh data...")

        # Fetch/load HTML
        html_content = fetch_or_load_html(args.url, args.cache, args.refresh)
        json_results, md_content = extract_select_options(html_content)

        # Save results
        if json_results:
            save_json_with_encoding(json_results, args.json)
            print(f"\nJSON results saved to {args.json}")
        if md_content:
            save_markdown(md_content, args.md)
            print(f"Markdown results saved to {args.md}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()
```
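For reference, a minimal check of `parse_option_text`. The option label below is a hypothetical example shaped like the entries on the evaluate page, not text taken from the site:

```python
# Hypothetical option label; real labels come from the <option> text on evaluate.php.
sample = "Intel i5-12400【6核/12緒】2.5GHz(↑4.4G)/18M/UHD730/65W, $5990 ★"
title, price, popularity = parse_option_text(sample)
print(title)       # Intel i5-12400【6核/12緒】2.5GHz(↑4.4G)/18M/UHD730/65W
print(price)       # $5990
print(popularity)  # ★
```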
A second copy of the script in this gist swaps the CP950 round-trip for a one-time Big5-to-UTF-8 conversion at fetch time, so the cache, JSON, and Markdown outputs are all plain UTF-8, and it maps the popularity marks to emoji:
```python
import requests
import os
import argparse
import json
import re
from datetime import datetime
from bs4 import BeautifulSoup


def fetch_or_load_html(url, cache_file='coolpc_cache.html', force_refresh=False):
    """Fetch HTML from URL or load from cache based on settings."""
    try:
        # Check if cache exists and should be used
        if not force_refresh and os.path.exists(cache_file):
            cache_time = datetime.fromtimestamp(os.path.getmtime(cache_file))
            print(f"Debug: Found cache file from {cache_time}")
            with open(cache_file, 'r', encoding='utf-8') as f:
                return f.read()

        # Fetch from URL
        print(f"Debug: Fetching fresh copy from {url}")
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Convert content from Big5 to UTF-8
        content = response.content.decode('big5', errors='replace')

        # Save to cache using UTF-8
        with open(cache_file, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Debug: Saved new cache file at {datetime.now()}")
        return content

    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        if os.path.exists(cache_file):
            print("Debug: Using existing cache file due to fetch error")
            with open(cache_file, 'r', encoding='utf-8') as f:
                return f.read()
        raise


def parse_option_text(text):
    """Parse the option text into title, price, and popularity."""
    title = ""
    price = "N/A"
    popularity = ""

    # Extract price
    price_match = re.search(r'\$\d+,?\d*', text)
    if price_match:
        price = price_match.group(0)

    # Map the site's popularity marks to emoji
    if '★' in text:
        popularity = '⭐'  # Unicode star
    elif '♥' in text:
        popularity = '❤️'  # Unicode heart

    # Get title
    if price_match:
        title = text[:price_match.start()].strip(' ,')
    else:
        title = text.strip()

    return title, price, popularity


def extract_select_options(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    tbody = soup.find('tbody', id='tbdy')
    if not tbody:
        print("Debug: tbody with id 'tbdy' not found")
        return None, None

    json_results = []
    md_content = []  # Store markdown content

    selects = tbody.find_all('select')
    print(f"Debug: Found {len(selects)} select elements\n")

    for select_num, select in enumerate(selects, 1):
        select_data = {
            'select_num': select_num,
            'name': select.get('name', 'N/A'),
            'groups': []
        }

        # Add to markdown
        md_content.append(f"\n## Select #{select_num} (name: {select.get('name', 'N/A')})")

        optgroups = select.find_all('optgroup')
        if optgroups:
            for optgroup in optgroups:
                current_category = optgroup.get('label', 'No Label')
                group_data = {
                    'title': current_category,
                    'options': []
                }

                # Add to markdown
                md_content.append(f"\n### {current_category}")
                md_content.append("\n| Title | Price | Popularity |")
                md_content.append("|-------|-------|------------|")

                options = optgroup.find_all('option')
                for option in options:
                    if not option.get('disabled'):  # Skip disabled options
                        title, price, popularity = parse_option_text(option.get_text(strip=True))

                        # Add to JSON
                        option_data = {
                            'title': title,
                            'price': price,
                            'popularity': popularity
                        }
                        group_data['options'].append(option_data)

                        # Add to markdown
                        md_content.append(f"| {title} | {price} | {popularity} |")

                select_data['groups'].append(group_data)
                md_content.append("")  # Add blank line after table

        json_results.append(select_data)

    return json_results, "\n".join(md_content)


def save_json_with_encoding(data, filename):
    """Save JSON with UTF-8 encoding."""
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def save_markdown(content, filename):
    """Save markdown content with UTF-8 encoding."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(content)


def main():
    parser = argparse.ArgumentParser(description='Parse COOLPC select options')
    parser.add_argument('--refresh', action='store_true',
                        help='Force refresh cache and fetch new data')
    parser.add_argument('--url', default="https://www.coolpc.com.tw/evaluate.php",
                        help='URL to fetch (default: COOLPC evaluate page)')
    parser.add_argument('--cache', default="coolpc_cache.html",
                        help='Cache file path (default: coolpc_cache.html)')
    parser.add_argument('--json', default="coolpc_options.json",
                        help='Output JSON file path (default: coolpc_options.json)')
    parser.add_argument('--md', default="coolpc_options.md",
                        help='Output Markdown file path (default: coolpc_options.md)')
    args = parser.parse_args()

    try:
        # Show cache status
        if os.path.exists(args.cache) and not args.refresh:
            cache_time = datetime.fromtimestamp(os.path.getmtime(args.cache))
            print(f"Using cached data from: {cache_time}")
        elif args.refresh:
            print("Forcing cache refresh...")
        else:
            print("No cache found, will fetch fresh data...")

        # Fetch/load HTML
        html_content = fetch_or_load_html(args.url, args.cache, args.refresh)
        json_results, md_content = extract_select_options(html_content)

        # Save results
        if json_results:
            save_json_with_encoding(json_results, args.json)
            print(f"\nJSON results saved to {args.json}")
        if md_content:
            save_markdown(md_content, args.md)
            print(f"Markdown results saved to {args.md}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()
```
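Run it with no arguments to reuse the cache, or with `--refresh` to force a fresh fetch; `--json` and `--md` override the output paths. The functions can also be used directly from Python; a sketch, assuming the script is saved as `coolpc.py` (the gist does not name the file):

```python
# Programmatic use; the module name "coolpc" is an assumption.
from coolpc import fetch_or_load_html, extract_select_options, save_json_with_encoding, save_markdown

html = fetch_or_load_html("https://www.coolpc.com.tw/evaluate.php", force_refresh=True)
json_results, md_content = extract_select_options(html)
if json_results:
    save_json_with_encoding(json_results, "parts.json")
if md_content:
    save_markdown(md_content, "parts.md")
```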