Last active
April 8, 2024 08:22
-
-
Save Xnuvers007/abd3e230b5dd5e3d5460a3046e2bd148 to your computer and use it in GitHub Desktop.
Scrapes coinmarketcap.com to monitor cryptocurrency prices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import subprocess
import sys
import time

# Import the third-party dependencies, installing them on first run if absent.
try:
    import requests
    from bs4 import BeautifulSoup
except ModuleNotFoundError:
    # Install with the interpreter that is running this script, so the
    # packages end up in the environment the imports below will search
    # (a bare `pip` on PATH may belong to a different Python installation).
    # The argument list avoids going through a shell.
    subprocess.run(
        [
            sys.executable, '-m', 'pip', 'install',
            'requests', 'beautifulsoup4', '--no-cache-dir',
        ],
        check=False,
    )
    # If the install failed, these imports raise ModuleNotFoundError.
    import requests
    from bs4 import BeautifulSoup
# Browser-like HTTP headers sent with every request made by scrape_page().
headers = {
    'Host': 'coinmarketcap.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # 'br' is intentionally not advertised: requests/urllib3 only decode
    # Brotli when an optional brotli package is installed, and this script
    # does not install one, so a Brotli response would reach the HTML parser
    # still compressed.
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'https://www.google.com/',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'cross-site',
    'Sec-Fetch-User': '?1',
    'Connection': 'keep-alive',
}
def scrape_page(url, max_retries=3):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to fetch.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        A BeautifulSoup object for the page, or None when every attempt
        failed with a requests exception (connection error, timeout, or an
        HTTP error status reported by raise_for_status()).
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=30, verify=True)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Request to {url} failed (attempt {attempt}/{max_retries}): {e}")
            if attempt < max_retries:
                # Pause before retrying instead of retrying immediately.
                time.sleep(3)
            continue
        # Delay after each successful request, kept from the original code.
        time.sleep(3)
        return BeautifulSoup(response.text, 'html.parser')
    return None
def extract_data(soup): | |
for coin in soup.find_all('tr', {'style': 'cursor:pointer'}): | |
rank = coin.find('p', {'class': 'sc-4984dd93-0 iWSjWE'}).text.strip() | |
name = coin.find('p', {'class': 'sc-4984dd93-0 kKpPOn'}).text.strip() | |
name_short = coin.find('p', {'class': 'sc-4984dd93-0 iqdbQL coin-item-symbol'}).text.strip() | |
price = coin.find('span', {'class': 'sc-7bc56c81-0 dCzASk'}).text.strip() | |
price_long = coin.find('span', {'class': 'sc-7bc56c81-1 bCdPBp'}).text.strip() | |
change_1h_elements = coin.find_all('span', {'class': 'sc-6a54057-0 iEhQde'}) | |
change_1h = ' '.join(change_1h_elements[0].text.strip().split()) if change_1h_elements else 'N/A' | |
change_24h_elements = coin.find_all('span', {'class': 'sc-6a54057-0 YXxPZ'}) | |
change_24h = ' '.join(change_24h_elements[1].text.strip().split()) if len(change_24h_elements) > 1 else 'N/A' | |
change_7d_elements = coin.find('span', {'class': 'sc-6a54057-0 iEhQde'}) | |
change_7d = ' '.join(change_7d_elements.text.strip().split()) if change_7d_elements else 'N/A' | |
volume_24h = ' '.join(coin.find('p', {'class': 'sc-4984dd93-0 jZrMxO font_weight_500'}).text.strip().split()) | |
volume_24h_crypto = ' '.join(coin.find('p', {'class': 'sc-4984dd93-0 ihZPK'}).text.strip().split()) | |
calculating_supply = ' '.join(coin.find('p', {'class': 'sc-4984dd93-0 WfVLk'}).text.strip().split()) | |
chart = coin.find('img', {'class': 'sc-14cb040a-0 dmOeak'})['src'] if coin.find('img', {'class': 'sc-14cb040a-0 dmOeak'}) else 'N/A' | |
print(f"\nRank: {rank}, Name: {name} ({name_short}), 1h: {change_1h}, 24h: {change_24h}, 7d: {change_7d}, Price: {price}, Market Cap: {price_long} ({price}), 24h Volume: {volume_24h} ({volume_24h_crypto}), Calculating Supply: {calculating_supply}, Chart: {chart}") | |
def _tag_text(tag):
    """Return the stripped text of *tag*, or 'N/A' when *tag* is None."""
    return tag.text.strip() if tag is not None else 'N/A'


def scrape_by_name(name):
    """Fetch the page of a single coin and print its price and statistics.

    Args:
        name: Coin identifier inserted into the /currencies/<name>/ URL.

    Prints the coin header (name, symbol, price, percentage change), the
    statistics title, and every entry of the coin-metrics tables. Elements
    that are not present on the page are printed as 'N/A' instead of raising.
    """
    url = f"https://coinmarketcap.com/currencies/{name}/"
    soup = scrape_page(url)
    if soup is None:
        # scrape_page() returns None when the page could not be fetched.
        print("Coin stats section not found or URL invalid.")
        return

    for form in soup.find_all('div', {'class': 'sc-aef7b723-0 sc-55349342-0 guaEmH coin-stats'}):
        name_tag = form.find('span', {'data-role': 'coin-name', 'class': 'sc-f70bb44c-0 jltoa'})
        nama = name_tag.get('title', 'N/A').strip() if name_tag is not None else 'N/A'
        nama_short = _tag_text(
            form.find('span', {'data-role': 'coin-symbol', 'class': 'sc-f70bb44c-0 dXQGRd base-text'}))
        price = _tag_text(form.find('span', {'class': 'sc-f70bb44c-0 jxpCgO base-text'}))
        presentase = _tag_text(form.find('p', {'class': 'sc-4984dd93-0 sc-58c82cf9-1 heXOji'}))
        print(f"\n{nama} ({nama_short})  {price} -> {presentase}\n")

    print(_tag_text(soup.find('h6', {'class': 'sc-f70bb44c-0 fBmnvo stats-title'})))

    coin_stats_section = soup.find('div', {'id': 'section-coin-stats'})
    if not coin_stats_section:
        print("Coin stats section not found or URL invalid.")
        return

    coin_stats = coin_stats_section.find_all('dl', {'class': 'coin-metrics-table'})
    if not coin_stats:
        print("No coin stats found.")
        return

    for stat in coin_stats:
        for data in stat.find_all('div', {'class': 'iQEJet'}):
            key = _tag_text(data.find('div', {'class': 'cWCNDD'}))
            value = _tag_text(data.find('dd', {'class': 'base-text'}))
            if key in ("Market cap", "Volume (24h)"):
                # NOTE(review): only <p color="green"> is matched, so any
                # other colour prints 'N/A' - confirm whether intended.
                percentage = _tag_text(data.find('p', {'color': 'green'}))
                print(f"{key}: {value} - > {percentage}")
            else:
                print(f"{key}: {value}")
def scrape_all_pages(start_page=1, end_page=96):
    """Scrape a range of listing pages and print the coins found on each.

    Args:
        start_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive). The defaults cover
            pages 1 to 96, the range that was previously hard-coded.
    """
    for page_number in range(start_page, end_page + 1):
        url = f"https://coinmarketcap.com/?page={page_number}"
        print(f"Scraping data from page {page_number}...")
        soup = scrape_page(url)
        if soup is None:
            # scrape_page() returns None when the page could not be fetched;
            # skip it and continue with the remaining pages.
            print(f"Skipping page {page_number}: page could not be fetched.")
            continue
        extract_data(soup)
def main():
    """Show the menu, read the user's choice and run the matching scraper."""
    print('''
1. Search by Name
2. Search all pages
''')
    try:
        choice = int(input("Enter your choice: "))
    except ValueError:
        # Non-numeric input falls through to the "Invalid choice" branch
        # instead of ending the program with a traceback.
        choice = None

    if choice == 1:
        nama_koin = input("Enter the name of the coin (e.g. Bitcoin): ").strip()
        if not nama_koin:
            print("No coin name entered.")
            return
        scrape_by_name(nama_koin.lower())
    elif choice == 2:
        scrape_all_pages()
    else:
        print("Invalid choice. Please enter 1 or 2.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The next update will retrieve all of the following data:
Bitcoin price BTC$69,462.70 1.99% (1d)Bitcoin to USD ChartLoading DataPlease wait a moment. Add to watchlist Bitcoin statisticsMarket cap 2.17%$1,366,964,177,581#1Volume (24h) 33.22%$20,014,182,760#2Volume/Market cap (24h) 1.46%Circulating supply 19,675,562 BTC93.69%Total supply 19,675,562 BTCMax. supply 21,000,000 BTCFully diluted market cap $1,458,979,811,057Official linksWebsiteWhitepaperGitHubSocialsRedditRating · Based on 2 institutional ratings4.9 Network informationChain explorersSupported walletsUCID1 BTC to USD ConverterBTCUSDPrice performance24h Low$67,585.57High$69,778.10All-time highMar 14, 2024 (24 days ago)$73,750.07-5.8%All-time lowJul 14, 2010 (14 years ago)$0.04865+142816283.44%See historical dataPopularityIn watchlists4,790,028x29th / 9.5KTagsMineablePoWSHA-256Show allMore informationDo you own this project? Update Token Info