import hashlib
import time
class Block:
    """One block of the chain, fingerprinted by a SHA-256 hash of its fields.

    The hash is computed once, at construction time, from ``index``,
    ``previous_hash``, ``timestamp`` and ``transactions``. It is not
    recomputed automatically if any of those attributes is changed later.
    """

    def __init__(self, index, previous_hash, timestamp, transactions):
        """Store the block's fields and compute its hash.

        Args:
            index: Block number.
            previous_hash: Hash of the previous block.
            timestamp: Time of block creation.
            transactions: Transactions held by the block. Anything with a
                string form is accepted, because the value is only
                interpolated into the hashed string.
        """
        self.index = index
        self.previous_hash = previous_hash
        self.timestamp = timestamp
        self.transactions = transactions
        # Must come last: calculate_hash() reads the four attributes above.
        self.hash = self.calculate_hash()

    def calculate_hash(self):
        """Return the SHA-256 hex digest of the block's current fields.

        The fields are interpolated into a single string in the order
        index, previous_hash, timestamp, transactions (no separator), then
        encoded with the default ``str.encode()`` encoding and hashed.
        """
        block_string = (
            f"{self.index}{self.previous_hash}{self.timestamp}{self.transactions}"
        ).encode()
        return hashlib.sha256(block_string).hexdigest()
class Blockchain:
    """An in-memory list of ``Block`` objects linked by their hashes.

    ``chain`` always starts with a genesis block; every later block stores
    the hash of the block that was last in the chain when it was added.
    """

    def __init__(self):
        """Create a chain containing only the genesis block."""
        self.chain = [self.create_genesis_block()]

    def create_genesis_block(self):
        """Return the first block: index 0, previous hash "0", current time."""
        return Block(0, "0", time.time(), "Genesis Block")

    def add_block(self, transactions):
        """Append a new block holding ``transactions`` to the end of the chain.

        Args:
            transactions: Payload stored in the new block.
        """
        prev_block = self.chain[-1]  # the current tip of the chain
        # The new block's index equals the chain length before appending.
        new_block = Block(len(self.chain), prev_block.hash, time.time(), transactions)
        self.chain.append(new_block)
# Create the blockchain and add two blocks of transactions.
my_chain = Blockchain()
transactions = [
    {"sender": "Mary", "receiver": "Daniel", "amount": 28000},
    {"sender": "Daniel", "receiver": "Lisa", "amount": 20000},
]
my_chain.add_block(transactions)
my_chain.add_block([{"sender": "Justin", "receiver": "Daniel", "amount": 20000}])

# Print the blockchain, one line per block (genesis block first).
for block in my_chain.chain:
    print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")
Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]
Install necessary packages
%pip install pandas numpy scikit-learn
Requirement already satisfied: pandas in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.5.3)
Requirement already satisfied: numpy in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.24.3)
Requirement already satisfied: scikit-learn in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.3.0)
Requirement already satisfied: python-dateutil>=2.8.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2022.7)
Requirement already satisfied: scipy>=1.5.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.10.1)
Requirement already satisfied: joblib>=1.1.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (2.2.0)
Requirement already satisfied: six>=1.5 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Toy history of past transactions, written column by column.
# is_fraud: 0 = normal transaction, 1 = fraudulent transaction.
data = pd.DataFrame(
    {
        "amount": [100, 5000, 200, 8000, 150, 7500],
        "is_fraud": [0, 1, 0, 1, 0, 1],
    }
)

# Features (a one-column DataFrame, so the column name is kept) and labels.
X = data[["amount"]]
y = data["is_fraud"]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable. With 6 rows scikit-learn rounds the test count up, so 2 rows
# are held out and 4 are used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 10-tree random forest and train it (fit() returns the estimator).
model = RandomForestClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)

# Score the predictions for the held-out rows against their true labels.
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
Model Accuracy: 1.0
- pandas – Used for handling tabular data (like an Excel sheet).
- numpy – Used for numerical operations (though it's not directly used in this code).
- train_test_split – Splits data into training and testing sets.
- RandomForestClassifier – A machine learning algorithm used for classification.
- accuracy_score – Measures how well the model performs.
data = pd.DataFrame([
{"amount": 100, "is_fraud": 0}, # Normal transaction
{"amount": 5000, "is_fraud": 1}, # Fraudulent transaction
{"amount": 200, "is_fraud": 0}, # Normal transaction
{"amount": 8000, "is_fraud": 1}, # Fraudulent transaction
{"amount": 150, "is_fraud": 0}, # Normal transaction
{"amount": 7500, "is_fraud": 1} # Fraudulent transaction
])
- This creates a pandas DataFrame with two columns:
  - amount (transaction amount in dollars)
  - is_fraud (0 = normal, 1 = fraudulent)
- The dataset is very small (only 6 rows) and is used for demonstration.
X = data[["amount"]] # Features (Transaction Amount)
y = data["is_fraud"] # Labels (0: Legit, 1: Fraud)
- X – Contains features (the data we use to predict fraud). Here, it's just the transaction amount.
- y – Contains the target labels (what we want to predict – fraud or not fraud).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
- Purpose: Splits data into training (80%) and testing (20%) sets. With only 6 rows the test share is rounded up, so 4 rows are used for training and 2 for testing.
- Why?
  - The training set is used to teach the model.
  - The testing set is used to evaluate performance.
- random_state=42 – Ensures reproducibility (so the split is the same every time you run the code).
model = RandomForestClassifier(n_estimators=10, random_state=42)
model.fit(X_train, y_train)
- What is RandomForestClassifier?
  - It’s an ensemble machine learning algorithm.
  - It creates multiple decision trees and combines their outputs.
  - It's good for classification tasks like fraud detection.
- n_estimators=10
  - Uses 10 decision trees in the forest.
- .fit(X_train, y_train)
  - Trains the model using the training data.
y_pred = model.predict(X_test)
- Predicts whether the transactions in the test set are fraudulent or not.
print("Model Accuracy:", accuracy_score(y_test, y_pred))
- accuracy_score(y_test, y_pred)
  - Compares the model’s predictions (y_pred) with the actual labels (y_test).
  - Outputs a score between 0 and 1 (1 = perfect prediction, 0 = all wrong).
- Load necessary libraries.
- Create a small dataset of transactions.
- Separate the data into features (X) and labels (y).
- Split the data into training (80%) and testing (20%).
- Train a Random Forest Classifier model on the training data.
- Use the model to predict fraud in the test data.
- Measure the model’s accuracy.
- Supervised Learning: The model learns from labeled data (is_fraud values).
- Feature Selection: The only feature used here is transaction amount.
- Limitations of this Example:
- The dataset is tiny and unrealistic. Only for educational purposes.
- A real-world fraud detection model would use more features (like transaction location, time, user behavior, etc.).
- We should handle imbalanced data (real-world fraud is much rarer than normal transactions).
def is_fraudulent(transaction):
    """Return True if the trained model classifies the transaction as fraud.

    Args:
        transaction: Mapping with an "amount" key holding the transaction
            amount; other keys are ignored.

    Returns:
        bool: True when the module-level ``model`` predicts class 1 (fraud).

    Raises:
        KeyError: If ``transaction`` has no "amount" key.
    """
    # Wrap the amount in a one-row DataFrame whose column is named "amount".
    # Passing a bare NumPy array makes scikit-learn emit
    # "UserWarning: X does not have valid feature names" on every call when
    # the model was fitted on a DataFrame with named columns.
    features = pd.DataFrame({"amount": [transaction["amount"]]})
    # bool() converts NumPy's bool_ into a plain Python bool.
    return bool(model.predict(features)[0] == 1)  # 1 means fraud
# New transactions (the model screens them before they reach the blockchain).
new_transactions = [
    {"sender": "George", "receiver": "Henry", "amount": 900},  # Likely legit
    {"sender": "Ivy", "receiver": "Jack", "amount": 10000},  # Likely fraud
]

# Keep only the transactions the model does not flag as fraud.
valid_transactions = [tx for tx in new_transactions if not is_fraudulent(tx)]

# Add a block only when at least one transaction passed the check, so that
# no empty block is appended.
if valid_transactions:
    my_chain.add_block(valid_transactions)
    print("Safe transactions added to the blockchain!")
else:
    print("No valid transactions found.")
Safe transactions added to the blockchain!
/Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
/Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
warnings.warn(
# Print the blockchain again, one line per block, to show the newly added block.
for block in my_chain.chain:
    print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")
Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]
Index: 3, Hash: 40bb6a72df826ae077b3cb8ef9ff5be3d278fcf45968e84b1ff7b43f2328b6d3, Previous Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Transactions: [{'sender': 'George', 'receiver': 'Henry', 'amount': 900}]