Skip to content

Instantly share code, notes, and snippets.

@Dev-Dipesh
Created March 22, 2025 04:31
Show Gist options
  • Save Dev-Dipesh/9977202b70d8c14e9c6d1adf543be207 to your computer and use it in GitHub Desktop.
Save Dev-Dipesh/9977202b70d8c14e9c6d1adf543be207 to your computer and use it in GitHub Desktop.
Simple Test Code for Creating Blockchain + Basic AI Fraud Detection

Simple Test Code for Creating Blockchain + Basic AI Fraud Detection

import hashlib
import time

class Block:
  """One entry in the chain: a payload plus a link to its predecessor.

  The constructor stores its four arguments as attributes and then derives
  `hash` from them once, at construction time.
  """

  def __init__(self, index, previous_hash, timestamp, transactions):
    # Position of this block in the chain.
    self.index = index
    # Hash of the block that precedes this one.
    self.previous_hash = previous_hash
    # Creation time, as supplied by the caller.
    self.timestamp = timestamp
    # Payload carried by the block.
    self.transactions = transactions
    # Fingerprint of the fields above; set last because it reads all of them.
    self.hash = self.calculate_hash()

  def calculate_hash(self):
    """Return the SHA-256 hex digest of this block's current fields.

    The digest input is the string form of index, previous_hash, timestamp
    and transactions, concatenated in that order with no separator.
    """
    payload = f"{self.index}{self.previous_hash}{self.timestamp}{self.transactions}"
    digest = hashlib.sha256()
    digest.update(payload.encode())
    return digest.hexdigest()
  
class Blockchain:
  """A list of Block objects, seeded with a genesis block."""

  def __init__(self):
    # The chain starts out holding exactly one block: the genesis block.
    genesis = self.create_genesis_block()
    self.chain = [genesis]

  def create_genesis_block(self):
    """Build the first block: index 0, previous hash "0", timestamped now."""
    return Block(
      index=0,
      previous_hash="0",
      timestamp=time.time(),
      transactions="Genesis Block",
    )

  def add_block(self, transactions):
    """Append a new block holding `transactions` to the end of the chain.

    The new block's index is the current chain length and its previous_hash
    is the hash of the block that is currently last.
    """
    tail = self.chain[-1]
    self.chain.append(
      Block(
        index=len(self.chain),
        previous_hash=tail.hash,
        timestamp=time.time(),
        transactions=transactions,
      )
    )

# Build a chain and record two batches of transactions
my_chain = Blockchain()

# First batch: two transfers that share one block
transactions = [
  {"sender": "Mary", "receiver": "Daniel", "amount": 28000},
  {"sender": "Daniel", "receiver": "Lisa", "amount": 20000},
]
my_chain.add_block(transactions)

# Second batch: a single transfer in its own block
my_chain.add_block([{"sender": "Justin", "receiver": "Daniel", "amount": 20000}])

# Dump every block in chain order, genesis first
for block in my_chain.chain:
  print(
    f"Index: {block.index}, "
    f"Hash: {block.hash}, "
    f"Previous Hash: {block.previous_hash}, "
    f"Transactions: {block.transactions}"
  )
Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]

Install necessary packages

%pip install pandas numpy scikit-learn
Requirement already satisfied: pandas in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.5.3)
Requirement already satisfied: numpy in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.24.3)
Requirement already satisfied: scikit-learn in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.3.0)
Requirement already satisfied: python-dateutil>=2.8.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2022.7)
Requirement already satisfied: scipy>=1.5.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.10.1)
Requirement already satisfied: joblib>=1.1.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (2.2.0)
Requirement already satisfied: six>=1.5 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Toy history of labelled transactions (is_fraud: 1 = fraud, 0 = legitimate).
# The three small amounts are labelled legitimate, the three large ones fraud.
data = pd.DataFrame({
    "amount":   [100, 5000, 200, 8000, 150, 7500],
    "is_fraud": [0,   1,    0,   1,    0,   1],
})

# Target labels, and the single feature column kept 2-D as a DataFrame
y = data["is_fraud"]
X = data[["amount"]]

# Hold out test_size=0.2 of the rows. With only 6 rows scikit-learn rounds the
# test set up to 2 rows, leaving 4 rows for training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 10-tree random forest on the training rows (fit returns the estimator)
model = RandomForestClassifier(n_estimators=10, random_state=42).fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))
Model Accuracy: 1.0

Code Explanation


Step 1: Importing Required Libraries

  • pandas – Used for handling tabular data (like an Excel sheet).
  • numpy – Used for numerical operations (though it's not directly used in this code).
  • train_test_split – Splits data into training and testing sets.
  • RandomForestClassifier – A machine learning algorithm used for classification.
  • accuracy_score – Measures how well the model performs.

Step 2: Creating a Sample Dataset

data = pd.DataFrame([
    {"amount": 100, "is_fraud": 0},   # Normal transaction
    {"amount": 5000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 200, "is_fraud": 0},   # Normal transaction
    {"amount": 8000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 150, "is_fraud": 0},   # Normal transaction
    {"amount": 7500, "is_fraud": 1}   # Fraudulent transaction
])
  • This creates a pandas DataFrame with two columns:
    • amount (transaction amount in dollars)
    • is_fraud (0 = normal, 1 = fraudulent)
  • The dataset is very small (only 6 rows) and is used for demonstration.

Step 3: Splitting Data into Features (X) and Labels (y)

X = data[["amount"]]   # Features (Transaction Amount)
y = data["is_fraud"]   # Labels (0: Legit, 1: Fraud)
  • X – Contains features (the data we use to predict fraud). Here, it's just the transaction amount.
  • y – Contains the target labels (what we want to predict – fraud or not fraud).

Step 4: Splitting the Dataset into Training and Testing Sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
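  • Purpose: Splits data into training and testing sets. test_size=0.2 requests a 20% test share, but with only 6 rows scikit-learn rounds the test set up to 2 rows (about 33%), leaving 4 rows (about 67%) for training.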
  • Why?
    • The training set is used to teach the model.
    • The testing set is used to evaluate performance.
  • random_state=42 – Ensures reproducibility (so the split is the same every time you run the code).

Step 5: Training the Machine Learning Model

model = RandomForestClassifier(n_estimators=10, random_state=42)
model.fit(X_train, y_train)
  • What is RandomForestClassifier?
    • It’s an ensemble machine learning algorithm.
    • It creates multiple decision trees and combines their outputs.
    • It's good for classification tasks like fraud detection.
  • n_estimators=10
    • Uses 10 decision trees in the forest.
  • .fit(X_train, y_train)
    • Trains the model using the training data.

Step 6: Making Predictions

y_pred = model.predict(X_test)
  • Predicts whether the transactions in the test set are fraudulent or not.

Step 7: Evaluating the Model

print("Model Accuracy:", accuracy_score(y_test, y_pred))
  • accuracy_score(y_test, y_pred)
    • Compares the model’s predictions (y_pred) with the actual labels (y_test).
    • Outputs a score between 0 and 1 (1 = perfect prediction, 0 = all wrong).

Summary

  1. Load necessary libraries.
  2. Create a small dataset of transactions.
  3. Separate the data into features (X) and labels (y).
  4. Split the data into training and testing sets (test_size=0.2; with 6 rows this gives 4 training rows and 2 testing rows).
  5. Train a Random Forest Classifier model on the training data.
  6. Use the model to predict fraud in the test data.
  7. Measure the model’s accuracy.

Key Takeaways

  • Supervised Learning: The model learns from labeled data (is_fraud values).
  • Feature Selection: The only feature used here is transaction amount.
  • Limitations of this Example:
    • The dataset is tiny and unrealistic. Only for educational purposes.
    • A real-world fraud detection model would use more features (like transaction location, time, user behavior, etc.).
    • We should handle imbalanced data (real-world fraud is much rarer than normal transactions).
def is_fraudulent(transaction):
  """Return True when the trained model classifies `transaction` as fraud.

  Args:
    transaction: Mapping with an "amount" key holding the transaction amount.

  Returns:
    bool: True if the module-level `model` predicts label 1 (fraud).

  Raises:
    KeyError: If `transaction` has no "amount" key.
  """
  # The model was fitted on a DataFrame with an "amount" column, so predict on
  # the same layout. Passing a bare NumPy array makes scikit-learn emit the
  # "X does not have valid feature names" UserWarning on every call.
  features = pd.DataFrame({"amount": [transaction["amount"]]})
  # bool() turns the numpy.bool_ comparison result into a plain Python bool.
  return bool(model.predict(features)[0] == 1)  # 1 means fraud

# Candidate transactions, screened by the model before they reach the chain
new_transactions = [
  {"sender": "George", "receiver": "Henry", "amount": 900},  # Likely legit
  {"sender": "Ivy", "receiver": "Jack", "amount": 10000},  # Likely fraud
]

# Keep only the transactions the model does not flag as fraud
valid_transactions = list(
  filter(lambda tx: not is_fraudulent(tx), new_transactions)
)

# An empty list means every candidate was rejected, so no block is created
if not valid_transactions:
  print("No valid transactions found.")
else:
  my_chain.add_block(valid_transactions)
  print("Safe transactions added to the blockchain!")
Safe transactions added to the blockchain!


/Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
  warnings.warn(
/Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
  warnings.warn(
# Dump every block in chain order, one line per block
for block in my_chain.chain:
  print(
    f"Index: {block.index}, "
    f"Hash: {block.hash}, "
    f"Previous Hash: {block.previous_hash}, "
    f"Transactions: {block.transactions}"
  )
Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]
Index: 3, Hash: 40bb6a72df826ae077b3cb8ef9ff5be3d278fcf45968e84b1ff7b43f2328b6d3, Previous Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Transactions: [{'sender': 'George', 'receiver': 'Henry', 'amount': 900}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment