@agrazh
Created December 1, 2025 21:14

Delta Azure Reader

A simple Scala/Spark application to read Delta tables from Azure Blob Storage using ABFSS protocol and Service Principal authentication.

Prerequisites

  1. Java 11 - Download and install from Adoptium
  2. Maven 3.6+ - Download from Maven

Note on Hadoop winutils (Windows)

For read-only operations from Azure Blob Storage, winutils.exe is typically not required since Spark uses the ABFS driver directly rather than the local Hadoop filesystem.

Try running without it first. If you encounter "Could not locate executable null\bin\winutils.exe" errors, then set up winutils:

  1. Download winutils.exe for Hadoop 3.3.x from: https://github.com/cdarlint/winutils
  2. Create a directory: C:\hadoop\bin
  3. Place winutils.exe in C:\hadoop\bin
  4. Set environment variable: HADOOP_HOME=C:\hadoop
  5. Add %HADOOP_HOME%\bin to your PATH
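
Steps 4-5 can be sketched from an elevated Command Prompt (paths as in the list above):

```
:: Set HADOOP_HOME for the current user (takes effect in new terminals only)
setx HADOOP_HOME "C:\hadoop"
:: Add %HADOOP_HOME%\bin to PATH via System Properties > Environment Variables;
:: `setx PATH "%PATH%;..."` is avoided here because it expands and can truncate
:: the existing value.
```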

Configuration

Edit src/main/scala/com/example/DeltaAzureReader.scala and replace the placeholder values:

// Azure Storage Account details
val storageAccountName = "your_storage_account_name"
val containerName = "your_container_name"
val deltaTablePath = "path/to/delta/table"

// Service Principal credentials
val tenantId = "your_tenant_id"
val clientId = "your_client_id"
val clientSecret = "your_client_secret"
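
Hardcoding a client secret in source is risky. A common alternative is to read the values from environment variables instead; a minimal sketch, assuming you export variables named `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, and `AZURE_CLIENT_SECRET` (these names are an assumption, not something the reader enforces):

```scala
// Read Service Principal credentials from the environment,
// falling back to the placeholders so the app still starts without them.
val tenantId     = sys.env.getOrElse("AZURE_TENANT_ID", "your_tenant_id")
val clientId     = sys.env.getOrElse("AZURE_CLIENT_ID", "your_client_id")
val clientSecret = sys.env.getOrElse("AZURE_CLIENT_SECRET", "your_client_secret")
```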

Azure Service Principal Setup

  1. In Azure Portal, go to Azure Active Directory > App registrations
  2. Create a new registration or use existing
  3. Note the Application (client) ID and Directory (tenant) ID
  4. Create a client secret under Certificates & secrets
  5. Grant the Service Principal access to your storage account:
    • Go to your Storage Account > Access Control (IAM)
    • Add role assignment: Storage Blob Data Reader (or Contributor)

Build

mvn clean package

This creates an uber-jar at target/delta-azure-reader-1.0.0.jar

Run

Option 1: Using Maven

mvn exec:java -Dexec.mainClass="com.example.DeltaAzureReader"

Option 2: Using Java directly

java -jar target/delta-azure-reader-1.0.0.jar

Option 3: Using spark-submit (if Spark is installed locally)

spark-submit --class com.example.DeltaAzureReader target/delta-azure-reader-1.0.0.jar
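
If you prefer not to ship Spark inside the jar, spark-submit can resolve the other dependencies itself; a sketch, assuming a thin jar (shade plugin skipped) and the coordinates pinned in pom.xml:

```
spark-submit \
  --class com.example.DeltaAzureReader \
  --packages io.delta:delta-spark_2.12:3.1.0,org.apache.hadoop:hadoop-azure:3.3.4 \
  target/delta-azure-reader-1.0.0.jar
```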

Troubleshooting

Common Issues on Windows

  1. "Could not locate executable null\bin\winutils.exe"

    • Ensure HADOOP_HOME is set correctly
    • Verify winutils.exe exists in %HADOOP_HOME%\bin
  2. Authentication errors

    • Verify Service Principal credentials
    • Ensure the Service Principal has correct RBAC roles on the storage account
    • Check if the OAuth endpoint URL is correct
  3. Delta table not found

    • Verify the ABFSS path is correct
    • Ensure the container and path exist
    • Check that the table directory contains a valid _delta_log directory
  4. Memory issues

    • Increase the driver JVM heap. In local mode the driver runs in the launching JVM, so setting spark.driver.memory from inside the application has no effect; pass -Xmx8g to java -jar, or --driver-memory 8g to spark-submit.

Project Structure

delta-azure-reader/
├── pom.xml
├── README.md
└── src/
    └── main/
        └── scala/
            └── com/
                └── example/
                    └── DeltaAzureReader.scala
src/main/scala/com/example/DeltaAzureReader.scala

package com.example

import org.apache.spark.sql.SparkSession
import io.delta.tables.DeltaTable

/**
 * Simple Spark application to read a Delta table from Azure Blob Storage
 * using the ABFSS protocol and Service Principal authentication.
 */
object DeltaAzureReader {

  def main(args: Array[String]): Unit = {
    // ============================================
    // CONFIGURATION - Replace with your values
    // ============================================

    // Azure Storage Account details
    val storageAccountName = "your_storage_account_name"
    val containerName = "your_container_name"
    val deltaTablePath = "path/to/delta/table"

    // Service Principal credentials
    val tenantId = "your_tenant_id"
    val clientId = "your_client_id"
    val clientSecret = "your_client_secret"

    // ============================================
    // BUILD SPARK SESSION
    // ============================================
    val spark = SparkSession.builder()
      .appName("Delta Azure Reader")
      .master("local[*]") // Running locally on Windows
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      // Azure ABFSS configuration for Service Principal OAuth
      .config(s"fs.azure.account.auth.type.$storageAccountName.dfs.core.windows.net", "OAuth")
      .config(s"fs.azure.account.oauth.provider.type.$storageAccountName.dfs.core.windows.net",
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
      .config(s"fs.azure.account.oauth2.client.id.$storageAccountName.dfs.core.windows.net", clientId)
      .config(s"fs.azure.account.oauth2.client.secret.$storageAccountName.dfs.core.windows.net", clientSecret)
      .config(s"fs.azure.account.oauth2.client.endpoint.$storageAccountName.dfs.core.windows.net",
        s"https://login.microsoftonline.com/$tenantId/oauth2/token")
      // Note: in local mode the driver is this JVM, so spark.driver.memory set here
      // does not resize the heap; use -Xmx on the java command line instead.
      .config("spark.driver.memory", "4g")
      .config("spark.sql.shuffle.partitions", "4")
      .getOrCreate()

    // Set log level
    spark.sparkContext.setLogLevel("WARN")

    try {
      // ============================================
      // READ DELTA TABLE
      // ============================================
      val abfssPath = s"abfss://$containerName@$storageAccountName.dfs.core.windows.net/$deltaTablePath"
      println(s"Reading Delta table from: $abfssPath")

      // Method 1: Using the DataFrame API
      val df = spark.read
        .format("delta")
        .load(abfssPath)

      println("\n=== Schema ===")
      df.printSchema()

      println("\n=== Sample Data (first 10 rows) ===")
      df.show(10, truncate = false)

      println(s"\n=== Total Record Count: ${df.count()} ===")

      // Method 2: Using the DeltaTable API (optional - for Delta-specific operations)
      val deltaTable = DeltaTable.forPath(spark, abfssPath)
      println("\n=== Delta Table History ===")
      deltaTable.history(5).show(truncate = false)

      // Example: Run a simple SQL query
      df.createOrReplaceTempView("delta_data")
      println("\n=== SQL Query Example ===")
      spark.sql("SELECT * FROM delta_data LIMIT 5").show(truncate = false)
    } catch {
      case e: Exception =>
        println(s"Error reading Delta table: ${e.getMessage}")
        e.printStackTrace()
    } finally {
      // Stop Spark session
      spark.stop()
      println("\nSpark session stopped.")
    }
  }
}
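
Beyond the latest snapshot, Delta tables support time travel via the `versionAsOf` and `timestampAsOf` read options. A sketch that could sit alongside the reads in main() above (`spark` and `abfssPath` are the values built there; this needs a live Spark session and Azure access, so it is not runnable standalone):

```scala
// Read an earlier version of the table by version number (or by timestamp
// with .option("timestampAsOf", "2025-01-01")). Version 0 is the first commit.
val dfV0 = spark.read
  .format("delta")
  .option("versionAsOf", 0)
  .load(abfssPath)
dfV0.show(5, truncate = false)
```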
pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>delta-azure-reader</artifactId>
  <version>1.0.0</version>
  <packaging>jar</packaging>

  <name>Delta Azure Reader</name>
  <description>Read Delta tables from Azure Blob Storage using Spark</description>

  <properties>
    <scala.version>2.12.18</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <spark.version>3.5.0</spark.version>
    <delta.version>3.1.0</delta.version>
    <hadoop.version>3.3.4</hadoop.version>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Scala -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>

    <!-- Apache Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>

    <!-- Delta Lake -->
    <dependency>
      <groupId>io.delta</groupId>
      <artifactId>delta-spark_${scala.binary.version}</artifactId>
      <version>${delta.version}</version>
    </dependency>

    <!-- Azure Storage Dependencies -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-azure</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-azure-datalake</artifactId>
      <version>${hadoop.version}</version>
    </dependency>

    <!-- Azure Identity for Service Principal auth -->
    <dependency>
      <groupId>com.azure</groupId>
      <artifactId>azure-identity</artifactId>
      <version>1.11.1</version>
    </dependency>
    <dependency>
      <groupId>com.azure</groupId>
      <artifactId>azure-storage-blob</artifactId>
      <version>12.25.1</version>
    </dependency>

    <!-- Legacy Azure Storage SDK (used by the older wasb:// driver) -->
    <dependency>
      <groupId>com.microsoft.azure</groupId>
      <artifactId>azure-storage</artifactId>
      <version>8.6.6</version>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <plugins>
      <!-- Scala Compiler Plugin -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>4.8.1</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>

      <!-- Maven Shade Plugin for creating the uber-jar -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.5.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>com.example.DeltaAzureReader</mainClass>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
              </transformers>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>