Skip to content

Instantly share code, notes, and snippets.

@balvinder294
Last active March 27, 2020 17:45
Show Gist options
  • Select an option

  • Save balvinder294/b8f4c62f704e42bc109de8a0a8dd217b to your computer and use it in GitHub Desktop.

Select an option

Save balvinder294/b8f4c62f704e42bc109de8a0a8dd217b to your computer and use it in GitHub Desktop.
Extract Data from Any Document like a Resume, Bill, or any Form with AWS Textract Synchronous Snippet --- Tekraze.com
//Imports for packages used
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.services.textract.AmazonTextract;
import com.amazonaws.services.textract.AmazonTextractClientBuilder;
import com.amazonaws.services.textract.model.AnalyzeDocumentRequest;
import com.amazonaws.services.textract.model.AnalyzeDocumentResult;
import com.amazonaws.services.textract.model.Block;
import com.amazonaws.services.textract.model.Document;
import com.amazonaws.services.textract.model.S3Object;
import java.nio.ByteBuffer;
import java.util.List;
//
/*
 * Step 1: build the credentials provider used to authorize the Textract client.
 * The two string literals are placeholders for a real access key id and secret key.
 * NOTE(review): keep real keys out of source control.
 */
String accessKeyId = "YOUR_APP_ID";
String secretAccessKey = "YOUR_APP_KEY";
AWSStaticCredentialsProvider awsStaticCredentialsProvider =
    new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyId, secretAccessKey));
/*
 * Step 2: describe the endpoint of the service to call (Textract in us-east-2 here).
 * The endpoint URL is built from `region` so that the host name and the region handed
 * to EndpointConfiguration cannot drift apart when only the region is edited.
 */
String region = "us-east-2";
String regionUrl = "https://textract." + region + ".amazonaws.com";
EndpointConfiguration endpoint = new EndpointConfiguration(regionUrl, region);
/*************************************************** Step 3
Intialize The Textract Client
*/
AmazonTextract textractClient = AmazonTextractClientBuilder.standard()
.withCredentials(awsStaticCredentialsProvider)
.withEndpointConfiguration(endpoint)
.build();
/******************************************** Step 4
Create a Document from Either S3 Object or from a local file
*/
/********** Using S3 Object*************/
/*** S3 object like PDF, PNG, JPG ***/
// Initialize Document
// Intialize S3 Object
S3Object s3Object = new S3Object()
.withBucket(bucketName) // Name of bucket where object is stored
.withName(objectName); // Name of Object to get
// Intialize document
Document doc = new Document()
.withS3Object(s3Object);
/* Option: a local document, converted to an image, sent inline as bytes. */
// Read the input file as a byte[].
// NOTE(review): `file` is not declared in this snippet; it is presumably an upload/file
// object exposing getBytes() — confirm against the calling code.
byte[] fileAsBytes = file.getBytes();
// Wrap the bytes in a java.nio.ByteBuffer, the type Document.withBytes is given here.
ByteBuffer imageBytes = ByteBuffer.wrap(fileAsBytes);
// Create the document from the file content.
Document document = new Document().withBytes(imageBytes);
/********************** Step 5*************
* Initialize AnalyzeDocument Request
*/
AnalyzeDocumentRequest request = new AnalyzeDocumentRequest()
.withFeatureTypes("TABLES","FORMS") // Features to get or anyone from table to form
.withDocument(document); // the document we created in step 4
/*
 * Step 6: send the request through the Textract client and keep the
 * AnalyzeDocument result.
 */
AnalyzeDocumentResult result = textractClient.analyzeDocument(request);
/* The result can now be used. */
/*
 * Step 7: get the data from the blocks.
 * Blocks are nested data holding the content of a page, a line or a word,
 * together with their relationships.
 */
// All blocks returned for the document.
List<Block> blocks = result.getBlocks();
// Walk the list and print the details of each single block, one value per line.
for (Block block : blocks) {
    System.out.println(block.getBlockType());     // type, such as LINE or WORD
    System.out.println(block.getText());          // text extracted for this block
    System.out.println(block.getConfidence());    // confidence reported for the extracted data
    System.out.println(block.getEntityTypes());   // entity types, such as key or value
    System.out.println(block.getRelationships()); // links to other blocks, e.g. a key to its value
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment