Forked from balvinder294/audio-transcriptions-aws.java
Created
December 12, 2020 02:58
-
-
Save ripper2hl/a62d81438bf6378212a33ed5bb75fa27 to your computer and use it in GitHub Desktop.
Audio File Transcription for Transcribe API Aws == Tekraze.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/********** Step 1 ********************** | |
*********** File Imports ***************/ | |
import com.amazonaws.services.transcribe.AmazonTranscribe; | |
import com.amazonaws.services.transcribe.AmazonTranscribeClientBuilder; | |
import com.amazonaws.services.transcribe.model.DeleteTranscriptionJobRequest; | |
import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest; | |
import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult; | |
import com.amazonaws.services.transcribe.model.LanguageCode; | |
import com.amazonaws.services.transcribe.model.Media; | |
import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest; | |
import com.amazonaws.services.transcribe.model.StartTranscriptionJobResult; | |
import com.amazonaws.services.transcribe.model.TranscriptionJob; | |
import com.amazonaws.services.transcribe.model.TranscriptionJobStatus; | |
import com.fasterxml.jackson.databind.ObjectMapper; | |
import okhttp3.OkHttpClient; | |
import okhttp3.Request; | |
import okhttp3.Response; | |
// Check at Another Gist >> https://gist.github.com/balvinder294/fd16a7bd8cf5d9a889161d370cf8ee3f | |
import dehaze.mvp.service.dto.awstranscribe.TranscriptionResponseDTO; | |
/*********************************** Step 2 ***********************************
 **************** Initialize Transcribe Client *******************************/
AmazonTranscribe transcribeClient() { | |
log.debug("Intialize Transcribe Client"); | |
BasicAWSCredentials awsCreds = new BasicAWSCredentials(awsAccessKey, awsSecretKey); | |
AWSStaticCredentialsProvider awsStaticCredentialsProvider = new AWSStaticCredentialsProvider(awsCreds); | |
return AmazonTranscribeClientBuilder.standard().withCredentials(awsStaticCredentialsProvider) | |
.withRegion(awsRegion).build(); | |
} | |
/***** Step 3 *********************************************************************
 * *************** Upload/Delete File to AWS methods ***************************************
 ******* You can skip this step if the file is already in the AWS bucket *******/
AmazonS3 s3Client() { | |
log.debug("Intialize AWS S3 Client"); | |
BasicAWSCredentials awsCreds = new BasicAWSCredentials(awsAccessKey, awsSecretKey); | |
AWSStaticCredentialsProvider awsStaticCredentialsProvider = new AWSStaticCredentialsProvider(awsCreds); | |
return AmazonS3ClientBuilder.standard().withCredentials(awsStaticCredentialsProvider).withRegion(awsRegion) | |
.build(); | |
} | |
public void uploadFileToAwsBucket(MultipartFile file) { | |
log.debug("Upload file to AWS Bucket {}", file); | |
String key = file.getOriginalFilename().replaceAll(" ", "_").toLowerCase(); | |
try { | |
s3Client().putObject(bucketName, key, file.getInputStream(), null); | |
} catch (SdkClientException | IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
public void deleteFileFromAwsBucket(String fileName) { | |
log.debug("Delete File from AWS Bucket {}", fileName); | |
String key = fileName.replaceAll(" ", "_").toLowerCase(); | |
s3Client().deleteObject(bucketName, key); | |
} | |
/*************** Step 4 ****************** | |
* **** Start Transcription Job Method******/ | |
StartTranscriptionJobResult startTranscriptionJob(String key) { | |
log.debug("Start Transcription Job By Key {}",key); | |
Media media = new Media().withMediaFileUri(s3Client().getUrl(bucketName, key).toExternalForm()); | |
String jobName = key.concat(RandomString.make()); | |
StartTranscriptionJobRequest startTranscriptionJobRequest = new StartTranscriptionJobRequest() | |
.withLanguageCode(LanguageCode.EnUS).withTranscriptionJobName(jobName).withMedia(media); | |
StartTranscriptionJobResult startTranscriptionJobResult = transcribeClient() | |
.startTranscriptionJob(startTranscriptionJobRequest); | |
return startTranscriptionJobResult; | |
} | |
/**************************Step 5***************** | |
* *** Get Transcription Job Result method *********/ | |
GetTranscriptionJobResult getTranscriptionJobResult(String jobName) { | |
log.debug("Get Transcription Job Result By Job Name : {}",jobName); | |
GetTranscriptionJobRequest getTranscriptionJobRequest = new GetTranscriptionJobRequest() | |
.withTranscriptionJobName(jobName); | |
Boolean resultFound = false; | |
TranscriptionJob transcriptionJob = new TranscriptionJob(); | |
GetTranscriptionJobResult getTranscriptionJobResult = new GetTranscriptionJobResult(); | |
while (resultFound == false) { | |
getTranscriptionJobResult = transcribeClient().getTranscriptionJob(getTranscriptionJobRequest); | |
transcriptionJob = getTranscriptionJobResult.getTranscriptionJob(); | |
if (transcriptionJob.getTranscriptionJobStatus() | |
.equalsIgnoreCase(TranscriptionJobStatus.COMPLETED.name())) { | |
return getTranscriptionJobResult; | |
} else if (transcriptionJob.getTranscriptionJobStatus() | |
.equalsIgnoreCase(TranscriptionJobStatus.FAILED.name())) { | |
return null; | |
} else if (transcriptionJob.getTranscriptionJobStatus() | |
.equalsIgnoreCase(TranscriptionJobStatus.IN_PROGRESS.name())) { | |
try { | |
Thread.sleep(15000); | |
} catch (InterruptedException e) { | |
log.debug("Interrupted Exception {}", e.getMessage()); | |
} | |
} | |
} | |
return getTranscriptionJobResult; | |
} | |
/******************Step 6 ************************** | |
* Download Transcription Result from URI Method *********/ | |
TranscriptionResponseDTO downloadTranscriptionResponse(String uri){ | |
log.debug("Download Transcription Result from Transcribe URi {}", uri); | |
OkHttpClient okHttpClient = new OkHttpClient() | |
.newBuilder() | |
.connectTimeout(60, TimeUnit.SECONDS) | |
.writeTimeout(60, TimeUnit.SECONDS) | |
.readTimeout(60, TimeUnit.SECONDS) | |
.build(); | |
Request request = new Request.Builder().url(uri).build(); | |
Response response; | |
try { | |
response = okHttpClient.newCall(request).execute(); | |
String body = response.body().string(); | |
ObjectMapper objectMapper = new ObjectMapper(); | |
response.close(); | |
return objectMapper.readValue(body, TranscriptionResponseDTO.class); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
return null; | |
} | |
/***************************** Step 7 *****************************
 * **** Delete Transcription Job Method ************
 * Deletes the transcription job after its result has been retrieved ****/
void deleteTranscriptionJob(String jobName) { | |
log.debug("Delete Transcription Job from amazon Transcribe {}",jobName); | |
DeleteTranscriptionJobRequest deleteTranscriptionJobRequest = new DeleteTranscriptionJobRequest() | |
.withTranscriptionJobName(jobName); | |
transcribeClient().deleteTranscriptionJob(deleteTranscriptionJobRequest); | |
} | |
/********************************* Step 8 ****************************************
 * Extract Speech Text method that combines all the methods above into a single method ******
 ***** You can skip the upload/delete methods if you just want to process a file already in AWS
 * ** by passing the key of the file in the bucket and creating the media from it *****/
public TranscriptionResponseDTO extractSpeechTextFromVideo(MultipartFile file) { | |
log.debug("Request to extract Speech Text from Video : {}",file); | |
// Upload file to Aws | |
uploadFileToAwsBucket(file); | |
// Create a key that is like name for file and will be used for creating unique name based id for transcription job | |
String key = file.getOriginalFilename().replaceAll(" ", "_").toLowerCase(); | |
// Start Transcription Job and get result | |
StartTranscriptionJobResult startTranscriptionJobResult = startTranscriptionJob(key); | |
// Get name of job started for the file | |
String transcriptionJobName = startTranscriptionJobResult.getTranscriptionJob().getTranscriptionJobName(); | |
// Get result after the procesiing is complete | |
GetTranscriptionJobResult getTranscriptionJobResult = getTranscriptionJobResult(transcriptionJobName); | |
//delete file as processing is done | |
deleteFileFromAwsBucket(key); | |
// Url of result file for transcription | |
String transcriptFileUriString = getTranscriptionJobResult.getTranscriptionJob().getTranscript().getTranscriptFileUri(); | |
// Get the transcription response by downloading the file | |
TranscriptionResponseDTO transcriptionResponseDTO = downloadTranscriptionResponse(transcriptFileUriString); | |
//Delete the transcription job after finishing or it will get deleted after 90 days automatically if you do not call | |
deleteTranscriptionJob(transcriptionJobName); | |
return transcriptionResponseDTO; | |
} | |
////// For the transcription Response DTO | |
//// Check this Url for Gist >> https://gist.github.com/balvinder294/fd16a7bd8cf5d9a889161d370cf8ee3f |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment