Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ripper2hl/a62d81438bf6378212a33ed5bb75fa27 to your computer and use it in GitHub Desktop.
Save ripper2hl/a62d81438bf6378212a33ed5bb75fa27 to your computer and use it in GitHub Desktop.
Audio File Transcription for Transcribe API Aws == Tekraze.com
/********** Step 1 **********************
*********** File Imports ***************/
import com.amazonaws.services.transcribe.AmazonTranscribe;
import com.amazonaws.services.transcribe.AmazonTranscribeClientBuilder;
import com.amazonaws.services.transcribe.model.DeleteTranscriptionJobRequest;
import com.amazonaws.services.transcribe.model.GetTranscriptionJobRequest;
import com.amazonaws.services.transcribe.model.GetTranscriptionJobResult;
import com.amazonaws.services.transcribe.model.LanguageCode;
import com.amazonaws.services.transcribe.model.Media;
import com.amazonaws.services.transcribe.model.StartTranscriptionJobRequest;
import com.amazonaws.services.transcribe.model.StartTranscriptionJobResult;
import com.amazonaws.services.transcribe.model.TranscriptionJob;
import com.amazonaws.services.transcribe.model.TranscriptionJobStatus;
import com.fasterxml.jackson.databind.ObjectMapper;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
// Check at Another Gist >> https://gist.github.com/balvinder294/fd16a7bd8cf5d9a889161d370cf8ee3f
import dehaze.mvp.service.dto.awstranscribe.TranscriptionResponseDTO;
/*********************************** Step 2***********************************
**************** Initialize Transcribe Client ******************************/
/**
 * Builds an Amazon Transcribe client.
 *
 * <p>Credentials come from {@code awsAccessKey}/{@code awsSecretKey} and the
 * region from {@code awsRegion}. Every call builds a new client instance.
 *
 * @return the freshly built Transcribe client
 */
AmazonTranscribe transcribeClient() {
    log.debug("Intialize Transcribe Client");
    AWSStaticCredentialsProvider credentialsProvider =
            new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey));
    return AmazonTranscribeClientBuilder.standard()
            .withCredentials(credentialsProvider)
            .withRegion(awsRegion)
            .build();
}
/***** Step 3*********************************************************************
* *************** Upload/Delete File to AWS methods ***********************************
******* You can skip this step if the file is already in the AWS bucket *******/
/**
 * Builds an Amazon S3 client.
 *
 * <p>Credentials come from {@code awsAccessKey}/{@code awsSecretKey} and the
 * region from {@code awsRegion}. Every call builds a new client instance.
 *
 * @return the freshly built S3 client
 */
AmazonS3 s3Client() {
    log.debug("Intialize AWS S3 Client");
    AWSStaticCredentialsProvider credentialsProvider =
            new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey));
    return AmazonS3ClientBuilder.standard()
            .withCredentials(credentialsProvider)
            .withRegion(awsRegion)
            .build();
}
/**
 * Uploads the given multipart file to the configured S3 bucket.
 *
 * <p>The object key is the original file name with spaces replaced by
 * underscores, lower-cased. Upload failures are logged and not rethrown, so
 * this method stays best-effort for existing callers.
 *
 * @param file the multipart file to upload; its original file name must not be null
 */
public void uploadFileToAwsBucket(MultipartFile file) {
    log.debug("Upload file to AWS Bucket {}", file);
    String key = file.getOriginalFilename().replaceAll(" ", "_").toLowerCase();
    // try-with-resources: the multipart stream is closed whether or not the upload succeeds.
    try (java.io.InputStream content = file.getInputStream()) {
        // NOTE(review): metadata is passed as null, so no content length is sent;
        // presumably the SDK then buffers the stream in memory - confirm for large media files.
        s3Client().putObject(bucketName, key, content, null);
    } catch (SdkClientException | IOException e) {
        // Use the application logger (with stack trace) instead of printStackTrace().
        log.error("Failed to upload file to AWS bucket under key {}", key, e);
    }
}
/**
 * Deletes an object from the configured S3 bucket.
 *
 * <p>The object key is derived from {@code fileName} by replacing spaces with
 * underscores and lower-casing the result.
 *
 * @param fileName name of the file whose object should be deleted
 */
public void deleteFileFromAwsBucket(String fileName) {
    log.debug("Delete File from AWS Bucket {}", fileName);
    final String underscored = fileName.replace(' ', '_');
    final String objectKey = underscored.toLowerCase();
    s3Client().deleteObject(bucketName, objectKey);
}
/*************** Step 4 ******************
* **** Start Transcription Job Method******/
/**
 * Starts an en-US transcription job for the object stored under {@code key}
 * in the configured bucket.
 *
 * <p>The job name is the key plus a random suffix. Amazon Transcribe only
 * accepts job names matching {@code [0-9a-zA-Z._-]+} with at most 200
 * characters, so any other character in the key is replaced by an underscore
 * and the key part is truncated to leave room for the suffix.
 *
 * @param key S3 object key of the media file
 * @return the result returned by the Transcribe start-job call
 */
StartTranscriptionJobResult startTranscriptionJob(String key) {
    log.debug("Start Transcription Job By Key {}",key);
    Media media = new Media().withMediaFileUri(s3Client().getUrl(bucketName, key).toExternalForm());

    String randomSuffix = RandomString.make();
    // File names may contain characters (parentheses, accents, ...) that are not valid in a job name.
    String safeKey = key.replaceAll("[^0-9a-zA-Z._-]", "_");
    int maxKeyLength = Math.max(0, 200 - randomSuffix.length());
    if (safeKey.length() > maxKeyLength) {
        safeKey = safeKey.substring(0, maxKeyLength);
    }
    String jobName = safeKey.concat(randomSuffix);

    StartTranscriptionJobRequest startTranscriptionJobRequest = new StartTranscriptionJobRequest()
            .withLanguageCode(LanguageCode.EnUS)
            .withTranscriptionJobName(jobName)
            .withMedia(media);
    return transcribeClient().startTranscriptionJob(startTranscriptionJobRequest);
}
/**************************Step 5*****************
* *** Get Transcription Job Result method *********/
/**
 * Polls Amazon Transcribe until the named job reaches a terminal state.
 *
 * <p>The job is queried every 15 seconds. Any status other than COMPLETED or
 * FAILED is treated as "still running", so the thread sleeps between polls
 * instead of spinning.
 *
 * @param jobName name of the transcription job to wait for
 * @return the job result once the status is COMPLETED; {@code null} if the job
 *         FAILED or the waiting thread was interrupted
 */
GetTranscriptionJobResult getTranscriptionJobResult(String jobName) {
    log.debug("Get Transcription Job Result By Job Name : {}",jobName);
    final long pollIntervalMillis = 15000;
    GetTranscriptionJobRequest getTranscriptionJobRequest = new GetTranscriptionJobRequest()
            .withTranscriptionJobName(jobName);
    // Build the client once; it does not need to be recreated for every poll.
    AmazonTranscribe client = transcribeClient();
    while (true) {
        GetTranscriptionJobResult getTranscriptionJobResult = client.getTranscriptionJob(getTranscriptionJobRequest);
        TranscriptionJob transcriptionJob = getTranscriptionJobResult.getTranscriptionJob();
        String status = transcriptionJob.getTranscriptionJobStatus();
        // Constant-first comparison keeps the check safe if the status is null.
        if (TranscriptionJobStatus.COMPLETED.name().equalsIgnoreCase(status)) {
            return getTranscriptionJobResult;
        }
        if (TranscriptionJobStatus.FAILED.name().equalsIgnoreCase(status)) {
            log.warn("Transcription job {} failed: {}", jobName, transcriptionJob.getFailureReason());
            return null;
        }
        try {
            Thread.sleep(pollIntervalMillis);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop polling; continuing would make
            // every later sleep throw immediately and turn this into a busy loop.
            Thread.currentThread().interrupt();
            log.debug("Interrupted Exception {}", e.getMessage());
            return null;
        }
    }
}
/******************Step 6 **************************
* Download Transcription Result from URI Method *********/
/**
 * Downloads the transcript JSON from the given URI and maps it to a
 * {@link TranscriptionResponseDTO}.
 *
 * <p>Connect, write and read timeouts are 60 seconds each.
 *
 * @param uri location of the transcript file
 * @return the parsed transcript, or {@code null} if the request was not
 *         successful, the response had no body, or an I/O or parsing error occurred
 */
TranscriptionResponseDTO downloadTranscriptionResponse(String uri){
    log.debug("Download Transcription Result from Transcribe URi {}", uri);
    OkHttpClient okHttpClient = new OkHttpClient()
            .newBuilder()
            .connectTimeout(60, TimeUnit.SECONDS)
            .writeTimeout(60, TimeUnit.SECONDS)
            .readTimeout(60, TimeUnit.SECONDS)
            .build();
    Request request = new Request.Builder().url(uri).build();
    // try-with-resources: the response is closed even when reading or parsing the body throws.
    try (Response response = okHttpClient.newCall(request).execute()) {
        if (!response.isSuccessful()) {
            // The URI is not repeated at error level because it may carry a signed query string.
            log.error("Transcript download failed with HTTP status {}", response.code());
            return null;
        }
        if (response.body() == null) {
            log.error("Transcript download returned an empty response body");
            return null;
        }
        String body = response.body().string();
        return new ObjectMapper().readValue(body, TranscriptionResponseDTO.class);
    } catch (IOException e) {
        log.error("Failed to download or parse transcription result", e);
    }
    return null;
}
/***************************** Step 7 *****************************
* **** Delete Transcription Job Method ************
* TO delete transcription job after getting result****/
/**
 * Deletes the named transcription job from Amazon Transcribe.
 *
 * @param jobName name of the transcription job to delete
 */
void deleteTranscriptionJob(String jobName) {
    log.debug("Delete Transcription Job from amazon Transcribe {}",jobName);
    DeleteTranscriptionJobRequest request = new DeleteTranscriptionJobRequest();
    request.setTranscriptionJobName(jobName);
    transcribeClient().deleteTranscriptionJob(request);
}
/********************************* Step 8****************************************
* Extract Speech Text method that combines all of the above methods into one ****
***** You can skip the upload and delete steps if the file is already in AWS:
* ** pass the file's key in the bucket and create the media from it *****/
/**
 * Runs the whole transcription pipeline for an uploaded media file: upload to
 * S3, start a transcription job, wait for it, download the transcript, and
 * clean up.
 *
 * <p>The uploaded S3 object and the transcription job (if one was started)
 * are deleted in {@code finally} blocks, so they are removed even when a step
 * fails.
 *
 * @param file the media file to transcribe; its original file name must not be null
 * @return the parsed transcript, or {@code null} if the job did not complete
 *         or the transcript could not be downloaded
 */
public TranscriptionResponseDTO extractSpeechTextFromVideo(MultipartFile file) {
    log.debug("Request to extract Speech Text from Video : {}",file);
    // Upload file to AWS
    uploadFileToAwsBucket(file);
    // Same key derivation as the upload; also the base of the transcription job name
    String key = file.getOriginalFilename().replaceAll(" ", "_").toLowerCase();
    String transcriptionJobName = null;
    try {
        // Start the transcription job and remember its name for polling and cleanup
        StartTranscriptionJobResult startTranscriptionJobResult = startTranscriptionJob(key);
        transcriptionJobName = startTranscriptionJobResult.getTranscriptionJob().getTranscriptionJobName();
        // Wait until processing is complete
        GetTranscriptionJobResult getTranscriptionJobResult = getTranscriptionJobResult(transcriptionJobName);
        if (getTranscriptionJobResult == null) {
            // The polling method returns null for a failed job; there is no transcript to download.
            log.error("Transcription job {} did not complete; no transcript available", transcriptionJobName);
            return null;
        }
        // URL of the transcript result file
        String transcriptFileUriString = getTranscriptionJobResult.getTranscriptionJob().getTranscript().getTranscriptFileUri();
        // Download and parse the transcript
        return downloadTranscriptionResponse(transcriptFileUriString);
    } finally {
        try {
            // The source media is no longer needed once processing has finished or failed
            deleteFileFromAwsBucket(key);
        } finally {
            // Delete the job explicitly rather than leaving it behind on the service
            if (transcriptionJobName != null) {
                deleteTranscriptionJob(transcriptionJobName);
            }
        }
    }
}
////// For the transcription Response DTO
//// Check this Url for Gist >> https://gist.github.com/balvinder294/fd16a7bd8cf5d9a889161d370cf8ee3f
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment