Skip to content

Instantly share code, notes, and snippets.

@fnzainal
Forked from hendrawd/SpeechToTextActivity.java
Created December 15, 2016 09:52
Show Gist options
  • Save fnzainal/4b779944e898e96bd9e214bfe30e1be4 to your computer and use it in GitHub Desktop.
Save fnzainal/4b779944e898e96bd9e214bfe30e1be4 to your computer and use it in GitHub Desktop.
A simple working test of the IBM Watson Speech to Text capability on the Android platform
<?xml version="1.0" encoding="utf-8"?>
<!--
    Copyright 2016 hendrawd
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<!-- Layout for SpeechToTextActivity: a full-screen vertical stack with a
     transcript label on top and a record toggle button below it. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!-- Shows the most recent transcript; starts with a placeholder greeting. -->
    <TextView
        android:id="@+id/text_view"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Halo!" />

    <!-- Starts or stops recording; the activity swaps its caption at runtime. -->
    <Button
        android:id="@+id/button_record"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:text="Start Record" />

</LinearLayout>
// Put these dependencies in your app-level build.gradle (the module's file,
// not the project-level one).
// java-sdk provides the SpeechToText client and its model/websocket classes;
// android-sdk provides MicrophoneInputStream and ContentType used by the activity.
// NOTE(review): newer Android Gradle plugin versions replace the `compile`
// configuration with `implementation` - confirm against the plugin version in use.
dependencies {
compile 'com.ibm.watson.developer_cloud:java-sdk:3.5.2'
compile 'com.ibm.watson.developer_cloud:android-sdk:0.2.1'
}
/**
* Copyright (C) 2016 hendrawd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import android.app.Activity;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import com.ibm.mobileappbuilder.speechtotexttest20161215031450.R;
import com.ibm.watson.developer_cloud.android.library.audio.MicrophoneInputStream;
import com.ibm.watson.developer_cloud.android.library.audio.utils.ContentType;
import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.RecognizeOptions;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeCallback;
import java.io.InputStream;
/**
 * Minimal demo screen that streams microphone audio to the IBM Watson Speech
 * to Text service over a WebSocket and displays the latest transcript.
 *
 * <p>A single button toggles recording. All mutable state ({@code listening},
 * {@code inputStream}) is only touched on the UI thread; service callbacks,
 * which arrive on other threads, are marshalled back with
 * {@link #runOnUiThread(Runnable)}.
 *
 * <p>NOTE(review): this activity does not request the RECORD_AUDIO permission
 * at runtime; it presumably relies on the manifest/pre-granted permission -
 * confirm for the targeted API level.
 *
 * @author hendrawd on 12/15/16
 */
public class SpeechToTextActivity extends Activity {

    private Button buttonRecord;
    private TextView textView;
    private SpeechToText speechToTextService;

    /** True while a recognition session is considered active by the UI. */
    private boolean listening = false;

    /** Microphone stream of the active session, or {@code null} when idle. */
    private InputStream inputStream;

    /**
     * Inflates the layout, creates the Watson client and wires the record
     * button to toggle between starting and stopping a session.
     *
     * @param savedInstanceState state bundle forwarded to the superclass
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_speech_to_text);

        buttonRecord = (Button) findViewById(R.id.button_record);
        textView = (TextView) findViewById(R.id.text_view);
        speechToTextService = initSpeechToTextService();

        buttonRecord.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View v) {
                if (!listening) {
                    startRecord();
                } else {
                    stopRecord();
                }
            }
        });
    }

    /**
     * Releases the microphone stream if a session is still open so the
     * recorder does not outlive the activity.
     */
    @Override
    protected void onDestroy() {
        if (inputStream != null) {
            stopRecord();
        }
        super.onDestroy();
    }

    /**
     * Builds the Speech to Text client using the credentials stored in the
     * {@code username} and {@code password} string resources.
     *
     * @return a client pointed at the Watson streaming endpoint
     */
    private SpeechToText initSpeechToTextService() {
        SpeechToText speechToTextService = new SpeechToText();
        String username = getString(R.string.username);
        String password = getString(R.string.password);
        speechToTextService.setUsernameAndPassword(username, password);
        speechToTextService.setEndPoint("https://stream.watsonplatform.net/speech-to-text/api");
        return speechToTextService;
    }

    /**
     * Builds the recognition options used for every session: continuous
     * recognition of Opus audio with the US English broadband model and
     * interim results enabled.
     *
     * @return the options passed to {@code recognizeUsingWebSocket}
     */
    private RecognizeOptions getRecognizeOptions() {
        return new RecognizeOptions.Builder()
                .continuous(true)
                .contentType(ContentType.OPUS.toString())
                .model("en-US_BroadbandModel")
                .interimResults(true)
                // NOTE(review): the Watson API documents inactivity_timeout in
                // seconds; 2000 may have been intended as milliseconds - confirm.
                .inactivityTimeout(2000).build();
    }

    /**
     * Opens the microphone and starts a recognition session on a background
     * thread. Must be called on the UI thread.
     */
    private void startRecord() {
        // Create the stream before touching the UI so a failure to open the
        // microphone does not leave the button in the "recording" state.
        // presumably `true` selects Opus encoding to match ContentType.OPUS - confirm.
        final InputStream stream = new MicrophoneInputStream(true);
        inputStream = stream;
        listening = true;
        buttonRecord.setText("Stop Record");

        new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    // Use the session-local reference rather than the field, which
                    // may already belong to a newer session by the time this runs.
                    speechToTextService.recognizeUsingWebSocket(stream, getRecognizeOptions(),
                            new BaseRecognizeCallback() {
                                @Override
                                public void onTranscription(SpeechResults speechResults) {
                                    String text = firstTranscript(speechResults);
                                    if (text != null) {
                                        showResult(text);
                                    }
                                }

                                @Override
                                public void onError(Exception e) {
                                    showError(e);
                                    finishSession(stream);
                                }

                                @Override
                                public void onDisconnected() {
                                    finishSession(stream);
                                }
                            });
                } catch (Exception e) {
                    showError(e);
                    finishSession(stream);
                }
            }
        }).start();
    }

    /**
     * Extracts the transcript of the first alternative of the first result.
     *
     * @param speechResults payload delivered by the service, may be {@code null}
     * @return the transcript, or {@code null} when the payload carries no
     *         result or no alternative
     */
    private static String firstTranscript(SpeechResults speechResults) {
        if (speechResults == null
                || speechResults.getResults() == null
                || speechResults.getResults().isEmpty()) {
            return null;
        }
        if (speechResults.getResults().get(0) == null
                || speechResults.getResults().get(0).getAlternatives() == null
                || speechResults.getResults().get(0).getAlternatives().isEmpty()) {
            return null;
        }
        return speechResults.getResults().get(0).getAlternatives().get(0).getTranscript();
    }

    /**
     * Resets the UI and releases the microphone when a session ends on its
     * own (error or disconnect). Safe to call from any thread.
     *
     * @param stream the microphone stream of the session that ended
     */
    private void finishSession(final InputStream stream) {
        runOnUiThread(new Runnable() {
            @Override
            public void run() {
                // Ignore late callbacks from a session the user already stopped
                // or replaced with a new one.
                if (inputStream == stream) {
                    stopRecord();
                }
            }
        });
    }

    /**
     * Stops the active session: resets the button and the {@code listening}
     * flag and closes the microphone stream. Must be called on the UI thread.
     * The state is reset even if closing the stream fails, so the button and
     * the flag can never disagree.
     */
    private void stopRecord() {
        listening = false;
        buttonRecord.setText("Start Record");
        if (inputStream == null) {
            return;
        }
        try {
            inputStream.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            inputStream = null;
        }
    }

    /**
     * Shows the error to the user as a short Toast and prints its stack trace.
     * Safe to call from any thread.
     *
     * @param e the failure to report
     */
    private void showError(final Exception e) {
        runOnUiThread(new Runnable() {
            @Override
            public void run() {
                // getMessage() may be null; fall back to the exception's own
                // description so the Toast is never blank.
                String message = e.getMessage() != null ? e.getMessage() : e.toString();
                Toast.makeText(SpeechToTextActivity.this, message, Toast.LENGTH_SHORT).show();
                e.printStackTrace();
            }
        });
    }

    /**
     * Displays the given transcript in the text view. Safe to call from any
     * thread.
     *
     * @param result transcript text to display
     */
    private void showResult(final String result) {
        runOnUiThread(new Runnable() {
            @Override
            public void run() {
                textView.setText(result);
            }
        });
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment