// doctor_app_flutter/speech_to_text/ios/Classes/SwiftSpeechToTextPlugin.swift
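/// iOS side of the speech_to_text Flutter plugin. Bridges Dart method calls
/// arriving over the "plugin.csdcorp.com/speech_to_text" method channel to
/// Apple's Speech framework (SFSpeechRecognizer) and streams recognition
/// results, status changes, sound levels, and errors back to Dart.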
import Flutter
import UIKit
import Speech
import os.log
import Try
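
/// Names of the method calls the Dart side sends over the method channel.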
public enum SwiftSpeechToTextMethods: String {
    case has_permission
    case initialize
    case listen
    case stop
    case cancel
    case locales
    case unknown // just for testing
}
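
/// Names of the callbacks this plugin invokes on the Dart side.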
public enum SwiftSpeechToTextCallbackMethods: String {
    case textRecognition
    case notifyStatus
    case notifyError
    case soundLevelChange
}

public enum SpeechToTextStatus: String {
    case listening
    case notListening
    case unavailable
    case available
}

public enum SpeechToTextErrors: String {
    case onDeviceError
    case noRecognizerError
    case listenFailedError
    case missingOrInvalidArg
}

public enum ListenMode: Int {
    case deviceDefault = 0
    case dictation = 1
    case search = 2
    case confirmation = 3
}
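
/// Codable structs that are JSON-encoded before being sent to Dart. For
/// illustration, a partial result might be delivered as:
/// {"alternates":[{"recognizedWords":"hello world","confidence":0.921}],"finalResult":false}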
struct SpeechRecognitionWords: Codable {
    let recognizedWords: String
    let confidence: Decimal
}

struct SpeechRecognitionResult: Codable {
    let alternates: [SpeechRecognitionWords]
    let finalResult: Bool
}

struct SpeechRecognitionError: Codable {
    let errorMsg: String
    let permanent: Bool
}

enum SpeechToTextError: Error {
    case runtimeError(String)
}

@available(iOS 10.0, *)
public class SwiftSpeechToTextPlugin: NSObject, FlutterPlugin {
    private var channel: FlutterMethodChannel
    private var registrar: FlutterPluginRegistrar
    private var recognizer: SFSpeechRecognizer?
    private var currentRequest: SFSpeechAudioBufferRecognitionRequest?
    private var currentTask: SFSpeechRecognitionTask?
    private var listeningSound: AVAudioPlayer?
    private var successSound: AVAudioPlayer?
    private var cancelSound: AVAudioPlayer?
    private var rememberedAudioCategory: AVAudioSession.Category?
    private var previousLocale: Locale?
    private var onPlayEnd: (() -> Void)?
    private var returnPartialResults: Bool = true
    private var failedListen: Bool = false
    private var listening = false
    private let audioSession = AVAudioSession.sharedInstance()
    private let audioEngine = AVAudioEngine()
    private let jsonEncoder = JSONEncoder()
    private let busForNodeTap = 0
    private let speechBufferSize: AVAudioFrameCount = 1024
    private static var subsystem = Bundle.main.bundleIdentifier!
    private let pluginLog = OSLog(subsystem: "com.csdcorp.speechToText", category: "plugin")

    public static func register(with registrar: FlutterPluginRegistrar) {
        let channel = FlutterMethodChannel(name: "plugin.csdcorp.com/speech_to_text", binaryMessenger: registrar.messenger())
        let instance = SwiftSpeechToTextPlugin(channel, registrar: registrar)
        registrar.addMethodCallDelegate(instance, channel: channel)
    }

    init(_ channel: FlutterMethodChannel, registrar: FlutterPluginRegistrar) {
        self.channel = channel
        self.registrar = registrar
    }
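
    /// Routes incoming method-channel calls to their handlers. The "listen"
    /// call validates its arguments here before recognition is started.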
    public func handle(_ call: FlutterMethodCall, result: @escaping FlutterResult) {
        switch call.method {
        case SwiftSpeechToTextMethods.has_permission.rawValue:
            hasPermission(result)
        case SwiftSpeechToTextMethods.initialize.rawValue:
            initialize(result)
        case SwiftSpeechToTextMethods.listen.rawValue:
            guard let argsArr = call.arguments as? Dictionary<String, AnyObject>,
                let partialResults = argsArr["partialResults"] as? Bool,
                let onDevice = argsArr["onDevice"] as? Bool,
                let listenModeIndex = argsArr["listenMode"] as? Int
            else {
                DispatchQueue.main.async {
                    result(FlutterError(code: SpeechToTextErrors.missingOrInvalidArg.rawValue,
                                        message: "Missing arg: partialResults, onDevice, and listenMode are all required",
                                        details: nil))
                }
                return
            }
            var localeStr: String? = nil
            if let localeParam = argsArr["localeId"] as? String {
                localeStr = localeParam
            }
            guard let listenMode = ListenMode(rawValue: listenModeIndex) else {
                DispatchQueue.main.async {
                    result(FlutterError(code: SpeechToTextErrors.missingOrInvalidArg.rawValue,
                                        message: "Invalid value for listenMode, must be 0-3, was \(listenModeIndex)",
                                        details: nil))
                }
                return
            }
            listenForSpeech(result, localeStr: localeStr, partialResults: partialResults, onDevice: onDevice, listenMode: listenMode)
        case SwiftSpeechToTextMethods.stop.rawValue:
            stopSpeech(result)
        case SwiftSpeechToTextMethods.cancel.rawValue:
            cancelSpeech(result)
        case SwiftSpeechToTextMethods.locales.rawValue:
            locales(result)
        default:
            os_log("Unrecognized method: %{PUBLIC}@", log: pluginLog, type: .error, call.method)
            DispatchQueue.main.async {
                result(FlutterMethodNotImplemented)
            }
        }
    }

    private func hasPermission(_ result: @escaping FlutterResult) {
        let has = SFSpeechRecognizer.authorizationStatus() == SFSpeechRecognizerAuthorizationStatus.authorized &&
            AVAudioSession.sharedInstance().recordPermission == AVAudioSession.RecordPermission.granted
        DispatchQueue.main.async {
            result(has)
        }
    }
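
    /// Requests speech-recognition and microphone permission if they have not
    /// been determined yet, then completes setup. Reports false to Dart if
    /// either permission is denied or restricted.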
    private func initialize(_ result: @escaping FlutterResult) {
        var success = false
        let status = SFSpeechRecognizer.authorizationStatus()
        switch status {
        case SFSpeechRecognizerAuthorizationStatus.notDetermined:
            SFSpeechRecognizer.requestAuthorization({ (status) -> Void in
                success = status == SFSpeechRecognizerAuthorizationStatus.authorized
                if success {
                    AVAudioSession.sharedInstance().requestRecordPermission({ (granted: Bool) -> Void in
                        if granted {
                            self.setupSpeechRecognition(result)
                        } else {
                            self.sendBoolResult(false, result)
                            os_log("User denied permission", log: self.pluginLog, type: .info)
                        }
                    })
                } else {
                    self.sendBoolResult(false, result)
                }
            })
        case SFSpeechRecognizerAuthorizationStatus.denied:
            os_log("Permission permanently denied", log: self.pluginLog, type: .info)
            sendBoolResult(false, result)
        case SFSpeechRecognizerAuthorizationStatus.restricted:
            os_log("Device restriction prevented initialize", log: self.pluginLog, type: .info)
            sendBoolResult(false, result)
        default:
            os_log("Has permissions, continuing with setup", log: self.pluginLog, type: .debug)
            setupSpeechRecognition(result)
        }
    }

    fileprivate func sendBoolResult(_ value: Bool, _ result: @escaping FlutterResult) {
        DispatchQueue.main.async {
            result(value)
        }
    }

    fileprivate func setupListeningSound() {
        listeningSound = loadSound("assets/sounds/speech_to_text_listening.m4r")
        successSound = loadSound("assets/sounds/speech_to_text_stop.m4r")
        cancelSound = loadSound("assets/sounds/speech_to_text_cancel.m4r")
    }
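
    /// Resolves a Flutter asset to a bundle path and wraps it in an
    /// AVAudioPlayer; returns nil if the asset cannot be found or loaded.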
    fileprivate func loadSound(_ assetPath: String) -> AVAudioPlayer? {
        var player: AVAudioPlayer? = nil
        let soundKey = registrar.lookupKey(forAsset: assetPath)
        guard !soundKey.isEmpty else {
            return player
        }
        if let soundPath = Bundle.main.path(forResource: soundKey, ofType: nil) {
            let soundUrl = URL(fileURLWithPath: soundPath)
            do {
                player = try AVAudioPlayer(contentsOf: soundUrl)
                player?.delegate = self
            } catch {
                // no audio
            }
        }
        return player
    }

    private func setupSpeechRecognition(_ result: @escaping FlutterResult) {
        setupRecognizerForLocale(locale: Locale.current)
        guard recognizer != nil else {
            sendBoolResult(false, result)
            return
        }
        recognizer?.delegate = self
        setupListeningSound()
        sendBoolResult(true, result)
    }

    private func setupRecognizerForLocale(locale: Locale) {
        if previousLocale == locale {
            return
        }
        previousLocale = locale
        recognizer = SFSpeechRecognizer(locale: locale)
    }

    private func getLocale(_ localeStr: String?) -> Locale {
        guard let aLocaleStr = localeStr else {
            return Locale.current
        }
        return Locale(identifier: aLocaleStr)
    }
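
    /// Stop lets the current recognition task finish and deliver a final
    /// result; cancel discards it. Both play their feedback sound first, if
    /// one was loaded, and defer teardown until the sound completes.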
    private func stopSpeech(_ result: @escaping FlutterResult) {
        if !listening {
            sendBoolResult(false, result)
            return
        }
        stopAllPlayers()
        if let sound = successSound {
            onPlayEnd = { () -> Void in
                self.currentTask?.finish()
                self.stopCurrentListen()
                self.sendBoolResult(true, result)
            }
            sound.play()
        } else {
            stopCurrentListen()
            sendBoolResult(true, result)
        }
    }

    private func cancelSpeech(_ result: @escaping FlutterResult) {
        if !listening {
            sendBoolResult(false, result)
            return
        }
        stopAllPlayers()
        if let sound = cancelSound {
            onPlayEnd = { () -> Void in
                self.currentTask?.cancel()
                self.stopCurrentListen()
                self.sendBoolResult(true, result)
            }
            sound.play()
        } else {
            self.currentTask?.cancel()
            stopCurrentListen()
            sendBoolResult(true, result)
        }
    }

    private func stopAllPlayers() {
        cancelSound?.stop()
        successSound?.stop()
        listeningSound?.stop()
    }
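
    /// Tears down an active listen: ends the audio request, stops the engine,
    /// removes the input tap, and restores the previous audio session
    /// category. Each step is isolated so one failure cannot skip the rest.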
    private func stopCurrentListen() {
        stopAllPlayers()
        currentRequest?.endAudio()
        do {
            try trap {
                self.audioEngine.stop()
            }
        } catch {
            os_log("Error stopping engine: %{PUBLIC}@", log: pluginLog, type: .error, error.localizedDescription)
        }
        do {
            try trap {
                let inputNode = self.audioEngine.inputNode
                inputNode.removeTap(onBus: self.busForNodeTap)
            }
        } catch {
            os_log("Error removing tap: %{PUBLIC}@", log: pluginLog, type: .error, error.localizedDescription)
        }
        do {
            if let rememberedAudioCategory = rememberedAudioCategory {
                try self.audioSession.setCategory(rememberedAudioCategory)
            }
        } catch {
            os_log("Error restoring audio category: %{PUBLIC}@", log: pluginLog, type: .error, error.localizedDescription)
        }
        do {
            try self.audioSession.setActive(false, options: .notifyOthersOnDeactivation)
        } catch {
            os_log("Error deactivating audio session: %{PUBLIC}@", log: pluginLog, type: .info, error.localizedDescription)
        }
        currentRequest = nil
        currentTask = nil
        onPlayEnd = nil
        listening = false
    }
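
    /// Starts a recognition session: configures the audio session, optionally
    /// plays the listening sound, attaches a tap to the audio engine's input
    /// node that feeds both the recognition request and the sound-level
    /// callback, then starts the engine.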
    private func listenForSpeech(_ result: @escaping FlutterResult, localeStr: String?, partialResults: Bool, onDevice: Bool, listenMode: ListenMode) {
        if nil != currentTask || listening {
            sendBoolResult(false, result)
            return
        }
        do {
            // let inErrorTest = true
            failedListen = false
            returnPartialResults = partialResults
            setupRecognizerForLocale(locale: getLocale(localeStr))
            guard let localRecognizer = recognizer else {
                result(FlutterError(code: SpeechToTextErrors.noRecognizerError.rawValue,
                                    message: "Failed to create speech recognizer",
                                    details: nil))
                return
            }
            if onDevice {
                if #available(iOS 13.0, *), !localRecognizer.supportsOnDeviceRecognition {
                    result(FlutterError(code: SpeechToTextErrors.onDeviceError.rawValue,
                                        message: "on device recognition is not supported on this device",
                                        details: nil))
                    // Return here so that result is not invoked a second time below.
                    return
                }
            }
            rememberedAudioCategory = self.audioSession.category
            try self.audioSession.setCategory(AVAudioSession.Category.playAndRecord, options: .defaultToSpeaker)
            // try self.audioSession.setMode(AVAudioSession.Mode.measurement)
            try self.audioSession.setMode(AVAudioSession.Mode.default)
            try self.audioSession.setActive(true, options: .notifyOthersOnDeactivation)
            if let sound = listeningSound {
                self.onPlayEnd = { () -> Void in
                    if !self.failedListen {
                        self.listening = true
                        self.invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: SpeechToTextStatus.listening.rawValue)
                    }
                }
                sound.play()
            }
            self.audioEngine.reset()
            let inputNode = self.audioEngine.inputNode
            if inputNode.inputFormat(forBus: 0).channelCount == 0 {
                throw SpeechToTextError.runtimeError("Not enough available inputs.")
            }
            self.currentRequest = SFSpeechAudioBufferRecognitionRequest()
            guard let currentRequest = self.currentRequest else {
                sendBoolResult(false, result)
                return
            }
            currentRequest.shouldReportPartialResults = true
            if #available(iOS 13.0, *), onDevice {
                currentRequest.requiresOnDeviceRecognition = true
            }
            switch listenMode {
            case ListenMode.dictation:
                currentRequest.taskHint = SFSpeechRecognitionTaskHint.dictation
            case ListenMode.search:
                currentRequest.taskHint = SFSpeechRecognitionTaskHint.search
            case ListenMode.confirmation:
                currentRequest.taskHint = SFSpeechRecognitionTaskHint.confirmation
            default:
                break
            }
            self.currentTask = self.recognizer?.recognitionTask(with: currentRequest, delegate: self)
            let recordingFormat = inputNode.outputFormat(forBus: self.busForNodeTap)
            try trap {
                inputNode.installTap(onBus: self.busForNodeTap, bufferSize: self.speechBufferSize, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
                    currentRequest.append(buffer)
                    self.updateSoundLevel(buffer: buffer)
                }
            }
            // if ( inErrorTest ) {
            //     throw SpeechToTextError.runtimeError("for testing only")
            // }
            self.audioEngine.prepare()
            try self.audioEngine.start()
            if nil == listeningSound {
                listening = true
                self.invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: SpeechToTextStatus.listening.rawValue)
            }
            sendBoolResult(true, result)
        } catch {
            failedListen = true
            os_log("Error starting listen: %{PUBLIC}@", log: pluginLog, type: .error, error.localizedDescription)
            stopCurrentListen()
            sendBoolResult(false, result)
            invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: SpeechToTextStatus.notListening.rawValue)
            let speechError = SpeechRecognitionError(errorMsg: "error_listen_failed", permanent: true)
            do {
                let errorResult = try jsonEncoder.encode(speechError)
                invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyError, arguments: String(data: errorResult, encoding: .utf8))
            } catch {
                os_log("Could not encode JSON", log: pluginLog, type: .error)
            }
        }
    }
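
    /// Computes the buffer's RMS amplitude and converts it to decibels
    /// (avgPower = 20 * log10(rms), so values are negative dBFS) before
    /// forwarding it to Dart as a sound-level change.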
    private func updateSoundLevel(buffer: AVAudioPCMBuffer) {
        guard let channelData = buffer.floatChannelData else {
            return
        }
        let channelDataValue = channelData.pointee
        let channelDataValueArray = stride(from: 0,
                                           to: Int(buffer.frameLength),
                                           by: buffer.stride).map { channelDataValue[$0] }
        let frameLength = Float(buffer.frameLength)
        let rms = sqrt(channelDataValueArray.map { $0 * $0 }.reduce(0, +) / frameLength)
        let avgPower = 20 * log10(rms)
        self.invokeFlutter(SwiftSpeechToTextCallbackMethods.soundLevelChange, arguments: avgPower)
    }

    /// Builds a list of localeId:name entries with the current locale first.
    private func locales(_ result: @escaping FlutterResult) {
        var localeNames = [String]()
        let locales = SFSpeechRecognizer.supportedLocales()
        let currentLocale = Locale.current
        if let idName = buildIdNameForLocale(forIdentifier: currentLocale.identifier) {
            localeNames.append(idName)
        }
        for locale in locales {
            if locale.identifier == currentLocale.identifier {
                continue
            }
            if let idName = buildIdNameForLocale(forIdentifier: locale.identifier) {
                localeNames.append(idName)
            }
        }
        DispatchQueue.main.async {
            result(localeNames)
        }
    }

    private func buildIdNameForLocale(forIdentifier: String) -> String? {
        var idName: String?
        if let name = Locale.current.localizedString(forIdentifier: forIdentifier) {
            // Strip colons from the display name since ":" separates id from name.
            let sanitizedName = name.replacingOccurrences(of: ":", with: " ")
            idName = "\(forIdentifier):\(sanitizedName)"
        }
        return idName
    }
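
    /// Encodes the transcriptions as a SpeechRecognitionResult JSON payload
    /// and sends it to Dart; partial results are dropped when the Dart side
    /// asked for final results only.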
    private func handleResult(_ transcriptions: [SFTranscription], isFinal: Bool) {
        if !isFinal && !returnPartialResults {
            return
        }
        var speechWords: [SpeechRecognitionWords] = []
        for transcription in transcriptions {
            let words = SpeechRecognitionWords(recognizedWords: transcription.formattedString, confidence: confidenceIn(transcription))
            speechWords.append(words)
        }
        let speechInfo = SpeechRecognitionResult(alternates: speechWords, finalResult: isFinal)
        do {
            let speechMsg = try jsonEncoder.encode(speechInfo)
            if let speechStr = String(data: speechMsg, encoding: .utf8) {
                os_log("Encoded JSON result: %{PUBLIC}@", log: pluginLog, type: .debug, speechStr)
                invokeFlutter(SwiftSpeechToTextCallbackMethods.textRecognition, arguments: speechStr)
            }
        } catch {
            os_log("Could not encode JSON", log: pluginLog, type: .error)
        }
    }
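
    /// Averages the per-segment confidence values (0.0-1.0) for a
    /// transcription and rounds the result to three decimal places.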
    private func confidenceIn(_ transcription: SFTranscription) -> Decimal {
        guard transcription.segments.count > 0 else {
            return 0
        }
        var totalConfidence: Float = 0.0
        for segment in transcription.segments {
            totalConfidence += segment.confidence
        }
        let avgConfidence: Float = totalConfidence / Float(transcription.segments.count)
        let confidence: Float = (avgConfidence * 1000).rounded() / 1000
        return Decimal(string: String(describing: confidence))!
    }

    private func invokeFlutter(_ method: SwiftSpeechToTextCallbackMethods, arguments: Any?) {
        DispatchQueue.main.async {
            self.channel.invokeMethod(method.rawValue, arguments: arguments)
        }
    }
}
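
/// Forwards recognizer availability changes to Dart as status notifications.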
@available(iOS 10.0, *)
extension SwiftSpeechToTextPlugin: SFSpeechRecognizerDelegate {
    public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        let availability = available ? SpeechToTextStatus.available.rawValue : SpeechToTextStatus.unavailable.rawValue
        os_log("Availability changed: %{PUBLIC}@", log: pluginLog, type: .debug, availability)
        invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: availability)
    }
}
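
/// Receives recognition-task lifecycle callbacks, forwarding hypotheses and
/// final results to Dart and logging any task error along the way.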
@available(iOS 10.0, *)
extension SwiftSpeechToTextPlugin: SFSpeechRecognitionTaskDelegate {
    public func speechRecognitionDidDetectSpeech(_ task: SFSpeechRecognitionTask) {
        // Do nothing for now
    }

    public func speechRecognitionTaskFinishedReadingAudio(_ task: SFSpeechRecognitionTask) {
        reportError(source: "FinishedReadingAudio", error: task.error)
        invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: SpeechToTextStatus.notListening.rawValue)
    }

    public func speechRecognitionTaskWasCancelled(_ task: SFSpeechRecognitionTask) {
        reportError(source: "TaskWasCancelled", error: task.error)
        invokeFlutter(SwiftSpeechToTextCallbackMethods.notifyStatus, arguments: SpeechToTextStatus.notListening.rawValue)
    }

    public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) {
        reportError(source: "FinishSuccessfully", error: task.error)
        stopCurrentListen()
    }

    public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didHypothesizeTranscription transcription: SFTranscription) {
        reportError(source: "HypothesizeTranscription", error: task.error)
        handleResult([transcription], isFinal: false)
    }

    public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishRecognition recognitionResult: SFSpeechRecognitionResult) {
        reportError(source: "FinishRecognition", error: task.error)
        handleResult(recognitionResult.transcriptions, isFinal: recognitionResult.isFinal)
    }

    private func reportError(source: String, error: Error?) {
        if nil != error {
            os_log("%{PUBLIC}@ with error: %{PUBLIC}@", log: pluginLog, type: .debug, source, error.debugDescription)
        }
    }
}
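
/// Runs the pending onPlayEnd continuation once a feedback sound finishes;
/// this is how stop, cancel, and listen defer their work until after the
/// sound has played.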
@available(iOS 10.0, *)
extension SwiftSpeechToTextPlugin: AVAudioPlayerDelegate {
    public func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer,
                                            successfully flag: Bool) {
        if let playEnd = self.onPlayEnd {
            playEnd()
        }
    }
}