OStack程序员社区-中国程序员成长平台

标题: ios - SpeechRecognizer 几分钟后失败 [打印本页]

作者: 菜鸟教程小白    时间: 2022-12-11 20:03
标题: ios - SpeechRecognizer 几分钟后失败

我正在开发一个使用 SFSpeechRecognizer 的 iOS 项目,它在开始时运行良好。我说一些话,它会回应。但是一两分钟后,它就失败了。它不提供任何已识别结果的反馈。 我想知道这是否与缓冲区有关,但我不知道如何修复它。

我基本上使用 SpeechRecognizer 的 demo 来构建项目。不同之处在于我将识别的结果逐字存储在一个数组中。程序分析数组并响应某些单词,例如“播放”或之前设置的其他一些命令。程序响应命令后,删除数组中的这个元素。

代码胜于雄辩（Talk is cheap），相关代码如下：

  1. 识别器,你可以看到 supportedCommands 数组,它过滤了一些特定的单词供程序响应。其他部分与 https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6 的演示类似

    class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate {
    
        private var speechRecognizer: SFSpeechRecognizer!
        private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
        private var recognitionTask: SFSpeechRecognitionTask!
        private let audioEngine = AVAudioEngine()
        private let locale = Locale(identifier: "en-US")
    
        // Full transcription seen so far in the CURRENT recognition session;
        // used to diff out words that were already processed.
        private var lastSavedString: String = ""
        private let supportedCommands = ["more", "play"]
    
        // True between start() and stop(). iOS ends a single recognition
        // request after roughly one minute (WWDC 2016 session 509), so when a
        // task finishes while this flag is still set we transparently start a
        // new request instead of going silent — this is the fix for the
        // "recognizer fails after a minute or two" symptom.
        private var shouldKeepListening = false
    
        // FIFO of recognized command words; consumers remove entries as they
        // handle them. NOTE(review): appended from the recognition callback
        // and drained by UI code — confirm both run on the same queue.
        var speechInputQueue: [String] = [String]()
    
        /// Prepares the recognizer for the configured locale and requests
        /// speech-recognition authorization from the user.
        func load() {
            print("load")
            prepareRecognizer(locale: locale)
    
            authorize()
        }
    
        /// Starts (or resumes) listening. Safe to call repeatedly.
        func start() {
            print("start")
            shouldKeepListening = true
            if !audioEngine.isRunning {
                do {
                    try startRecording()
                } catch {
                    // Was `try!`, which crashed the app on any
                    // audio-session/engine error; report instead.
                    print("startRecording failed:", error)
                }
            }
        }
    
        /// Stops listening and lets the in-flight request finish naturally.
        func stop() {
            shouldKeepListening = false
            if audioEngine.isRunning {
                audioEngine.stop()
                recognitionRequest?.endAudio()
    
            }
        }
    
        /// Asks the user for speech-recognition permission and logs the result.
        private func authorize() {
            SFSpeechRecognizer.requestAuthorization { authStatus in
                OperationQueue.main.addOperation {
                    switch authStatus {
                    case .authorized:
                        print("Authorized!")
                    case .denied:
                        print("Unauthorized!")
                    case .restricted:
                        print("Unauthorized!")
                    case .notDetermined:
                        print("Unauthorized!")
                    }
                }
            }
        }
    
        /// Creates the locale-specific recognizer and registers as delegate.
        private func prepareRecognizer(locale: Locale) {
            speechRecognizer = SFSpeechRecognizer(locale: locale)!
            speechRecognizer.delegate = self
        }
    
        /// Configures the audio session, wires the microphone tap into a new
        /// recognition request, and starts the audio engine.
        /// - Throws: audio-session or `AVAudioEngine` start errors.
        private func startRecording() throws {
    
            // Cancel the previous task if it's running.
            if let recognitionTask = recognitionTask {
                recognitionTask.cancel()
                self.recognitionTask = nil
            }
    
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker)
            try audioSession.setMode(AVAudioSessionModeDefault)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    
            recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    
            let inputNode = audioEngine.inputNode
            guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
    
            // Configure request so that results are returned before audio recording is finished
            recognitionRequest.shouldReportPartialResults = true
    
            // A recognition task represents a speech recognition session.
            // We keep a reference to the task so that it can be cancelled.
            recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
                var isFinal = false
    
                if let result = result {
    
                    let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased()
                    //print("temp", temp)
                    if temp != self.lastSavedString && temp.count > self.lastSavedString.count {
    
                        // Strip the common prefix shared with the previous
                        // partial result so only newly spoken words remain.
                        var tempSplit = temp.split(separator: " ")
                        var lastSplit = self.lastSavedString.split(separator: " ")
                        // Bug fix: the original indexed tempSplit[0] without
                        // checking tempSplit was non-empty, which could crash.
                        while !lastSplit.isEmpty && !tempSplit.isEmpty
                            && String(tempSplit[0]) == String(lastSplit[0]) {
                            tempSplit.remove(at: 0)
                            lastSplit.remove(at: 0)
                        }
    
                        for command in tempSplit {
                            if self.supportedCommands.contains(String(command)) {
                                self.speechInputQueue.append(String(command))
                            }
                        }
                        self.lastSavedString = temp
    
                    }
                    isFinal = result.isFinal
                }
    
                if error != nil || isFinal {
                    // The session ended — on error or because iOS hit its
                    // ~1 minute per-request limit. Tear everything down…
                    self.audioEngine.stop()
                    inputNode.removeTap(onBus: 0)
                    self.recognitionRequest = nil
                    self.recognitionTask = nil
                    // A new request starts from an empty transcription, so the
                    // prefix diff must be reset or every word spoken in the
                    // next session would be silently ignored.
                    self.lastSavedString = ""
    
                    // …and, unless stop() was called, spin up a fresh request
                    // so recognition keeps working past the first minute.
                    if self.shouldKeepListening {
                        OperationQueue.main.addOperation {
                            if !self.audioEngine.isRunning {
                                do {
                                    try self.startRecording()
                                } catch {
                                    print("restart failed:", error)
                                }
                            }
                        }
                    }
                }
            }
    
            let recordingFormat = inputNode.outputFormat(forBus: 0)
            inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
                self.recognitionRequest?.append(buffer)
            }
    
            audioEngine.prepare()
    
            try audioEngine.start()
    
        }
    }
    
  2. 我们如何使用它:

        // Consume the oldest pending voice command, then drop it from the queue.
        if self.speechRecognizer.speechInputQueue.count > 0 {
            if self.speechRecognizer.speechInputQueue[0] == "more" {
                print("temp", temp)
                print("content", content)
                // isSpeakingContent = true
                self.textToSpeech(text: content)
            }
            else if self.speechRecognizer.speechInputQueue[0] == "play" {
                print("try to play")
                // Bug fix: the original force-unwrapped the bundle lookup
                // (crash if cascade.wav is missing) and called
                // prepareToPlay()/play() even when AVAudioPlayer init threw.
                if let soundPath = Bundle.main.path(forResource: "cascade", ofType: "wav") {
                    let soundURL = URL(fileURLWithPath: soundPath)
                    do {
                        audioPlayer = try AVAudioPlayer(contentsOf: soundURL)
                        // Only touch the player when initialization succeeded.
                        audioPlayer.prepareToPlay()
                        audioPlayer.play()
                    }
                    catch {
                        print(error)
                    }
                }
                else {
                    print("cascade.wav not found in bundle")
                }
            }
            else {
                self.textToSpeech(text: "unrecognized command")
            }
            // NOTE(review): the queue is also appended from the recognition
            // callback — confirm producer and consumer share a queue/thread.
            self.speechRecognizer.speechInputQueue.remove(at: 0)
            print("after :", self.speechRecognizer.speechInputQueue)
        }
    

它响应某些命令并播放一些音频。

是缓冲区的问题吗？也许识别运行一两分钟后缓冲区就满了？无论如何，识别器总是在运行一段时间后失败。



Best Answer-推荐答案


来自 WWDC 2016 Session 509: Speech Recognition API :

For iOS 10 we're starting with a strict audio duration limit of about one minute which is similar to that of keyboard dictation.

关于ios - SpeechRecognizer 几分钟后失败,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49878238/






欢迎光临 OStack程序员社区-中国程序员成长平台 (https://ostack.cn/) Powered by Discuz! X3.4