OStack程序员社区-中国程序员成长平台

标题: ios - SpeechRecognizer 几分钟后失败 [打印本页]

作者: 菜鸟教程小白 时间: 2022-12-11 20:03
标题: ios - SpeechRecognizer 几分钟后失败

我正在开发一个使用 SFSpeechRecognizer 的 iOS 项目，它在开始时运行良好。我说一些话，它会回应。但是一两分钟后，它就失败了。它不提供任何已识别结果的反馈。我想知道这是否与缓冲区有关，但我不知道如何修复它。

我基本上使用 SpeechRecognizer 的 demo 来构建项目。不同之处在于我将识别的结果逐字存储在一个数组中。程序分析数组并响应某些单词，例如“播放”或之前设置的其他一些命令。程序响应命令后，删除数组中的这个元素。

说话很便宜，这里是代码:

识别器，你可以看到 supportedCommands 数组，它过滤了一些特定的单词供程序响应。其他部分与 https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6 的演示类似

class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate {

    private var speechRecognizer: SFSpeechRecognizer!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
    private var recognitionTask: SFSpeechRecognitionTask!
    private let audioEngine = AVAudioEngine()
    private let locale = Locale(identifier: "en-US")

    private var lastSavedString: String = ""
    private let supportedCommands = ["more", "play"]

    var speechInputQueue: [String] = [String]()

    func load() {
        print("load")
        prepareRecognizer(locale: locale)

        authorize()
    }

    func start() {
        print("start")
        if !audioEngine.isRunning {
            try! startRecording()
        }
    }

    func stop() {
        if audioEngine.isRunning {
            audioEngine.stop()
            recognitionRequest?.endAudio()

        }
    }

    private func authorize() {
        SFSpeechRecognizer.requestAuthorization { authStatus in
            OperationQueue.main.addOperation {
                switch authStatus {
                case .authorized:
                    print("Authorized!")
                case .denied:
                    print("Unauthorized!")
                case .restricted:
                    print("Unauthorized!")
                case .notDetermined:
                    print("Unauthorized!")
                }
            }
        }
    }

    private func prepareRecognizer(locale: Locale) {
        speechRecognizer = SFSpeechRecognizer(locale: locale)!
        speechRecognizer.delegate = self
    }

    private func startRecording() throws {

        // Cancel the previous task if it's running.
        if let recognitionTask = recognitionTask {
            recognitionTask.cancel()
            self.recognitionTask = nil
        }

        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker)
        try audioSession.setMode(AVAudioSessionModeDefault)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)

        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

        let inputNode = audioEngine.inputNode
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }

        // Configure request so that results are returned before audio recording is finished
        recognitionRequest.shouldReportPartialResults = true

        // A recognition task represents a speech recognition session.
        // We keep a reference to the task so that it can be cancelled.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false

            if let result = result {

                let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased()
                //print("temp", temp)
                if temp != self.lastSavedString && temp.count > self.lastSavedString.count {

                    var tempSplit = temp.split(separator: " ")
                    var lastSplit = self.lastSavedString.split(separator: " ")
                    while lastSplit.count > 0 {
                        if String(tempSplit[0]) == String(lastSplit[0]) {
                            tempSplit.remove(at: 0)
                            lastSplit.remove(at: 0)
                        }
                        else {
                            break
                        }
                    }

                    for command in tempSplit {
                        if self.supportedCommands.contains(String(command)) {
                            self.speechInputQueue.append(String(command))
                        }
                    }
                    self.lastSavedString = temp

                }
                isFinal = result.isFinal
            }

            if error != nil || isFinal {
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                self.recognitionRequest = nil
                self.recognitionTask = nil
            }
        }

        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()

        try audioEngine.start()

    }
}

我们如何使用它:

    if self.speechRecognizer.speechInputQueue.count > 0 {
    if self.speechRecognizer.speechInputQueue[0] == "more" {
        print("temp", temp)
        print("content", content)
       // isSpeakingContent = true
        self.textToSpeech(text: content)
    }
    else if self.speechRecognizer.speechInputQueue[0] == "play" {
        print("try to play")
        let soundURL = URL(fileURLWithPath: Bundle.main.path(forResource: "cascade", ofType: "wav")!)

        do {
            audioPlayer = try AVAudioPlayer(contentsOf: soundURL)
        }
        catch {
            print(error)
        }
        audioPlayer.prepareToPlay()
        audioPlayer.play()
    }
    else {
        self.textToSpeech(text: "unrecognized command")
    }
    self.speechRecognizer.speechInputQueue.remove(at: 0)
    print("after :", self.speechRecognizer.speechInputQueue)
}

它响应某些命令并播放一些音频。

缓冲区有问题吗？也许经过一两分钟的识别，缓冲区已满？识别器会随着时间的推移而失败。

Best Answer-推荐答案

来自 WWDC 2016 Session 509: Speech Recognition API :

For iOS 10 we're starting with a strict audio duration limit of about one minute which is similar to that of keyboard dictation.

关于ios - SpeechRecognizer 几分钟后失败，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/49878238/

欢迎光临 OStack程序员社区-中国程序员成长平台 (https://ostack.cn/) Powered by Discuz! X3.4