我正在开发一个使用 SFSpeechRecognizer
的 iOS 项目,它在开始时运行良好。我说一些话,它会回应。但是一两分钟后,它就失败了。它不提供任何已识别结果的反馈。
我想知道这是否与缓冲区有关,但我不知道如何修复它。
我基本上使用 SpeechRecognizer 的 demo 来构建项目。不同之处在于我将识别的结果逐字存储在一个数组中。程序分析数组并响应某些单词,例如“播放”或之前设置的其他一些命令。程序响应命令后,删除数组中的这个元素。
说话很便宜,这里是代码:
识别器,你可以看到 supportedCommands
数组,它过滤了一些特定的单词供程序响应。其他部分与 https://developer.apple.com/library/content/samplecode/SpeakToMe/Listings/SpeakToMe_ViewController_swift.html#//apple_ref/doc/uid/TP40017110-SpeakToMe_ViewController_swift-DontLinkElementID_6 的演示类似
class SpeechRecognizer: NSObject, SFSpeechRecognizerDelegate {
private var speechRecognizer: SFSpeechRecognizer!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest!
private var recognitionTask: SFSpeechRecognitionTask!
private let audioEngine = AVAudioEngine()
private let locale = Locale(identifier: "en-US")
private var lastSavedString: String = ""
private let supportedCommands = ["more", "play"]
var speechInputQueue: [String] = [String]()
func load() {
print("load")
prepareRecognizer(locale: locale)
authorize()
}
func start() {
print("start")
if !audioEngine.isRunning {
try! startRecording()
}
}
func stop() {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
}
}
private func authorize() {
SFSpeechRecognizer.requestAuthorization { authStatus in
OperationQueue.main.addOperation {
switch authStatus {
case .authorized:
print("Authorized!")
case .denied:
print("Unauthorized!")
case .restricted:
print("Unauthorized!")
case .notDetermined:
print("Unauthorized!")
}
}
}
}
private func prepareRecognizer(locale: Locale) {
speechRecognizer = SFSpeechRecognizer(locale: locale)!
speechRecognizer.delegate = self
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord, with: .defaultToSpeaker)
try audioSession.setMode(AVAudioSessionModeDefault)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let inputNode = audioEngine.inputNode
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
let temp = result.bestTranscription.formattedString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).lowercased()
//print("temp", temp)
if temp != self.lastSavedString && temp.count > self.lastSavedString.count {
var tempSplit = temp.split(separator: " ")
var lastSplit = self.lastSavedString.split(separator: " ")
while lastSplit.count > 0 {
if String(tempSplit[0]) == String(lastSplit[0]) {
tempSplit.remove(at: 0)
lastSplit.remove(at: 0)
}
else {
break
}
}
for command in tempSplit {
if self.supportedCommands.contains(String(command)) {
self.speechInputQueue.append(String(command))
}
}
self.lastSavedString = temp
}
isFinal = result.isFinal
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
}
}
我们如何使用它:
if self.speechRecognizer.speechInputQueue.count > 0 {
if self.speechRecognizer.speechInputQueue[0] == "more" {
print("temp", temp)
print("content", content)
// isSpeakingContent = true
self.textToSpeech(text: content)
}
else if self.speechRecognizer.speechInputQueue[0] == "play" {
print("try to play")
let soundURL = URL(fileURLWithPath: Bundle.main.path(forResource: "cascade", ofType: "wav")!)
do {
audioPlayer = try AVAudioPlayer(contentsOf: soundURL)
}
catch {
print(error)
}
audioPlayer.prepareToPlay()
audioPlayer.play()
}
else {
self.textToSpeech(text: "unrecognized command")
}
self.speechRecognizer.speechInputQueue.remove(at: 0)
print("after :", self.speechRecognizer.speechInputQueue)
}
它响应某些命令并播放一些音频。
缓冲区有问题吗?也许经过一两分钟的识别,缓冲区已满?识别器会随着时间的推移而失败。
来自 WWDC 2016 Session 509: Speech Recognition API :
For iOS 10 we're starting with a strict audio duration limit of about one minute which is similar to that of keyboard dictation.
关于ios - SpeechRecognizer 几分钟后失败,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49878238/
欢迎光临 OStack程序员社区-中国程序员成长平台 (https://ostack.cn/) | Powered by Discuz! X3.4 |