(原) 语音唤醒

原创文章,转载请注明出处。

语音唤醒是各智能音箱都在使用的,是当前人机语音交互的开始。之前使用的 snowboy ,似乎已“牺牲”,刚使用PicoVoice。比较可惜的是它还不支持中文,不过我想只使用它的唤醒,那也足够了。

Porcupine以块(帧)的形式接收音频, .frame_length 属性给出每个帧的大小(样本数)。Porcupine接受采样率为16 kHz、样本为16位的音频。对于每一帧,Porcupine返回一个整数: -1 表示未检测到关键字;非负数(从0开始)是检测到的关键字在关键字列表中的索引。

pip3 install pvporcupine
注册Picovoice控制台 https://console.picovoice.ai/ 获取到AccessKey.

Github github.com/Picovoice/porcupine

它内置了一些关键字模型

import pvporcupine


# Print each wake word bundled with the installed pvporcupine package, one per line.
for built_in_keyword in pvporcupine.KEYWORDS:
    print(built_in_keyword)

看关键词检测的实例:

import pvporcupine
from pvrecorder import PvRecorder


# AccessKey obtained from the Picovoice Console (https://console.picovoice.ai/).
# Replace the placeholder with your own key before running.
access_key = "YOUR_ACCESS_KEY"
# Wake words to listen for; the names must come from pvporcupine.KEYWORDS.
keywords = ["computer", "picovoice"]

# Create the wake-word engine, then a recorder whose frames have exactly the
# length the engine expects.
# NOTE(review): device_index=-1 presumably selects the default input device - confirm.
porcupine = pvporcupine.create(access_key=access_key, keywords=keywords)
recorder = PvRecorder(device_index=-1, frame_length=porcupine.frame_length)

try:
    recorder.start()

    while True:
        # process() yields the index into `keywords` of the detected wake word;
        # a negative value means nothing was detected in this frame.
        keyword_index = porcupine.process(recorder.read())
        if keyword_index >= 0:
            print(f"Detected {keywords[keyword_index]}")

except KeyboardInterrupt:
    # Ctrl-C is the normal way to end the demo: stop capturing before cleanup.
    recorder.stop()
finally:
    # Release both objects whether the loop ended normally or with an error.
    porcupine.delete()
    recorder.delete()

看起来还有golang版本:
在 github.com/Picovoice/porcupine 仓库的 demo/go/micdemo 目录下运行 go run porcupine_mic_demo.go -access_key "${ACCESS_KEY}" -keywords "computer,picovoice" 进行测试,这里使用了内置的两个唤醒词。
或者使用参数 -keyword_paths "/home/ease/tools/hi-easy.ppn" 调用自己的唤醒词,多个自定义唤醒词使用逗号分隔(不能内置关键词和自制关键词一起用?从下面的代码看,一旦指定了 -keyword_paths,-keywords 就会被忽略,所以这个演示程序里两者不能同时使用)
或者使用此参数来调整敏感度:-sensitivities 0.3,0.6
以下代码修改过

package main

import (
        "flag"
        "fmt"
        "log"
        "os"
        "os/signal"
        "path/filepath"
        "strconv"
        "strings"

        porcupine "github.com/Picovoice/porcupine/binding/go/v3"
        pvrecorder "github.com/Picovoice/pvrecorder/binding/go"
)

// main runs the Porcupine microphone demo. It parses the command-line flags,
// configures and initializes the wake-word engine, then reads audio frames
// from recording device -1 until the process receives an interrupt, printing
// the index of every keyword that is detected.
//
// Errors found while validating arguments terminate the program immediately.
// Errors that occur after the engine has been initialized are logged and the
// function returns through its deferred cleanup, so the native resources are
// released before the process exits with a non-zero status.
func main() {
	accessKeyArg := flag.String("access_key", "", "AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)")
	keywordsArg := flag.String("keywords", "", fmt.Sprintf("Comma-separated list of built-in keywords. Available options are: %+q", porcupine.BuiltInKeywords))
	keywordPathsArg := flag.String("keyword_paths", "", "Comma-separated list of paths to keyword model files. "+
		"If not set it will be populated from -keywords argument")
	libraryPathArg := flag.String("library_path", "", "Path to Porcupine dynamic library file")
	modelPathArg := flag.String("model_path", "", "Path to Porcupine model file")
	sensitivitiesArg := flag.String("sensitivities", "", "Comma-separated list of sensitivity values for detecting keywords. "+
		"Each value should be a number within [0, 1]. A higher "+
		"sensitivity results in fewer misses at the cost of increasing the false alarm rate. "+
		"If not set 0.5 will be used.")
	flag.Parse()

	p := porcupine.Porcupine{}

	if *accessKeyArg == "" {
		log.Fatalf("AccessKey is required.")
	}
	p.AccessKey = *accessKeyArg

	// validate library path
	if *libraryPathArg != "" {
		p.LibraryPath = resolveExistingPath("library", *libraryPathArg)
	}

	// validate model path
	if *modelPathArg != "" {
		p.ModelPath = resolveExistingPath("model", *modelPathArg)
	}

	// validate keyword arguments: custom keyword files take precedence, and
	// the built-in keywords are consulted only when no keyword path is given.
	switch {
	case *keywordPathsArg != "":
		for _, k := range strings.Split(*keywordPathsArg, ",") {
			p.KeywordPaths = append(p.KeywordPaths, resolveExistingPath("keyword", strings.TrimSpace(k)))
		}
	case *keywordsArg != "":
		for _, k := range strings.Split(*keywordsArg, ",") {
			k = strings.TrimSpace(k)
			builtInKeyword := porcupine.BuiltInKeyword(k)
			if !builtInKeyword.IsValid() {
				log.Fatalf("'%s' is not a valid built-in keyword. Available options are: %+q", k, porcupine.BuiltInKeywords)
			}

			p.BuiltInKeywords = append(p.BuiltInKeywords, builtInKeyword)
		}
	default:
		log.Fatal("No built-in keywords or keyword model files were provided.")
	}

	// validate sensitivities
	if *sensitivitiesArg == "" {
		// NOTE(review): defaults are only added per keyword file; for built-in
		// keywords p.Sensitivities stays empty and Init presumably supplies
		// the defaults - confirm against the binding.
		for range p.KeywordPaths {
			p.Sensitivities = append(p.Sensitivities, 0.5)
		}
	} else {
		for _, sensitivityStr := range strings.Split(*sensitivitiesArg, ",") {
			sensitivityStr = strings.TrimSpace(sensitivityStr)
			sensitivityFloat, err := strconv.ParseFloat(sensitivityStr, 32)
			if err != nil || sensitivityFloat < 0 || sensitivityFloat > 1 {
				log.Fatalf("Sensitivity value of '%s' is invalid. Must be a float32 between [0, 1].", sensitivityStr)
			}
			p.Sensitivities = append(p.Sensitivities, float32(sensitivityFloat))
		}
	}

	err := p.Init()
	if err != nil {
		log.Fatal(err)
	}

	// From here on native resources are held, so failures must not call
	// log.Fatal (os.Exit skips deferred calls). Instead they set exitCode and
	// return. This deferred function is registered first and therefore runs
	// last, after every cleanup below has finished.
	exitCode := 0
	defer func() {
		if exitCode != 0 {
			os.Exit(exitCode)
		}
	}()
	defer func() {
		err := p.Delete()
		if err != nil {
			// Safe to exit here: the only deferred call still pending is the
			// exit-code reporter above.
			log.Fatalf("Failed to release resources: %s", err)
		}
	}()

	recorder := pvrecorder.NewPvRecorder(porcupine.FrameLength)
	recorder.DeviceIndex = -1

	if err := recorder.Init(); err != nil {
		log.Printf("Error: %s.\n", err.Error())
		exitCode = 1
		return
	}
	defer recorder.Delete()

	if err := recorder.Start(); err != nil {
		log.Printf("Error: %s.\n", err.Error())
		exitCode = 1
		return
	}
	// Stop capturing before the recorder is deleted (deferred calls run in
	// reverse order of registration).
	defer func() {
		if err := recorder.Stop(); err != nil {
			log.Printf("Error: %s.\n", err.Error())
		}
	}()

	log.Printf("Listening...")

	// The channel is buffered so an interrupt that arrives while a frame is
	// being read or processed is not lost.
	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt)

waitLoop:
	for {
		select {
		case <-signalCh:
			log.Println("Stopping...")
			break waitLoop
		default:
			pcm, err := recorder.Read()
			if err != nil {
				log.Printf("Error: %s.\n", err.Error())
				exitCode = 1
				break waitLoop
			}
			keywordIndex, err := p.Process(pcm)
			if err != nil {
				log.Print(err)
				exitCode = 1
				break waitLoop
			}
			if keywordIndex >= 0 {
				fmt.Printf("Keyword %d detected\n", keywordIndex)
			}
		}
	}
}

// resolveExistingPath converts path to an absolute path and verifies that the
// file can be stat'ed, terminating the program with a descriptive message
// otherwise. kind names the file's role ("library", "model" or "keyword") and
// is used only in the messages. It must only be called before any resource
// that needs explicit cleanup has been acquired, because it exits directly.
func resolveExistingPath(kind, path string) string {
	// filepath.Abs("") yields the working directory, which would pass the
	// existence check below, so reject empty entries explicitly.
	if path == "" {
		log.Fatalf("Empty %s file path.", kind)
	}
	absPath, err := filepath.Abs(path)
	if err != nil {
		log.Fatalf("Could not resolve %s file path %q: %s", kind, path, err)
	}
	if _, err := os.Stat(absPath); err != nil {
		if os.IsNotExist(err) {
			log.Fatalf("Could not find %s file at %s", kind, absPath)
		}
		log.Fatalf("Could not access %s file at %s: %s", kind, absPath, err)
	}
	return absPath
}

因为在Pi0上使用,它居然也有Pi0上使用的c版本,需要编译:

gcc -std=c99 -O3 -o demo/respeaker-rpi0/porcupine_demo_mic \
-I include/ demo/respeaker-rpi0/porcupine_demo_mic.c \
-ldl -lasound

应先安装依赖库: sudo apt-get install libasound2-dev

./demo/respeaker-rpi0/porcupine_demo_mic \
${ACCESS_KEY} \
lib/raspberry-pi/arm11/libpv_porcupine.so \
lib/common/porcupine_params.pv \
0.65 \
plughw:CARD=seeed2micvoicec,DEV=0 \
resources/keyword_files/raspberry-pi/alexa_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/computer_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/hey\ google_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/hey\ siri_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/jarvis_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/picovoice_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/porcupine_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/bumblebee_raspberry-pi.ppn \
resources/keyword_files/raspberry-pi/terminator_raspberry-pi.ppn

根据你的唤醒词,将会改变主板上的RGB灯颜色。 看起来代码也不是很多,完全可以修改为自己的其它功能。

#include <dlfcn.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#include <alsa/asoundlib.h>
#include <asm/ioctl.h>
#include <linux/spi/spidev.h>
#include <signal.h>
#include <sys/ioctl.h>

#include "pv_porcupine.h"

// Colors available for the LEDs, each stored as a {red, green, blue} triple of
// 8-bit intensities. set_color() takes one of these arrays as its argument.
static const uint8_t OFF_RGB[3] = {0, 0, 0};
static const uint8_t BLUE_RGB[3] = {0, 0, 255};
static const uint8_t GREEN_RGB[3] = {0, 255, 0};
static const uint8_t ORANGE_RGB[3] = {255, 128, 0};
static const uint8_t PINK_RGB[3] = {255, 51, 153};
static const uint8_t PURPLE_RGB[3] = {128, 0, 128};
static const uint8_t RED_RGB[3] = {255, 0, 0};
static const uint8_t WHITE_RGB[3] = {255, 255, 255};
static const uint8_t YELLOW_RGB[3] = {255, 255, 51};

// Set to true by interrupt_handler() and polled by the capture loop in main().
// NOTE(review): the C standard only guarantees `volatile sig_atomic_t` for
// objects written from a signal handler - consider whether bool is acceptable here.
static volatile bool is_interrupted = false;

/*
 * Reference documentation for the LED SPI interface:
 * https://github.com/torvalds/linux/blob/master/include/uapi/linux/spi/spi.h
 * https://github.com/torvalds/linux/blob/master/include/uapi/linux/spi/spidev.h
 * https://cdn-shop.adafruit.com/datasheets/APA102.pdf
 */
// SPI mode written with SPI_IOC_WR_MODE in setup_spi().
static const uint8_t spi_mode = 0;
// Bits per word: written with SPI_IOC_WR_BITS_PER_WORD and used for every transfer.
static const uint8_t spi_BPW = 8;
// Clock speed: written with SPI_IOC_WR_MAX_SPEED_HZ and used for every transfer.
static const uint32_t spi_speed = 6000000;
// Value stored in the delay_usecs field of every transfer.
static const uint16_t spi_delay = 0;
// File descriptor of the SPI device; -1 until setup_spi() has opened it.
static int spidev_fd = -1;

// Opens /dev/spidev0.0 into spidev_fd and writes the mode, bits-per-word and
// maximum speed settings to it. Any failure is reported on stderr together
// with the errno description and terminates the process with status 1.
static void setup_spi() {
    if ((spidev_fd = open("/dev/spidev0.0", O_RDWR)) < 0) {
        fprintf(stderr, "unable to open SPI device '%s'.\n", strerror(errno));
        exit(1);
    }

    // Each entry pairs an ioctl request with the value it writes and the label
    // that identifies the setting in the error message.
    const struct {
        unsigned long request;
        const void *value;
        const char *label;
    } settings[] = {
            {SPI_IOC_WR_MODE, &spi_mode, "mode"},
            {SPI_IOC_WR_BITS_PER_WORD, &spi_BPW, "BPW"},
            {SPI_IOC_WR_MAX_SPEED_HZ, &spi_speed, "speed"},
    };

    // Applied in the order listed above; the first failure aborts.
    for (size_t i = 0; i < sizeof(settings) / sizeof(settings[0]); i++) {
        if (ioctl(spidev_fd, settings[i].request, settings[i].value) < 0) {
            fprintf(stderr, "failed to change SPI %s '%s'.\n", settings[i].label, strerror(errno));
            exit(1);
        }
    }
}

// Performs a single SPI transfer of `len` bytes starting at `data` on spidev_fd,
// using the file-level speed, delay and bits-per-word settings. The same buffer
// is supplied as both the transmit and the receive buffer. On failure the errno
// description is printed to stderr and the process exits with status 1.
static void spi_write_data(unsigned char *data, int len) {
    struct spi_ioc_transfer transfer;
    memset(&transfer, 0, sizeof(transfer));

    const unsigned long buffer_address = (unsigned long) data;
    transfer.tx_buf = buffer_address;
    transfer.rx_buf = buffer_address;
    transfer.len = len;
    transfer.speed_hz = spi_speed;
    transfer.delay_usecs = spi_delay;
    transfer.bits_per_word = spi_BPW;

    const int result = ioctl(spidev_fd, SPI_IOC_MESSAGE(1), &transfer);
    if (result < 0) {
        fprintf(stderr, "failed to write to SPI '%s'.\n", strerror(errno));
        exit(1);
    }
}

// Sends the given color over SPI: four single zero bytes, then twelve 4-byte
// LED frames (a header byte carrying the brightness, followed by the blue,
// green and red components of `rgb`), then four more single zero bytes.
static void set_color(const uint8_t rgb[3]) {
    enum { MARKER_BYTES = 4, LED_FRAMES = 12 };
    static const uint32_t BRIGHTNESS = 1;

    // The byte is re-created for every write because spi_write_data() also
    // passes the buffer to the driver as the receive buffer.
    for (int32_t sent = 0; sent < MARKER_BYTES; sent++) {
        uint8_t zero_byte = 0x00;
        spi_write_data(&zero_byte, 1);
    }

    for (int32_t frame = 0; frame < LED_FRAMES; frame++) {
        // 0xE0 sets the three top bits of the header; 0x1F masks the
        // brightness into its low five bits.
        uint8_t led_frame[4] = {
                (uint8_t) (0xE0 | (0x1F & BRIGHTNESS)),
                rgb[2],
                rgb[1],
                rgb[0],
        };
        spi_write_data(led_frame, 4);
    }

    for (int32_t sent = 0; sent < MARKER_BYTES; sent++) {
        uint8_t zero_byte = 0x00;
        spi_write_data(&zero_byte, 1);
    }
}

// Signal handler installed for SIGINT: records the interrupt in is_interrupted
// so that the capture loop in main() stops.
void interrupt_handler(int signal_number) {
    is_interrupted = true;
    (void) signal_number; // the signal number itself is not needed
}

int main(int argc, char *argv[]) {
    if (argc != 15) {
        fprintf(stderr,
                "usage : %s access_key library_path model_path sensitivity input_audio_device alexa_keyword_path "
                "computer_keyword_path hey_google_keyword_path hey_siri_keyword_path jarvis_keyword_path "
                "picovoice_keyword_path porcupine_keyword_path bumblebee_keyword_path terminator_keyword_path\n",
                argv[0]);
        exit(1);
    }

    signal(SIGINT, interrupt_handler);

    const char *access_key = argv[1];
    const char *library_path = argv[2];
    const char *model_path = argv[3];
    const float sensitivity = (float) atof(argv[4]);
    const char *input_audio_device = argv[5];
    const char **keyword_paths = (const char **) &argv[6];
    const int32_t num_keywords = 9;

    void *porcupine_library = dlopen(library_path, RTLD_NOW);
    if (!porcupine_library) {
        fprintf(stderr, "failed to open library.\n");
        exit(1);
    }

    char *error = NULL;

    const char *(*pv_status_to_string_func)(pv_status_t) = dlsym(porcupine_library, "pv_status_to_string");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_status_to_string' with '%s'.\n", error);
        exit(1);
    }

    int32_t (*pv_sample_rate_func)() = dlsym(porcupine_library, "pv_sample_rate");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_sample_rate' with '%s'.\n", error);
        exit(1);
    }

    pv_status_t (*pv_porcupine_init_func)(const char *, const char *, int32_t, const char *const *, const float *, pv_porcupine_t **) =
            dlsym(porcupine_library, "pv_porcupine_init");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_porcupine_init' with '%s'.\n", error);
        exit(1);
    }

    void (*pv_porcupine_delete_func)(pv_porcupine_t *) = dlsym(porcupine_library, "pv_porcupine_delete");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_porcupine_delete' with '%s'.\n", error);
        exit(1);
    }

    pv_status_t (*pv_porcupine_process_func)(pv_porcupine_t *, const int16_t *, int32_t *) = dlsym(porcupine_library, "pv_porcupine_process");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_porcupine_process' with '%s'.\n", error);
        exit(1);
    }

    int32_t (*pv_porcupine_frame_length_func)() = dlsym(porcupine_library, "pv_porcupine_frame_length");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "failed to load 'pv_porcupine_frame_length' with '%s'.\n", error);
        exit(1);
    }

    pv_porcupine_t *porcupine = NULL;
    float sensitivities[num_keywords];
    for (int32_t i = 0; i < num_keywords; i++) {
        sensitivities[i] = sensitivity;
    }
    pv_status_t status = pv_porcupine_init_func(access_key, model_path, num_keywords, keyword_paths, sensitivities, &porcupine);
    if (status != PV_STATUS_SUCCESS) {
        fprintf(stderr, "'pv_porcupine_init' failed with '%s'\n", pv_status_to_string_func(status));
        exit(1);
    }

    snd_pcm_t *alsa_handle = NULL;
    int error_code = snd_pcm_open(&alsa_handle, input_audio_device, SND_PCM_STREAM_CAPTURE, 0);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_open' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    snd_pcm_hw_params_t *hardware_params = NULL;
    error_code = snd_pcm_hw_params_malloc(&hardware_params);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_malloc' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params_any(alsa_handle, hardware_params);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_any' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params_set_access(alsa_handle, hardware_params, SND_PCM_ACCESS_RW_INTERLEAVED);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_set_access' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params_set_format(alsa_handle, hardware_params, SND_PCM_FORMAT_S16_LE);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_set_format' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params_set_rate(alsa_handle, hardware_params, pv_sample_rate_func(), 0);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_set_rate' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params_set_channels(alsa_handle, hardware_params, 1);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params_set_channels' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    error_code = snd_pcm_hw_params(alsa_handle, hardware_params);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_hw_params' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    snd_pcm_hw_params_free(hardware_params);

    error_code = snd_pcm_prepare(alsa_handle);
    if (error_code != 0) {
        fprintf(stderr, "'snd_pcm_prepare' failed with '%s'\n", snd_strerror(error_code));
        exit(1);
    }

    const int32_t frame_length = pv_porcupine_frame_length_func();

    int16_t *pcm = malloc(frame_length * sizeof(int16_t));
    if (!pcm) {
        fprintf(stderr, "failed to allocate memory for audio buffer\n");
        exit(1);
    }

    setup_spi();
    fprintf(stdout, "[Listening]\n");

    while (!is_interrupted) {
        const int count = snd_pcm_readi(alsa_handle, pcm, frame_length);
        if (count < 0) {
            fprintf(stderr, "'snd_pcm_readi' failed with '%s'\n", snd_strerror(count));
            exit(1);
        } else if (count != frame_length) {
            fprintf(stderr, "read %d frames instead of %d\n", count, frame_length);
            exit(1);
        }

        int32_t keyword_index = -1;
        status = pv_porcupine_process_func(porcupine, pcm, &keyword_index);
        if (status != PV_STATUS_SUCCESS) {
            fprintf(stderr, "'pv_porcupine_process' failed with '%s'\n", pv_status_to_string_func(status));
            exit(1);
        }
        if (keyword_index != -1) {
            static const char *KEYWORDS[] = {
                    "Alexa",
                    "Computer",
                    "Hey Google",
                    "Hey Siri",
                    "Jarvis",
                    "Picovoice",
                    "Porcupine",
                    "Bumblebee",
                    "Terminator",
            };

            fprintf(stdout, "detected '%s'\n", KEYWORDS[keyword_index]);

            static const char *COLORS[] = {"yellow", "white", "red", "purple", "pink", "green", "blue", "orange", "off"};

            switch (keyword_index) {
                case 0:
                    set_color(YELLOW_RGB);
                    break;
                case 1:
                    set_color(WHITE_RGB);
                    break;
                case 2:
                    set_color(RED_RGB);
                    break;
                case 3:
                    set_color(PURPLE_RGB);
                    break;
                case 4:
                    set_color(PINK_RGB);
                    break;
                case 5:
                    set_color(GREEN_RGB);
                    break;
                case 6:
                    set_color(BLUE_RGB);
                    break;
                case 7:
                    set_color(ORANGE_RGB);
                    break;
                case 8:
                    set_color(OFF_RGB);
                    break;
            }
        }
    }

    free(pcm);
    snd_pcm_close(alsa_handle);
    pv_porcupine_delete_func(porcupine);
    dlclose(porcupine_library);
    close(spidev_fd);

    return 0;
}

通过这个示例,发现资源目录下面其实有中文唤醒词:
resources/keyword_files_zh/raspberry-pi 这是pi使用的:
你好_raspberry-pi.ppn 咖啡_raspberry-pi.ppn 水饺_raspberry-pi.ppn 豪猪_raspberry-pi.ppn

必须要配合中文的模型文件 lib/common/porcupine_params_zh.pv

相关文章