(原) 洁癖:Pi0单文件语音播放

原创文章,请后转载,并注明出处。

之前是用mplayer来播放音频,比如Pi0启动时的提示,IP地址的提示,用户登录的提示等。在没有联网的情况下需要语音提示时,一般就是制作多个音频文件来进行播放。
之前的IP播报,因为涉及不同的数字组合,是将多个音频合并为一个音频文件,再通过多次调用mplayer来播放,但实际使用中发现每个数字之间的时间延迟较大。于是考虑自己做一个可以指定起始时间的音频播放程序。


package main

import (
	"embed"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/faiface/beep"
	"github.com/faiface/beep/effects"
	"github.com/faiface/beep/mp3"
	"github.com/faiface/beep/speaker"
)

// sound is an embedded file system containing sound.mp3; the go:embed
// directive below compiles that file into the executable.
//
//go:embed sound.mp3
var sound embed.FS

// yp maps each character/word key to the start and end of its clip inside
// the audio, written as "start,end". main parses both numbers as seconds.
// NOTE(review): the bare "..." lines appear to mark entries omitted from this
// listing; they are not Go syntax and must be replaced by the real entries
// before the program can compile.
var yp = map[string]string{
	"0":  "0.1,0.6",
	"1":  "0.6,1.1",
	"2":  "1.1,1.6",
        ...
	"开启":      "60.7,61.3",
	"关闭":      "61.4,62.0",
	"服务":      "62.05,62.7",
	"搜索":      "62.7,63.5",
	"保存":      "63.5,64.3",
	"启动":      "64.4,65.1",
	"已经":      "65.1,65.75",
	"网络":      "65.8,66.5",
	"未连接":     "66.5,67.4",
	"已完成":     "67.5,68.4",
	"音量":      "68.45,69.1",
	"加":       "69.1,69.7",
	"减":       "69.7,70.2",
	"乘":       "70.3,70.7",
	"除":       "70.8,71.3",
	"微信":      "71.8,72.4",
	"短信":      "72.5,73.2",
	"手机":      "73.3,74.1",
	"邮件":      "74.1,74.8",
	"复制":      "74.9,75.6",
	"系统将重启":   "90.9,92.4",
	"系统将关闭":   "92.4,93.8",
	"接收到命令":   "93.9,95.0",
	"wifi添加中": "95.1,96.4",
	"命令已执行":   "96.5,97.7",
	"搜索中":     "97.7,98.7",
	"执行中":     "98.7,99.6",
	"查询中":     "99.6,100.5",
	"请稍等":     "100.6,101.5",
        ...
	"微风":      "123.0,123.6",
}

// audioPanel groups the playback objects created by newAudioPanel.
// newAudioPanel fills it with an unkeyed struct literal, so the field order
// here must stay in sync with that literal.
type audioPanel struct {
	// sampleRate is the sample rate of the decoded audio format; main uses it
	// to convert between durations and sample positions.
	sampleRate beep.SampleRate
	//streamer   beep.StreamSeeker
	// ctrl wraps beep.Loop(1, streamer) over the source streamer.
	ctrl      *beep.Ctrl
	// resampler wraps ctrl.
	resampler *beep.Resampler
	// volume wraps resampler.
	volume    *effects.Volume
	// buffer receives the source streamer's audio via Append.
	buffer    *beep.Buffer
	// buf is a seekable streamer spanning all of buffer; play() sends it to
	// the speaker.
	buf       beep.StreamSeeker
}

// newAudioPanel builds the audioPanel for a decoded stream.
//
// It first stacks ctrl -> resampler -> volume on top of streamer, then appends
// streamer's audio to a beep.Buffer and creates a seekable streamer covering
// that whole buffer. play() uses the buffered streamer.
func newAudioPanel(format beep.Format, streamer beep.StreamSeeker) *audioPanel {
	panel := &audioPanel{sampleRate: format.SampleRate}

	// Effect chain built directly on the source streamer.
	panel.ctrl = &beep.Ctrl{Streamer: beep.Loop(1, streamer)}
	panel.resampler = beep.ResampleRatio(2, 1, panel.ctrl)
	panel.volume = &effects.Volume{Streamer: panel.resampler, Base: 1}

	// In-memory copy of the audio plus a seekable view over all of it.
	panel.buffer = beep.NewBuffer(format)
	panel.buffer.Append(streamer)
	panel.buf = panel.buffer.Streamer(0, panel.buffer.Len())

	return panel
}

// play hands the buffered streamer (ap.buf) to the speaker.
func (ap *audioPanel) play() {
	src := ap.buf
	speaker.Play(src)
}

// OnError is a fail-fast helper: it does nothing for a nil error; otherwise
// it prints the error and terminates the process with exit status 1.
func OnError(e error) {
	if e == nil {
		return
	}
	fmt.Println(e)
	os.Exit(1)
}

// parseSpan converts a yp value of the form "start,end" (both in seconds)
// into durations rounded to the nearest millisecond. ok is false when the
// value is malformed, negative, or ends before it starts.
func parseSpan(spec string) (start, end time.Duration, ok bool) {
	parts := strings.Split(spec, ",")
	if len(parts) != 2 {
		return 0, 0, false
	}
	from, err := strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
	if err != nil {
		return 0, 0, false
	}
	to, err := strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
	if err != nil {
		return 0, 0, false
	}
	if from < 0 || to < from {
		return 0, 0, false
	}
	start = time.Duration(from*1000+0.5) * time.Millisecond
	end = time.Duration(to*1000+0.5) * time.Millisecond
	return start, end, true
}

// main plays clips of the embedded recording.
//
// The single command-line argument is a comma-separated list of yp keys; the
// matching clips are played one after another. Keys that are not in yp are
// skipped silently. An explicitly empty argument ("") plays the whole
// recording from the beginning.
func main() {
	// os.Args always holds the program name, so "no argument" is len < 2.
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "用法: %s 字符\n", os.Args[0])
		os.Exit(1)
	}
	cmd := os.Args[1] // playback control: comma-separated yp keys

	f, err := sound.Open("sound.mp3")
	OnError(err)
	streamer, format, err := mp3.Decode(f)
	OnError(err)
	defer streamer.Close()

	OnError(speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/5)))

	ap := newAudioPanel(format, streamer)
	soundLen := ap.buffer.Len()

	// position reads the playback position while holding the speaker lock,
	// because the speaker advances the streamer from its own goroutine.
	position := func() int {
		speaker.Lock()
		defer speaker.Unlock()
		return ap.buf.Position()
	}

	if cmd == "" { // no control: play the whole recording
		ap.play()
		for position() < soundLen {
			time.Sleep(time.Second)
		}
		return
	}

	// Poll with a short sleep instead of spinning, so the wait loop does not
	// occupy a CPU core while audio is being produced.
	const pollInterval = 5 * time.Millisecond

	// waitFor blocks until playback reaches end. It reports false when the
	// audio runs out first, in which case the position can no longer advance.
	waitFor := func(end time.Duration) bool {
		for {
			pos := position()
			if ap.sampleRate.D(pos).Round(time.Millisecond) >= end {
				return true
			}
			if pos >= soundLen {
				return false
			}
			time.Sleep(pollInterval)
		}
	}

	played := false
	for _, key := range strings.Split(cmd, ",") {
		spec := yp[key]
		if spec == "" {
			continue
		}
		start, end, ok := parseSpan(spec)
		if !ok {
			fmt.Fprintf(os.Stderr, "yp[%q]: malformed time range %q\n", key, spec)
			continue
		}

		// Holding the speaker lock keeps the mixer from pulling samples
		// while the position is moved.
		speaker.Lock()
		seekErr := ap.buf.Seek(ap.sampleRate.N(start))
		speaker.Unlock()
		if seekErr != nil {
			fmt.Fprintf(os.Stderr, "seek to clip %q: %v\n", key, seekErr)
			continue
		}

		// Start playback only after the first successful seek, so nothing
		// from the beginning of the recording is played by accident.
		if !played {
			ap.play()
			played = true
		}

		if !waitFor(end) {
			fmt.Fprintf(os.Stderr, "audio ended before clip %q finished\n", key)
			break
		}
	}

	if played {
		// Stop feeding new samples, then give the output buffer time to drain.
		speaker.Clear()
		time.Sleep(time.Millisecond * 600)
	}
}

这里使用了笨办法:手工设置每个关键词的音频起止点。聪明一点的做法是提前搜索音频中超过0.2秒的静音,以此把音频分段,关键词只需要标注在哪一段就可以了。

在Pi0上编译运行,出现的问题是等待时间过长,毕竟Pi0那个硬件资源有限。启动时间超过1分钟,受不了。
把程序改为无缓存方式。

package main

import (
	"embed"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/faiface/beep"
	"github.com/faiface/beep/effects"
	"github.com/faiface/beep/mp3"
	"github.com/faiface/beep/speaker"
)

// sound is an embedded file system containing sound.mp3; the go:embed
// directive below compiles that file into the executable.
//
//go:embed sound.mp3
var sound embed.FS

// yp maps each character/word key to the start and end of its clip inside
// the audio, written as "start,end". main parses both numbers as seconds.
var yp = map[string]string{
	"0":  "0.1,0.6",
	"1":  "0.6,1.1",
	"2":  "1.1,1.6",
	"3":  "1.7,2.3",
	"4":  "2.3,2.9",
	"5":  "2.9,3.4",
	"6":  "3.4,3.9",
	"7":  "3.9,4.5",
	"8":  "4.5,5.0",
	"9":  "5.0,5.5",
	"10": "5.5,6.2",
	"11": "6.15,6.9",
	"12": "6.9,7.55",
	"13": "7.6,8.4",
	"14": "8.4,9.1",
	"15": "9.1,9.8",
	"16": "9.9,10.50",
	"17": "10.6,11.35",
	"18": "11.35,12.05",
	"19": "12.10,12.80",
	"20": "12.8,13.6",
	"21": "13.6,14.4",
	"22": "14.5,15.25",
	"23": "15.30,16.1",
	"24": "16.2,17.0",
	"雨加雪":     "122.1,123.0",
	"微风":      "123.0,123.6",
}

// audioPanel groups the playback objects created by newAudioPanel.
// newAudioPanel fills it with an unkeyed struct literal, so the field order
// here must stay in sync with that literal.
type audioPanel struct {
	// sampleRate is the sample rate of the decoded audio format; main uses it
	// to convert between durations and sample positions.
	sampleRate beep.SampleRate
	// streamer is the seekable source; main calls Len, Position and Seek on it.
	streamer   beep.StreamSeeker
	// ctrl wraps beep.Loop(1, streamer).
	ctrl       *beep.Ctrl
	// resampler wraps ctrl.
	resampler  *beep.Resampler
	// volume wraps resampler; play() sends it to the speaker.
	volume     *effects.Volume
}

// newAudioPanel builds the audioPanel for a decoded stream by stacking
// ctrl -> resampler -> volume on top of streamer. Nothing is buffered; the
// source streamer itself is kept so callers can seek it and query its position.
func newAudioPanel(format beep.Format, streamer beep.StreamSeeker) *audioPanel {
	panel := &audioPanel{
		sampleRate: format.SampleRate,
		streamer:   streamer,
	}
	panel.ctrl = &beep.Ctrl{Streamer: beep.Loop(1, streamer)}
	panel.resampler = beep.ResampleRatio(4, 1, panel.ctrl)
	panel.volume = &effects.Volume{Streamer: panel.resampler, Base: 2}
	return panel
}

// play hands the top of the effect chain (ap.volume) to the speaker.
func (ap *audioPanel) play() {
	out := ap.volume
	speaker.Play(out)
}

// OnError is a fail-fast helper: it does nothing for a nil error; otherwise
// it prints the error and terminates the process with exit status 1.
func OnError(e error) {
	if e == nil {
		return
	}
	fmt.Println(e)
	os.Exit(1)
}

// parseSpan converts a yp value of the form "start,end" (both in seconds)
// into durations rounded to the nearest millisecond. ok is false when the
// value is malformed, negative, or ends before it starts.
func parseSpan(spec string) (start, end time.Duration, ok bool) {
	parts := strings.Split(spec, ",")
	if len(parts) != 2 {
		return 0, 0, false
	}
	from, err := strconv.ParseFloat(strings.TrimSpace(parts[0]), 64)
	if err != nil {
		return 0, 0, false
	}
	to, err := strconv.ParseFloat(strings.TrimSpace(parts[1]), 64)
	if err != nil {
		return 0, 0, false
	}
	if from < 0 || to < from {
		return 0, 0, false
	}
	start = time.Duration(from*1000+0.5) * time.Millisecond
	end = time.Duration(to*1000+0.5) * time.Millisecond
	return start, end, true
}

// main plays clips of the embedded recording without buffering it first.
//
// The single command-line argument is a comma-separated list of yp keys; the
// matching clips are played one after another. Keys that are not in yp are
// skipped silently. An explicitly empty argument ("") plays the whole
// recording from the beginning.
func main() {
	if len(os.Args) <= 1 {
		fmt.Fprintf(os.Stderr, "用法: %s 字符\n", os.Args[0])
		os.Exit(1)
	}
	cmd := os.Args[1] // playback control: comma-separated yp keys

	f, err := sound.Open("sound.mp3")
	OnError(err)
	streamer, format, err := mp3.Decode(f)
	OnError(err)
	defer streamer.Close()

	OnError(speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/5)))

	ap := newAudioPanel(format, streamer)
	soundLen := ap.streamer.Len()

	// position reads the playback position while holding the speaker lock,
	// because the speaker advances the streamer from its own goroutine.
	position := func() int {
		speaker.Lock()
		defer speaker.Unlock()
		return ap.streamer.Position()
	}

	if cmd == "" { // no control: play the whole recording
		ap.play()
		for position() < soundLen {
			time.Sleep(time.Second)
		}
		return
	}

	// Poll with a short sleep instead of spinning, so the wait loop does not
	// compete with decoding and playback for CPU time.
	const pollInterval = 5 * time.Millisecond

	// waitFor blocks until playback reaches end. It reports false when the
	// audio runs out first, in which case the position can no longer advance.
	waitFor := func(end time.Duration) bool {
		for {
			pos := position()
			if ap.sampleRate.D(pos).Round(time.Millisecond) >= end {
				return true
			}
			if pos >= soundLen {
				return false
			}
			time.Sleep(pollInterval)
		}
	}

	played := false
	for _, key := range strings.Split(cmd, ",") {
		spec := yp[key]
		if spec == "" {
			continue
		}
		start, end, ok := parseSpan(spec)
		if !ok {
			fmt.Fprintf(os.Stderr, "yp[%q]: malformed time range %q\n", key, spec)
			continue
		}

		// Holding the speaker lock keeps the mixer from pulling samples
		// while the position is moved, so no extra pause toggle is needed.
		speaker.Lock()
		seekErr := ap.streamer.Seek(ap.sampleRate.N(start))
		speaker.Unlock()
		if seekErr != nil {
			fmt.Fprintf(os.Stderr, "seek to clip %q: %v\n", key, seekErr)
			continue
		}

		// Start playback only after the first successful seek, so nothing
		// from the beginning of the recording is played by accident.
		if !played {
			ap.play()
			played = true
		}

		if !waitFor(end) {
			fmt.Fprintf(os.Stderr, "audio ended before clip %q finished\n", key)
			break
		}
	}

	if played {
		// Stop feeding new samples, then give the output buffer time to drain.
		speaker.Clear()
		time.Sleep(time.Millisecond * 600)
	}
}

直接读取文件,在Pi0上却导致音频卡顿,启动问题倒是没有了。但都嵌入到可执行文件一起了,还会将它又反存到磁盘?

看起来应该采用缓存方式,但只缓存需要的部分。这次尝试以失败告终,不过学会了怎么用此库来控制播放位置。对于硬件资源稍好的设备,这个程序是没问题的。

临时的妥协方案:将音频改为wav,通过sox自带的play来播放,启动与中间延迟都合适。(play -q sound.wav trim 1.1 =1.5)

相关文章