音视频基础(三)WAV格式与PCM简介

  • Post author:
  • Post category:其他


最近的工作涉及语音识别相关的研究,因此先简单梳理一下WAV格式和PCM。以前用C++实现过一个mp3 player,这次借机回顾当时的代码实现,顺便把WAV的播放也梳理了一遍。

什么是PCM

直接上百度百科的定义:PCM(Pulse Code Modulation)脉冲编码调制是数字通信的编码方式之一。主要过程是将话音、图像等模拟信号每隔一定时间进行取样,使其离散化,同时将抽样值按分层单位四舍五入取整量化,同时将抽样值按一组二进制码来表示抽样脉冲的幅值。


简单地说,PCM就是直接对原始的模拟声波信号进行采样、量化后得到的数据。

什么是 采样率 sample rate

采样率是指每秒钟从声音信号中采集的采样点(sample)个数。

举例:sample rate 为 16000 Hz,意思是每秒钟从声音信号中采集 16000 个点(sample)。

什么是比特率 bits per second

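从字面定义,即:一秒钟的声音信号占用的比特数。对未压缩的PCM而言,比特率 = 采样率 × 声道数 × 每个sample的位数;例如 16000 Hz、单声道、16 bit 的PCM,比特率为 16000 × 1 × 16 = 256000 bit/s。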

简言之:比特率 表征 采样的 数据 的 信息逼真程度,比特率越高,数据的逼真程度越高。

使用 alsa sound 进行 pcm 数据的播放

关于alsa sound

从网上找了alsa sound的解释如下:ALSA(Advanced Linux Sound Architecture)是linux上主流的音频结构,在没有出现ALSA架构之前,一直使用的是OSS(Open Sound System)音频架构。

简单地说,alsa sound是linux下的音频框架,使用alsa sound的接口即可播放pcm数据。这里正好可以用这个接口来体验pcm的声音效果,真切地感受一下pcm数据送到电脑喇叭后发出的声音。

使用 alsa sound 播放pcm

  • 计算320ms的pcm数据长度,因为我的代码是每次播放320ms长度的sample数据,所以先计算出320ms的采样数据长度。

    直接给出公式如下:
简单的说下:
sample_rate 表示每个声道1秒内采样的sample数:单声道时1秒的采样数 = 采样率,多声道时1秒的采样数 = 采样率 × 声道数(channels)。因此 320ms的采样数 = 1秒的采样数 × 320/1000 = 采样率 × 声道数 × 32/100
samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;

wav格式

以.wav为后缀的文件就是wav格式的文件,先上百度百科的定义:是微软公司专门为Windows开发的一种标准数字音频文件,该文件能记录各种单声道或立体声的声音信息,并能保证声音不失真。


简单的说:WAV文件就是在PCM数据前加了个PCM的信息说明头,仅此而已


这个头部数据占有字节内容如下:

资料领取直通车:



音视频开发&流媒体服务器资料文档+视频教程


icon-default.png?t=M85B
https://docs.qq.com/doc/DTm5idlJ1Y1dWZFZU


音视频学习直通车:



【免费】FFmpeg/WebRTC/RTMP/NDK/Android音视频流媒体高级开发


icon-default.png?t=M85B
https://ke.qq.com/course/3202131?flowToken=1042584


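"RIFF" 标识 4 bytes
RIFF chunk 大小(小端序,通常为文件总长度 - 8) 4 bytes
"WAVE" 标识 4 bytes
"fmt " 标识 4 bytes
fmt chunk 大小(即下文的 chunk_size,小端序,常见为 16 或 18) 4 bytes
格式标签(2)、声道数(2)、采样率(4)、平均每秒字节数(4,代码中的 bit_rate 字段)、块对齐(2)、一个sample占的bit数(2) 共 16 bytes
extra 2 bytes //仅当 fmt 的 chunk_size == 18 时才有
fact chunk(可选):"fact" 标识 4 bytes + 大小 4 bytes + 内容 4 bytes
"data" 标识 4 bytes
data chunk 大小(小端序,即后面PCM数据的字节数) 4 bytes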
---------------
到这一步,就计算出接下来PCM数据的长度,接下来的位置就是PCM数据了

最后附上使用 alsa sound 播放pcm的源码

// Parses the header of an in-memory WAV (RIFF/WAVE) buffer and hands out the
// PCM payload in slices of 320 ms.
//
// The parser keeps the pointer passed to the constructor; the destructor does
// not release it, so the buffer must stay valid while the parser is in use.
class WAVParser {
public:
    // Fields decoded from the "fmt " and "data" chunks by ParseFrameHeader().
    // Declared as a named struct (not `typedef struct {...}`) because an
    // unnamed struct that is given its name by a typedef may not declare
    // member functions since C++20, and FrameInfo below needs one.
    struct FrameHeader {
        int format_tag;            // 2-byte format code at the start of the fmt body
        int channels;              // 2-byte channel count
        unsigned int sample_rate;  // 4-byte sample rate
        unsigned int bit_rate;     // 4-byte field that follows sample_rate in the fmt body
                                   // NOTE(review): the WAV spec calls this field
                                   // "average bytes per second" - confirm the naming
        int block_align;           // 2-byte block alignment
        int bits_per_sample;       // 2-byte sample width in bits
        int data_pos;              // offset of the first PCM byte
        int data_size;             // size of the PCM payload in bytes
    };

    // Per-frame information filled in by GetFrame(); currently carries no data.
    struct FrameInfo {
        // Initializes the info from a parsed header. Returns 0.
        int Init(const FrameHeader& header);
    };

    // Looks for a WAV header in data[0, len) and parses it.
    // Returns 0 on success and -1 if no parsable header was found.
    static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

    // Locates and parses the WAV header inside data[0, len).
    WAVParser(const unsigned char* data, int len);
    ~WAVParser();

    // Copies the next 320 ms of PCM into `data` (capacity `len` bytes) and
    // reports the number of samples, the channel count and the sample rate.
    // NOTE: `data` is the destination buffer even though it is declared as a
    // pointer to const. Returns 0 on success and -1 when no full slice is left
    // or the buffer is too small.
    int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

protected:
    // Returns the offset of the first "RIFF" tag in data, or -1.
    static int FindFrameHeader(const unsigned char* data, int len);

    // Decodes the header that starts at data[0] into `header`.
    // Returns 0 on success and -1 on a malformed header.
    static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);

    // Dumps the format fields of `header` through PRINT (debug helper).
    // Signed fields are printed with %d, unsigned ones with %u.
    static void TypeFrameHeader(const FrameHeader& header) {
        PRINT("=============================================");
        PRINT("header.format_tag = %d", header.format_tag);
        PRINT("header.channels = %d", header.channels);
        PRINT("header.sample_rate = %u", header.sample_rate);
        PRINT("header.bit_rate = %u", header.bit_rate);
        PRINT("header.block_align = %d", header.block_align);
        PRINT("header.bits_per_sample = %d", header.bits_per_sample);
    }

private:
    int index_;                  // bytes of PCM payload already handed out
    FrameHeader header_;         // header parsed by the constructor
    const unsigned char* data_;  // start of the caller's buffer (not owned)
    const int data_len_;         // length of the caller's buffer in bytes
    int samples_per_320ms_;      // samples (all channels) contained in 320 ms

};
///
//WAV//
///
// Initializes a FrameInfo from a parsed header. FrameInfo declares no data
// members, so there is nothing to copy and the call always reports success (0).
int WAVParser::FrameInfo::Init(const FrameHeader& /*header*/) {
    const int kSuccess = 0;
    return kSuccess;
}

// Checks whether data[0, len) contains a parsable WAV header.
//
// @param data       buffer to search
// @param len        number of valid bytes in data
// @param frame_len  always set to 0 (no frame length is computed here)
// @param info       not modified
// @return 0 when a header was found and parsed, -1 otherwise
int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
    (void)info;  // kept for interface compatibility; nothing is stored in it
    frame_len = 0;

    if (data == NULL || len <= 0)
        return -1;

    const int pos = FindFrameHeader(data, len);
    if (pos < 0)
        return -1;

    // The header starts `pos` bytes into the buffer, so only len - pos bytes
    // remain behind the pointer handed to the parser.
    FrameHeader header;
    if (ParseFrameHeader(&data[pos], len - pos, header) != 0)
        return -1;

    //TypeFrameHeader(header);
    return 0;
}

// Searches data[0, len) for the 4-byte "RIFF" tag.
//
// @return offset of the first match, or -1 when the tag does not occur
//         (including a NULL buffer or fewer than 4 bytes)
int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
    static const char kRiffTag[] = "RIFF";
    const int kTagBytes = 4;

    if (data == NULL || len < kTagBytes)
        return -1;

    // The last offset at which a complete tag still fits is len - kTagBytes,
    // so the bound is inclusive.
    for (int pos = 0; pos <= len - kTagBytes; ++pos) {
        if (memcmp(&data[pos], kRiffTag, kTagBytes) == 0) {
            PRINT("found header");
            return pos;
        }
    }
    return -1;
}

// Decodes a 16-bit little-endian unsigned value from the two bytes at p.
static unsigned int WavReadLE16(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8);
}

// Decodes a 32-bit little-endian unsigned value from the four bytes at p.
static unsigned int WavReadLE32(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8) |
           (static_cast<unsigned int>(p[2]) << 16) |
           (static_cast<unsigned int>(p[3]) << 24);
}

// Moves pos past a chunk body of `size` bytes plus the pad byte that follows
// an odd-sized body. Requires 0 <= pos <= len. Returns false (pos unchanged)
// when the body would extend past len.
static bool WavSkipChunkBody(int len, unsigned int size, int& pos) {
    const unsigned int available = static_cast<unsigned int>(len - pos);
    if (size > available)
        return false;
    pos += static_cast<int>(size);
    if ((size & 1u) != 0 && pos < len)
        ++pos;
    return true;
}

// Parses a WAV header that starts at data[0].
//
// Expected layout: "RIFF" <size> "WAVE" "fmt " <fmt size> <fmt body>, followed
// by any number of other chunks (e.g. "fact") and finally the "data" chunk.
// All multi-byte integers are little-endian.
//
// @param data    first byte of the "RIFF" tag
// @param len     number of valid bytes behind data
// @param header  receives the format fields, data_pos (offset of the first
//                PCM byte relative to data) and data_size (payload bytes,
//                clamped to what the buffer actually contains)
// @return 0 on success, -1 when the header is malformed or truncated
int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
    const int kTagBytes = 4;
    const int kSizeBytes = 4;
    const int kChunkHeaderBytes = kTagBytes + kSizeBytes;
    const unsigned int kFmtCoreBytes = 16;
    // "RIFF" + size + "WAVE" + "fmt " + size + the 16 mandatory fmt bytes.
    const int kMinHeaderBytes = 36;

    if (data == NULL || len < kMinHeaderBytes) {
        PRINT("wav header truncated");
        return -1;
    }

    int pos = 0;
    if (memcmp(&data[pos], "RIFF", kTagBytes) != 0) {
        PRINT("not RIFF");
        return -1;
    }
    pos += kTagBytes;

    // The RIFF chunk size is not needed for parsing; step over it.
    //PRINT("RIFF chunk_size = %u", WavReadLE32(&data[pos]));
    pos += kSizeBytes;

    if (memcmp(&data[pos], "WAVE", kTagBytes) != 0) {
        PRINT("not WAVE");
        return -1;
    }
    pos += kTagBytes;

    if (memcmp(&data[pos], "fmt ", kTagBytes) != 0) {
        PRINT("not fmt");
        return -1;
    }
    pos += kTagBytes;

    const unsigned int fmt_size = WavReadLE32(&data[pos]);
    pos += kSizeBytes;
    //PRINT("fmt chunk_size = %u", fmt_size);
    if (fmt_size < kFmtCoreBytes) {
        PRINT("fmt chunk too small");
        return -1;
    }

    // The 16 mandatory bytes of the fmt body; their presence is guaranteed by
    // the kMinHeaderBytes check above.
    const unsigned char* fmt = &data[pos];
    header.format_tag = static_cast<int>(WavReadLE16(fmt));
    header.channels = static_cast<int>(WavReadLE16(fmt + 2));
    header.sample_rate = WavReadLE32(fmt + 4);
    header.bit_rate = WavReadLE32(fmt + 8);
    header.block_align = static_cast<int>(WavReadLE16(fmt + 12));
    header.bits_per_sample = static_cast<int>(WavReadLE16(fmt + 14));

    // Skip the whole fmt body by its declared size so that extension bytes
    // (size 18 and larger) are stepped over as well.
    if (!WavSkipChunkBody(len, fmt_size, pos)) {
        PRINT("fmt chunk truncated");
        return -1;
    }

    // Walk the remaining chunks until "data" is reached. Following the chunk
    // sizes (instead of scanning byte by byte) cannot run past the buffer and
    // cannot mistake the bytes "data" inside another chunk for the tag.
    for (;;) {
        if (len - pos < kChunkHeaderBytes) {
            PRINT("no data chunk");
            return -1;
        }

        const bool is_data = memcmp(&data[pos], "data", kTagBytes) == 0;
        const bool is_fact = memcmp(&data[pos], "fact", kTagBytes) == 0;
        const unsigned int chunk_size = WavReadLE32(&data[pos + kTagBytes]);
        pos += kChunkHeaderBytes;

        if (is_data) {
            PRINT("data chunk_size = %u", chunk_size);
            // Never report more payload than the buffer really holds.
            const unsigned int available = static_cast<unsigned int>(len - pos);
            header.data_size = static_cast<int>(chunk_size < available ? chunk_size : available);
            header.data_pos = pos;
            return 0;
        }

        if (is_fact) {
            PRINT("fact");
        }
        if (!WavSkipChunkBody(len, chunk_size, pos)) {
            PRINT("no data chunk");
            return -1;
        }
    }
}

// Locates and parses the WAV header inside data[0, len) and precomputes how
// many samples make up 320 ms.
//
// A missing or malformed header trips an assert. When asserts are compiled
// out, the parser is left in an empty state in which Get320msSample() fails.
// Members are initialized in their declaration order.
WAVParser::WAVParser(const unsigned char* data, int len)
    : index_(0), header_(), data_(data), data_len_(len), samples_per_320ms_(0) {
    const int pos = (data_ != NULL) ? FindFrameHeader(data_, data_len_) : -1;
    assert(pos >= 0);

    int ret = -1;
    if (pos >= 0) {
        // Only data_len_ - pos bytes remain behind the header start.
        ret = ParseFrameHeader(&data_[pos], data_len_ - pos, header_);
    }
    assert(ret == 0);

    if (ret == 0) {
        // ParseFrameHeader reports data_pos relative to the header start,
        // while Get320msSample indexes data_ from the start of the buffer.
        header_.data_pos += pos;
        samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
    } else {
        // Discard any partially parsed fields. A negative payload size makes
        // the size check in Get320msSample() reject every request.
        header_ = FrameHeader();
        header_.data_size = -1;
    }
}

// Nothing to release: the body is empty, so the buffer that data_ points to is
// left untouched.
WAVParser::~WAVParser() {}

int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
    samples = 0;
    channels = 0;

    const unsigned char* temp = &data_[header_.data_pos];

    temp += index_;
    int size = samples_per_320ms_*header_.bits_per_sample/8;
    if (index_ + size > header_.data_size || size > len) {
        return -1;
    }

    memcpy(data, temp, size);
    index_ += size;
    samples = samples_per_320ms_;
    channels = header_.channels;
    samplerate = header_.sample_rate;

    return 0;
}

播放PCM的相关代码

// Plays an in-memory WAV buffer: the parser hands out the PCM payload in
// 320 ms slices and each slice is passed to the player until the parser
// reports that no further slice is available.
void wav_play(const unsigned char* data, int len) {
    const int kBufferBytes = 1024*1024;

    PCMPlayer player(16000, 1);
    WAVParser parser(data, len);
    unsigned char* buffer = new unsigned char[kBufferBytes];

    int samples;
    int channels;
    int samplerate;
    while (parser.Get320msSample(buffer, kBufferBytes, samples, channels, samplerate) == 0) {
        // The slice is handed to the player as 16-bit samples.
        player.play(reinterpret_cast<short*>(buffer), samples, samplerate, channels);
    }

    delete []buffer;
}

#ifndef _PLAY_PCM_H
#define _PLAY_PCM_H
#include <alsa/asoundlib.h>
#include "util.h"
class PCMPlayer {
public:
    PCMPlayer(int sample_rate, int channels):
            channels_(channels),
            sample_rate_(sample_rate),
            init_(false) {

        snd_pcm_hw_params_t *params;
        int rc = snd_pcm_open(&handle_, "default",SND_PCM_STREAM_PLAYBACK, 0);
        if (rc < 0) {
            fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
            exit(1);
        }

        /* Allocate a hardware parameters object. */
        snd_pcm_hw_params_alloca(&params);
        /* Fill it in with default values. */
        snd_pcm_hw_params_any(handle_, params);
        /* Set the desired hardware parameters. */
        /* Interleaved mode */
        snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);

        /* Signed 16-bit little-endian format */
        snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);

        /* Two channels (stereo) */
        snd_pcm_hw_params_set_channels(handle_, params, channels_);

        /* 44100 bits/second sampling rate (CD quality) */
        unsigned int val = sample_rate_;
        int dir;
        snd_pcm_hw_params_set_rate_near(handle_, params, &val, &dir); 

        /* Set period size to 32 frames. */
        snd_pcm_uframes_t frames = 0;
        snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
        
        /* Write the parameters to the driver */
        rc = snd_pcm_hw_params(handle_, params);
        if (rc < 0) {
            fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
            exit(1);
        }

        /* Use a buffer large enough to hold one period */
        snd_pcm_hw_params_get_period_size(params, &frames, &dir);
        frame_size_ = frames;
    }

    void play(short* sample, int samples, int sample_rate, int channels) {
        SetSamplerateAndChannel(sample_rate, channels);

        int pos = 0;
        int size = frame_size_*channels_;
        while (pos < samples) {
            int rc = snd_pcm_writei(handle_, &sample[pos], frame_size_);
            if (rc == -EPIPE) {
                /* EPIPE means underrun */
                fprintf(stderr, "underrun occurred\n");
                snd_pcm_prepare(handle_);
            } else if (rc < 0) {
                fprintf(stderr, "error from writei: %s\n", snd_strerror(rc));
            }  else if (rc != (int)frame_size_) {
                fprintf(stderr, "short write, write %d frames\n", rc);
            }
            pos += size;
        }
    }

    ~PCMPlayer() {
        snd_pcm_drain(handle_);
        snd_pcm_close(handle_);
    }
    
protected:
    void SetSamplerateAndChannel(int sample_rate, int channels) {
        if (sample_rate_ != sample_rate || channels_ != channels) {
            PRINT("SetSamplerateAndChannel: sample rate = %d  chs = %d", sample_rate, channels);
            sample_rate_ = sample_rate;
            channels_ = channels;

            /* Allocate a hardware parameters object. */
            snd_pcm_hw_params_alloca(&params_);
            /* Fill it in with default values. */
            snd_pcm_hw_params_any(handle_, params_);
            /* Set the desired hardware parameters. */
            /* Interleaved mode */
            snd_pcm_hw_params_set_access(handle_, params_, SND_PCM_ACCESS_RW_INTERLEAVED);

            /* Signed 16-bit little-endian format */
            snd_pcm_hw_params_set_format(handle_, params_, SND_PCM_FORMAT_S16_LE);

            int dir;
            unsigned int val = sample_rate_;
            snd_pcm_hw_params_set_rate_near(handle_, params_, &val, &dir);

            /* Two channels (stereo) */
            snd_pcm_hw_params_set_channels(handle_, params_, channels_);

            /* Set period size to 32 frames. */
            snd_pcm_uframes_t frames = 0;
            snd_pcm_hw_params_set_period_size_near(handle_, params_, &frames, &dir);

            /* Write the parameters to the driver */
            int rc = snd_pcm_hw_params(handle_, params_);
            if (rc < 0) {
                fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
                exit(1);
            }

            /* Use a buffer large enough to hold one period */
            snd_pcm_hw_params_get_period_size(params_, &frames, &dir);
            frame_size_ = frames;

            PRINT("SetSamplerateAndChannel--<");
            sleep(1);
        }
    }

private:
    int channels_;
    int sample_rate_;
    int frame_size_;
    bool init_;
    snd_pcm_t *handle_;
    snd_pcm_hw_params_t *params_;
};

#endif



版权声明:本文为ShuShengTuM原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。