• 大小: 4KB
    文件类型: .py
    金币: 1
    下载: 0 次
    发布日期: 2021-06-02
  • 语言: Python
  • 标签: python  

资源简介

对中科院自动化所发布的中文语音情感数据库 CASIA 提取了 MFCC、过零率等特征,并采用 SVM 进行分类;识别率很低,只适合初学者了解语音情感识别的基本流程。

资源截图

代码片段和文件信息

import librosa
import os
from random import shuffle
import numpy as np
from sklearn import svm
import sklearn


# Root directory of the CASIA corpus. getData walks it as
# <path>/<speaker>/<emotion>/<clip>.wav.
path = r'I:\CFL\cfl_python_speech_emotion\casia'
# Emotion directory name -> class id. The ids are stored as strings and
# converted with int() when labels are built in getData.
EMOTION_LABEL = {
    'angry': '1',
    'fear': '2',
    'happy': '3',
    'neutral': '4',
    'sad': '5',
    'surprise': '6',
}


def _list_wav_files(root):
    """Return the paths of every .wav clip under root/<speaker>/<emotion>/."""
    wav_paths = []
    for person in os.listdir(root):
        # Text files sit alongside the speaker directories; they are not data.
        if person.endswith('.txt'):
            continue
        person_dir = os.path.join(root, person)
        for emotion in os.listdir(person_dir):
            # Skip entries ending in 'ini' (presumably Windows desktop.ini).
            if emotion.endswith('ini'):
                continue
            emotion_dir = os.path.join(person_dir, emotion)
            wav_paths.extend(
                os.path.join(emotion_dir, name)
                for name in os.listdir(emotion_dir)
                if name.endswith('wav')
            )
    return wav_paths


def _extract_features(wav_file, mfcc_feature_num):
    """Build the 1-D feature vector for a single audio clip."""
    y, sr = librosa.load(wav_file)

    # Keyword arguments work on every librosa release; positional audio
    # arguments are rejected by librosa >= 0.10.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=16)
    zcr = librosa.feature.zero_crossing_rate(y)
    # librosa 0.7 renamed feature.rmse to feature.rms; support both.
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
    rms = rms_fn(y=y)

    # Frame-major flattening: all 16 coefficients of frame 0, then frame 1, ...
    mfcc_vec = mfcc.T.flatten()[:mfcc_feature_num]
    if mfcc_vec.size < mfcc_feature_num:
        # Zero-pad very short clips so every row has the same length and
        # the stacked result is a regular 2-D array.
        mfcc_vec = np.pad(
            mfcc_vec, (0, mfcc_feature_num - mfcc_vec.size), mode='constant'
        )

    zcr_mean = np.array([np.mean(zcr)])
    rms_mean = np.array([np.mean(rms)])
    # The original vector carried the mean RMS twice ("energy" and "rms");
    # both slots are kept so the feature layout is unchanged.
    return np.concatenate((mfcc_vec, zcr_mean, rms_mean, rms_mean))


def getData(mfcc_feature_num=16):
    """Load the CASIA clips under ``path`` and build features and labels.

    Each clip is described by the first ``mfcc_feature_num`` values of its
    frame-major flattened 16-coefficient MFCC matrix, followed by the mean
    zero-crossing rate and the mean RMS energy (stored twice). The clip
    order is shuffled on every call.

    Args:
        mfcc_feature_num: Number of flattened MFCC values kept per clip.
            This is not the number of MFCC coefficients, which is fixed
            at 16.

    Returns:
        A tuple ``(features, labels)`` of numpy arrays. ``features`` has one
        row of ``mfcc_feature_num + 3`` values per clip; ``labels`` holds the
        integer class id looked up in ``EMOTION_LABEL`` from the name of the
        directory containing the clip.

    Raises:
        KeyError: If a clip's parent directory is not a key of
            ``EMOTION_LABEL``.
    """
    wav_file_path = _list_wav_files(path)
    shuffle(wav_file_path)  # randomize the order of the audio files

    data_feature = []
    data_labels = []
    for wav_file in wav_file_path:
        data_feature.append(_extract_features(wav_file, mfcc_feature_num))
        # The emotion is the clip's parent directory name; os.path keeps
        # this correct regardless of the platform's path separator.
        emotion = os.path.basename(os.path.dirname(wav_file))
        data_labels.append(int(EMOTION_LABEL[emotion]))
    return np.array(data_feature), np.array(data_labels)


def test():
    best_acc = 0
    best_mfcc_feature_num = 0
    for i in range(100 200):
        

评论

共有 条评论