Taisir Jibian Rahi
Published © MIT

Bengali Speech Recognition

This project analyzes real-life audio recordings of the Bengali phrases “Khule Dao” and “Bondho Koro”.

Advanced · Full instructions provided · Over 8 days · 150
Bengali Speech Recognition

Things used in this project

Software apps and online services

MATLAB
MATLAB

Story

Read more

Schematics

Bengali Speech Recognition Flow Chart

Code

Bengali Speech Recognition Code

MATLAB
% Start from a clean session: clear the command window, remove everything
% from memory and close any open figures.
clc;
clear all;
close all;

% Number of training clips accepted for "Khule Dao" (c1) and "Bondho Koro" (c2).
c1 = 0;
c2 = 0;

% Running totals of the peak / minimum spectrum frequencies:
%   sum1, sum2 -> "Khule Dao"   (max-bin frequency, min-bin frequency)
%   sum3, sum4 -> "Bondho Koro" (max-bin frequency, min-bin frequency)
sum1 = 0;
sum2 = 0;
sum3 = 0;
sum4 = 0;

% Tally of test clips assigned to each phrase.
No_of_khule_dao = 0;
No_of_bondho_koro = 0;

% Sampling frequency in Hz.
Fs = 44100;

disp('The code is running in MATLAB, this may take a while')

%% Training data for "Khule Dao"
% For every training clip that exists on disk, take the FFT magnitude of the
% first audio channel, record the frequency of its largest and of its
% smallest bin, and accumulate those two frequencies for the clips that fall
% inside the acceptance band. The accumulated values are then averaged.
%
% NOTE(review): only OP-1 is read here, while the "Bondho Koro" training
% loop reads up to 116 files - confirm that a single clip is intended.
num_khule_dao_files = 1;
F_KD_max = zeros(1, num_khule_dao_files);
F_KD_min = zeros(1, num_khule_dao_files);

for i = 1:num_khule_dao_files
    % File location, name and type of the training clip.
    file1 = ['Z:\Open_Ended\Train_Data\Train_Open\OP-' num2str(i) '.mp3'];

    % exist(...,'file') returns 2 when the name refers to a file.
    if exist(file1, 'file') ~= 2
        continue
    end

    % Use the sample rate stored in the file, not an assumed 44.1 kHz,
    % so that the bin -> frequency mapping is right for every recording.
    [y, fs_file] = audioread(file1);
    if isempty(y)
        continue    % nothing to analyse in an empty clip
    end

    NFFT = size(y, 1);                      % number of samples per channel
    x1 = abs(fft(y(:, 1), NFFT));           % magnitude spectrum, first channel

    % Integer-based axis: always exactly NFFT points (a fractional colon
    % step can gain or lose an element through rounding).
    F = (0:NFFT-1) * (fs_file / NFFT);

    % max/min return the first index when several bins share the extreme.
    [~, b] = max(x1);
    [~, d] = min(x1);
    F_KD_max(i) = F(b);     % frequency of the largest-magnitude bin
    F_KD_min(i) = F(d);     % frequency of the smallest-magnitude bin

    % Only clips whose two frequencies lie inside the band are averaged.
    in_band = F_KD_max(i) > 100   && F_KD_max(i) < 600 && ...
              F_KD_min(i) > 13000 && F_KD_min(i) < 21900;
    if in_band
        c1 = c1 + 1;                    % number of clips accepted
        sum1 = sum1 + F_KD_max(i);      % sum of max-bin frequencies
        sum2 = sum2 + F_KD_min(i);      % sum of min-bin frequencies
    end
end

if c1 == 0
    % 0/0 below yields NaN, which makes every later comparison false.
    warning('BSR:noKhuleDaoTraining', ...
        'No "Khule Dao" training clip was accepted; its averages will be NaN.');
end
average_max_frequency_of_Khule_Dao = sum1 / c1;     % mean max-bin frequency
average_min_frequency_of_Khule_Dao = sum2 / c1;     % mean min-bin frequency

% Show the last clip that was read. The waveform is drawn against time in
% seconds (it was previously drawn against the frequency axis).
% plot(F, x1)   % alternative: magnitude spectrum of that clip
if exist('y', 'var') && ~isempty(y)
    plot((0:size(y, 1) - 1) / fs_file, y)
    xlabel('Time (s)')
end
%% Training data for "Bondho Koro"
% For every training clip that exists on disk, take the FFT magnitude of the
% first audio channel, record the frequency of its largest and of its
% smallest bin, and accumulate those two frequencies for the clips that fall
% inside the acceptance band. The accumulated values are then averaged.
num_bondho_koro_files = 116;
F_BK_max = zeros(1, num_bondho_koro_files);
F_BK_min = zeros(1, num_bondho_koro_files);

for i = 1:num_bondho_koro_files
    % File location, name and type of the training clip.
    file2 = ['Z:\Open_Ended\Train_Data\Train_Close\CL-' num2str(i) '.mp3'];

    % exist(...,'file') returns 2 when the name refers to a file.
    if exist(file2, 'file') ~= 2
        continue
    end

    % Use the sample rate stored in the file, not an assumed 44.1 kHz,
    % so that the bin -> frequency mapping is right for every recording.
    [y, fs_file] = audioread(file2);
    if isempty(y)
        continue    % nothing to analyse in an empty clip
    end

    NFFT = size(y, 1);                      % number of samples per channel
    x1 = abs(fft(y(:, 1), NFFT));           % magnitude spectrum, first channel

    % Integer-based axis: always exactly NFFT points (a fractional colon
    % step can gain or lose an element through rounding).
    F = (0:NFFT-1) * (fs_file / NFFT);

    % max/min return the first index when several bins share the extreme.
    [~, b] = max(x1);
    [~, d] = min(x1);
    F_BK_max(i) = F(b);     % frequency of the largest-magnitude bin
    F_BK_min(i) = F(d);     % frequency of the smallest-magnitude bin

    % Only clips whose two frequencies lie inside the band are averaged.
    in_band = F_BK_max(i) > 50    && F_BK_max(i) < 580 && ...
              F_BK_min(i) > 15000 && F_BK_min(i) < 21900;
    if in_band
        c2 = c2 + 1;                    % number of clips accepted
        sum3 = sum3 + F_BK_max(i);      % sum of max-bin frequencies
        sum4 = sum4 + F_BK_min(i);      % sum of min-bin frequencies
    end
end

if c2 == 0
    % 0/0 below yields NaN, which makes every later comparison false.
    warning('BSR:noBondhoKoroTraining', ...
        'No "Bondho Koro" training clip was accepted; its averages will be NaN.');
end
average_max_frequency_of_bondo_koro = sum3 / c2;    % mean max-bin frequency
average_min_frequency_of_bondo_koro = sum4 / c2;    % mean min-bin frequency

%% Test data: decide whether each clip is "Khule Dao" or "Bondho Koro"
% For each test clip, find the frequencies of the largest and smallest FFT
% magnitude bins and compare their distances to the two trained averages.
% A clip is labelled "Khule Dao" only when it is at least as close to the
% "Khule Dao" averages on BOTH measures; otherwise it is "Bondho Koro".

% Comparisons against NaN are always false, so every clip would silently
% fall into the else-branch; make that situation visible.
if any(isnan([average_max_frequency_of_Khule_Dao, ...
              average_min_frequency_of_Khule_Dao, ...
              average_max_frequency_of_bondo_koro, ...
              average_min_frequency_of_bondo_koro]))
    warning('BSR:nanTrainingAverage', ...
        'A trained average is NaN; the classification below is not meaningful.');
end

for i = 1:20
    % File location, name and type of the test clip.
    % (Alternative location: C:\Users\Asus\Desktop\Open_Ended\Test_Data\Test_Data/T-)
    file = ['Z:\Open_Ended\Test_Data\Test_Data\T-' num2str(i) '.mp3'];

    % Skip missing files, as the training loops do, instead of aborting.
    % exist(...,'file') returns 2 when the name refers to a file.
    if exist(file, 'file') ~= 2
        warning('BSR:missingTestFile', 'Test file not found, skipping: %s', file);
        continue
    end

    % Use the sample rate stored in the file, not an assumed 44.1 kHz.
    [y, fs_file] = audioread(file);
    if isempty(y)
        warning('BSR:emptyTestFile', 'Test file is empty, skipping: %s', file);
        continue
    end

    NFFT = size(y, 1);                      % number of samples per channel
    x1 = abs(fft(y(:, 1), NFFT));           % magnitude spectrum, first channel

    % Integer-based axis: always exactly NFFT points.
    F = (0:NFFT-1) * (fs_file / NFFT);

    % max/min return the first index when several bins share the extreme.
    [~, idx_max] = max(x1);
    [~, idx_min] = min(x1);
    F_max = F(idx_max);     % frequency of the largest-magnitude bin
    F_min = F(idx_min);     % frequency of the smallest-magnitude bin

    % Distance of this clip from each trained average.
    Diff_khule_dao_max  = abs(F_max - average_max_frequency_of_Khule_Dao);
    Diff_khule_dao_min  = abs(F_min - average_min_frequency_of_Khule_Dao);
    Diff_bondo_koro_max = abs(F_max - average_max_frequency_of_bondo_koro);
    Diff_bondo_koro_min = abs(F_min - average_min_frequency_of_bondo_koro);

    if (Diff_bondo_koro_max >= Diff_khule_dao_max) && (Diff_bondo_koro_min >= Diff_khule_dao_min)
        disp('Khule Dao')
        No_of_khule_dao = No_of_khule_dao + 1;
    else
        disp('Bondo Koro')
        No_of_bondho_koro = No_of_bondho_koro + 1;
    end
end

% Left without semicolons on purpose so the totals are printed.
No_of_khule_dao
No_of_bondho_koro

Credits

Taisir Jibian Rahi
7 projects • 7 followers
Never regret in life.
Contact

Comments

Please log in or sign up to comment.