00001 /* boss_g2p.h $Revision: 1.3 $ $Date: 2005/12/10 20:33:54 $ 00002 Copyright (C) 2001 University of Bonn. 00003 http://www.ikp.uni-bonn.de/boss 00004 00005 This program is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU General Public License 00007 as published by the Free Software Foundation; either version 2 00008 of the License, or (at your option) any later version. 00009 00010 This program is distributed in the hope that it will be useful, 00011 but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 GNU General Public License for more details. 00014 00015 You should have received a copy of the GNU General Public License 00016 along with this program; if not, write to the Free Software 00017 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 00018 00025 #ifndef BOSSG2P_H 00026 #define BOSSG2P_H 00027 00028 using namespace std; 00029 00030 #include <iostream> 00031 #include <stdlib.h> 00032 #include <string> 00033 #include <vector> 00034 #include <map> 00035 #include <fstream> 00036 #include <sstream> 00037 #include <valarray> 00038 #include "boss_utility/boss_fsa.h" 00039 #include "boss_utility/boss_utility.h" 00040 #include "boss_utility/boss_config.h" 00041 #include "tdsp/dsp_matrix.h" 00042 #include "phimbl/phimbl.h" 00043 00044 // Classifier settings 00045 #define TRANS_TRAIN_WINDOW 4 00046 #define SYLL_TRAIN_WINDOW 4 00047 #define STRESS_TRAIN_WINDOW 4 00048 00049 #define TIMBLCLASS_SYLLABLE_BOUNDARY_PRESENT "yes" 00050 #define TIMBLCLASS_SYLLABLE_BOUNDARY_ABSENT "no" 00051 #define TIMBLCLASS_PRIMARY_LEXICAL_STRESS_PRESENT "primary" 00052 #define TIMBLCLASS_SECONDARY_LEXICAL_STRESS_PRESENT "secondary" 00053 #define TIMBLCLASS_LEXICAL_STRESS_ABSENT "no" 00054 00055 // Symbols used in incoming and resulting transcriptions 00056 00057 #define NULL_PHONEME "<NULL_PHONEME>" 00058 #define SYLLABLE_BOUNDARY_TAG "." 00059 #define WORD_BOUNDARY_TAG "#" 00060 #define PRIMARY_LEXICAL_STRESS_TAG "\"" 00061 #define SECONDARY_LEXICAL_STRESS_TAG "%" 00062 00063 typedef map< string, unsigned > t_GPTableMap; 00064 typedef enum {BLF, HADIFIX} LabelFormat; 00065 00068 class BOSS_g2p { 00069 public: 00070 BOSS_g2p(BOSS::Config & __cl); 00071 BOSS_g2p(BOSS::Config & __cl, const LabelFormat & F); 00072 BOSS_g2p(const BOSS_g2p & c); 00073 virtual BOSS_g2p & operator = (const BOSS_g2p & c); 00074 virtual ~BOSS_g2p(); 00075 string transcribe(const string & s) const; 00076 string g2p(const string & s) const; 00077 string assign_stress(const string & s) const; 00078 string syllabify(const string & s) const; 00079 protected: 00080 BOSS::Config & cl; 00081 BOSS::FSA * g2ppfsa; 00082 BOSS::FSA * syllpfsa; 00083 BOSS::FSA * stresspfsa; 00084 BOSS::t_SVector pv; 00085 void iniPointers(); 00086 void delPointers(); 00087 BOSS::t_SVector read_phoneset(const string & phonfile); 00088 vector< BOSS::t_SVector > generateG2PFeatures(BOSS::t_SVector g, BOSS::t_SVector p) const; 00089 BOSS::t_SVector string2SyllFeature(const BOSS::t_SVector & t,const unsigned & j) const; 00090 BOSS::t_SVector string2StressFeature(const BOSS::t_SVector & t,const unsigned & j) const; 00091 virtual string postProcPhon(const string & s) const { return s; } 00092 virtual string postProcSyll(const string & s) const { return s; } 00093 virtual string postProcStress(const string & s) const { return s; } 00094 virtual string postProcTrans(const string & s) const { return s; } 00095 BOSS::Phimbl phimbl; 00096 BOSS::Phimbl::t_Data g2pData; 00097 BOSS::Phimbl::t_Data syllData; 00098 BOSS::Phimbl::t_Data stressData; 00099 }; 00100 00101 #endif