00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00025 #ifndef TRAING2P_H
00026 #define TRAING2P_H
00027
00028
#include <stdlib.h>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "boss_g2p/boss_g2p.h"
#include "boss_g2pmatrix.h"
#include "phimbl/phimbl.h"
00036
00037
/**
 * Numeric limit used by the alignment code (value: 1000).
 *
 * NOTE(review): the name suggests a cap on the number of alignments
 * that are considered; the code that applies it is not part of this
 * header -- confirm against the implementation file.
 *
 * Kept as a preprocessor macro so that existing uses (including any
 * in #if expressions) continue to work.
 */
#define MAX_NUMBER_OF_ALIGNMENTS 1000
00043
/**
 * One entry of a mapping table: a key string and the target string
 * associated with it.
 */
struct t_MappingTableEntry {
  std::string key;     ///< First token of the entry.
  std::string target;  ///< Second token of the entry.
};

/**
 * Extracts one mapping table entry from a stream.
 *
 * Reads two whitespace-delimited tokens, storing the first in
 * mte.key and the second in mte.target.
 *
 * @param in   Stream to read from.
 * @param mte  Entry that receives the two tokens.
 * @return     The stream @p in; it is in a failed state if either
 *             token could not be read.
 */
inline std::istream & operator >> (std::istream & in, t_MappingTableEntry & mte)
{
  return in >> mte.key >> mte.target;
}
00051
/// Ordered string-to-string map used as a mapping table.
typedef std::map< std::string, std::string > t_MappingTableMap;

/**
 * Training type selector with the two values G2P (0) and SYLL (1).
 *
 * NOTE(review): where this enum is consumed is not visible in this
 * header (the Traing2p constructor takes its training type as a
 * string) -- confirm usage in the implementation file.
 */
typedef enum {G2P, SYLL} TrainingType;
00054
00055 struct t_Alignment {
00056 string word;
00057 BOSS::t_SVector graph;
00058 vector<BOSS::t_SVector> phon;
00059 BOSS::t_SVector best;
00060 bool accepted;
00061 void size() { cerr << graph.size() << " " << phon.size() << " " << best.size() << endl; }
00062 };
00063
00064
00065 class Traing2p : public BOSS_g2p {
00066 public:
00067 Traing2p(BOSS::Config & config) : BOSS_g2p(config) {}
00068 Traing2p(const BOSS_g2p & c) : BOSS_g2p(c) {}
00069 Traing2p(BOSS::Config & config, const string & traintype, const bool & createflag, const string & phonfile, const string & treefile, const string & lexfile, const string & featurefile, const LabelFormat & F);
00070
00071 protected:
00072 void trainphon(const string & lexfile, const string & treefile, const string & featurefile, const LabelFormat & F);
00073 void trainsyll(const string & lexfile, const string & treefile, const string & featurefile, const LabelFormat & F);
00074 void trainstress(const string & lexfile, const string & treefile, const string & featurefile, const LabelFormat & F);
00075 void create_tree(const string & featurefile, const string & treefile);
00076 string clean_blftrans(const string & s);
00077 string clean_blftrans(const string & s, bool syll_flag);
00078 string remove_quotes(const string & s);
00079 BOSS::t_SVector extract_graphemes(t_MappingTableMap & mt);
00080 t_Alignment align_pair(const string & graph, const string & phon, BOSS_g2pMatrix & gpm);
00081 void assign_scores(const t_Alignment & al, BOSS_g2pMatrix & gpm);
00082 t_Alignment find_bestAlignment(t_Alignment al, BOSS_g2pMatrix & gpm);
00083 BOSS::t_SVector generateSyllFeatures(const BOSS::t_SVector & t);
00084 BOSS::t_SVector generateStressFeatures(const BOSS::t_SVector & t);
00085 BOSS::t_SVector generate_trainset(BOSS_g2pMatrix & gpm);
00086 void print_matrix(BOSS_g2pMatrix & m);
00087 double binomial(const double & n, const double k);
00088 BOSS::Phimbl phimbl;
00089 };
00090
00091 #endif
00092