00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #ifndef BOSS_MORPHEMES_DE_H
00028 #define BOSS_MORPHEMES_DE_H
00029
00030 using namespace std;
00031
00032 #include <string>
00033 #include <vector>
00034 #include <map>
00035 #include <list>
00036 #include <deque>
00037 #include "boss_utility/boss_types.h"
00038 #include "boss_utility/boss_fsa.h"
00039 #include "boss_utility/boss_config.h"
00040
00041 #include "boss_mysql/boss_mysql_handler.h"
00042 #include "boss_mysql/boss_mysql_result.h"
00043
00044
00050 typedef vector< BOSS::t_SSPair > t_SSPairVector;
00051 typedef map< char, BOSS::t_SVector > t_LexiconMap;
00052 typedef t_LexiconMap::iterator t_LexiconMapIt;
00053 typedef t_LexiconMap::const_iterator t_LexiconMapCIt;
00054
/**
 * @brief Plain record made of two unsigned positions and a pointer to a string.
 *
 * The struct declares no destructor, so the string behind `morph` is never
 * released here; `morph` may be null.
 * NOTE(review): `left`/`right` presumably delimit the part of a word covered
 * by `*morph` -- confirm against the code that fills these entries.
 */
struct t_SubStringEntry {
    unsigned left;
    unsigned right;
    const std::string * morph;

    /// Creates an entry with both positions set to 0 and a null `morph`.
    t_SubStringEntry()
        : left(0), right(0), morph(0)
    {}

    /// Creates an entry with the given positions and a null `morph`.
    t_SubStringEntry(const unsigned & l, const unsigned & r)
        : left(l), right(r), morph(0)
    {}

    /// Creates an entry with the given positions that points at `m`.
    t_SubStringEntry(const unsigned & l, const unsigned & r, const std::string * m)
        : left(l), right(r), morph(m)
    {}
};
00068 inline bool operator < (const t_SubStringEntry & a, const t_SubStringEntry & b)
00069 { return a.left < b.left; }
00070 inline ostream & operator << (ostream & out, const t_SubStringEntry & e)
00071 {
00072 if(e.morph)
00073 return out << *e.morph << " " << e.left << " " << e.right;
00074 return out << "(null)" << " " << e.left << " " << e.right;
00075 }
00076
00077 #ifdef BONT
00078
/**
 * @brief Record made of three strings; only compiled when BONT is defined.
 *
 * NOTE(review): the fields presumably hold an orthographic form, its
 * transcription and a morpheme class -- confirm against the BONT data files.
 */
struct m_line {
    std::string orth;
    std::string trans;
    std::string m_class;

    /// Leaves all three strings empty.
    m_line() {}

    /// Copies the three arguments into the corresponding fields.
    m_line(std::string o, std::string t, std::string c)
        : orth(o), trans(t), m_class(c)
    {}
};

/// Sequence of m_line records.
typedef std::vector< m_line > m_Vector;
00088 #endif
00089 typedef vector< t_SubStringEntry > t_SubStringEntryVector;
00090 typedef list< t_SubStringEntry > t_SubStringEntryList;
00091 typedef list< t_SubStringEntryList > t_SubStringEntryDList;
00092 typedef deque< string > t_SDeque;
00093 typedef map< string, string > t_SSMap;
00094
00102 class BOSS_MorphemeLexicon_DE {
00103 private:
00104 t_LexiconMap lexicon_map;
00105 BOSS::Config & cl;
00106 MySQL_Handler * data_base;
00107 public:
00108 BOSS_MorphemeLexicon_DE(BOSS::Config & cl, MySQL_Handler * data_base);
00109 t_SubStringEntryVector find(const string & word) const;
00110 };
00111
00133 class BOSS_Morphemes_DE {
00134 private:
00135 BOSS::Config & cl;
00136 MySQL_Handler * data_base;
00137
00138 BOSS::FSA *pfsa;
00139 BOSS_MorphemeLexicon_DE lexicon;
00140 t_SSPairVector selectLong(t_SubStringEntryDList & slist) const;
00141
00142 bool is_vowel(const string & phone) const;
00143 void maximum_onset(BOSS::t_SVector & coda, t_SDeque & onset) const;
00144 inline string t_SVector_to_string(const BOSS::t_SVector & v) const {
00145 string result("");
00146 for (unsigned i=0; i<v.size(); i++) result+=v[i];
00147 return result;
00148 }
00149 BOSS::t_SVector try_parse(const string & phones) const;
00150 t_SSPairVector join_stems(const t_SSPairVector & trans) const;
00151 BOSS::t_SVector syllable_border(const t_SSPairVector & joined_trans) const;
00152 BOSS::t_SVector morpheme_border(const t_SSPairVector & joined_trans, const BOSS::t_SVector & syl_chain) const;
00153 BOSS::t_SVector apply_morphonology(const t_SSPairVector & trans, const BOSS::t_SVector & final_trans) const;
00154 public:
00155 BOSS_Morphemes_DE(BOSS::Config & cl, MySQL_Handler * data_base);
00156 ~BOSS_Morphemes_DE();
00157 string operator () (const string & word) const;
00158 };
00159
00160 inline BOSS::t_SVector operator + (BOSS::t_SVector va, const BOSS::t_SVector & vb)
00161 {
00162 for (unsigned i=0; i<vb.size(); i++) va.push_back(vb[i]);
00163 return va;
00164 }
00165
00166 inline BOSS::t_SVector operator + (BOSS::t_SVector va, const string & s)
00167 {
00168 va.push_back(s);
00169 return va;
00170 }
00171
00172 inline BOSS::t_SVector operator + (const t_SDeque & d, BOSS::t_SVector v)
00173 {
00174 BOSS::t_SVector result;
00175 for (unsigned i=0; i<d.size(); i++) result.push_back(d[i]);
00176 return result+v;
00177 }
00178
00179 inline BOSS::t_SVector operator + (BOSS::t_SVector v, const t_SDeque & d)
00180 {
00181 for (unsigned i=0; i<d.size(); i++) v.push_back(d[i]);
00182 return v;
00183 }
00184
00185 inline ostream& operator << (ostream & out, const BOSS::t_SVector & v)
00186 {
00187 copy(v.begin(), v.end(), ostream_iterator<string>(out, ""));
00188 return out;
00189 }
00190
00191 inline ostream& operator << (ostream & out, const t_SDeque & d)
00192 {
00193 copy(d.begin(), d.end(), ostream_iterator<string>(out, " "));
00194 return out;
00195 }
00196
00197 #ifdef BONT
00198
00199 inline istream & operator >> (istream & in, m_line & m) {
00200 return in >> m.orth >> m.trans >> m.m_class;
00201 }
00202
00203 inline ostream & operator << (ostream & out, const m_line & m) {
00204 return out << m.orth << " " << m.trans << " " << m.m_class;
00205 }
00206
00208 inline istream & operator >> (istream & in, m_Vector & v) {
00209 copy(istream_iterator< m_line >(in), istream_iterator< m_line >(), back_inserter(v));
00210 return in;
00211 }
00212
00213 inline ostream & operator << (ostream & out, const m_Vector & v) {
00214 copy(v.begin(), v.end(), ostream_iterator< m_line >(out, "\n"));
00215 return out;
00216 }
00217 #endif
00218
00220 #endif