Main Page | Modules | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

boss_morphemes_de.h

Go to the documentation of this file.
00001 /* boss_morphemes_de.h    $Revision: 1.3 $ $Date: 2005/12/10 20:33:54 $
00002    Copyright (C) 2000 University of Bonn.
00003    http://www.ikp.uni-bonn.de/boss
00004 
00005    This program is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU General Public License
00007    as published by the Free Software Foundation; either version 2
00008    of the License, or (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software
00017    Foundation, Inc., 59 Temple Place - Suite 330,
00018    Boston, MA  02111-1307, USA.
00019 */
00020 
00027 #ifndef BOSS_MORPHEMES_DE_H
00028 #define BOSS_MORPHEMES_DE_H
00029 
00030 using namespace std;
00031 
#include <algorithm>
#include <deque>
#include <istream>
#include <iterator>
#include <list>
#include <map>
#include <ostream>
#include <string>
#include <vector>

#include "boss_utility/boss_types.h"
#include "boss_utility/boss_fsa.h"
#include "boss_utility/boss_config.h"

#include "boss_mysql/boss_mysql_handler.h"
#include "boss_mysql/boss_mysql_result.h"
00043 
00044 
00050 typedef vector< BOSS::t_SSPair >           t_SSPairVector;
00051 typedef map< char, BOSS::t_SVector >       t_LexiconMap;
00052 typedef t_LexiconMap::iterator       t_LexiconMapIt;
00053 typedef t_LexiconMap::const_iterator t_LexiconMapCIt;
00054 
/**
 * A pair of unsigned positions plus an optional morpheme string.
 *
 * `morph` may be null. The struct has no destructor and never deletes the
 * pointee, so the pointed-to string must be kept alive by whoever created
 * the entry.
 * NOTE(review): `left`/`right` look like the bounds of a substring inside a
 * word -- confirm against the lexicon implementation.
 */
struct t_SubStringEntry {
	unsigned            left;
	unsigned            right;
	const std::string * morph;

	/** Creates an entry with both positions 0 and no morpheme. */
	t_SubStringEntry() : left(0), right(0), morph(0) {}

	/**
	 * Creates an entry for the given positions.
	 * @param l  value stored in `left`
	 * @param r  value stored in `right`
	 * @param m  morpheme string to reference, or null (the default)
	 */
	t_SubStringEntry(unsigned l, unsigned r, const std::string * m = 0)
		: left(l), right(r), morph(m) {}
};

/** Orders entries by their `left` position only; `right` and `morph` are ignored. */
inline bool operator < (const t_SubStringEntry & a, const t_SubStringEntry & b)
{
	return a.left < b.left;
}

/**
 * Writes an entry as "<morph> <left> <right>".
 * A null `morph` is printed as the literal text "(null)".
 */
inline std::ostream & operator << (std::ostream & out, const t_SubStringEntry & e)
{
	if (e.morph)
		out << *e.morph;
	else
		out << "(null)";
	return out << " " << e.left << " " << e.right;
}
00076 
00077 #ifdef BONT
/**
 * One record of the morpheme lexicon: three strings.
 * NOTE(review): the field names suggest orthography, transcription and
 * morpheme class -- confirm against the lexicon file format.
 */
struct m_line {
  std::string orth;
  std::string trans;
  std::string m_class;

  /** Creates a record with all three fields empty. */
  m_line() {}

  /**
   * Creates a record from its three fields.
   * The arguments are taken by const reference so each string is copied
   * once (into the member) rather than twice.
   */
  m_line(const std::string & o, const std::string & t, const std::string & c)
    : orth(o), trans(t), m_class(c) {}
};

/** Sequence of lexicon records. */
typedef std::vector< m_line > m_Vector;
00088 #endif
00089 typedef vector< t_SubStringEntry >   t_SubStringEntryVector;
00090 typedef list< t_SubStringEntry >     t_SubStringEntryList;
00091 typedef list< t_SubStringEntryList > t_SubStringEntryDList;
/** Double-ended queue of strings. */
typedef std::deque< std::string >            t_SDeque;
/** String-to-string map. */
typedef std::map< std::string, std::string > t_SSMap;
00094 
00102 class BOSS_MorphemeLexicon_DE {
00103 private:
00104         t_LexiconMap lexicon_map;
00105         BOSS::Config & cl;
00106         MySQL_Handler * data_base;
00107 public:
00108         BOSS_MorphemeLexicon_DE(BOSS::Config & cl, MySQL_Handler * data_base);
00109         t_SubStringEntryVector find(const string & word) const;
00110 };
00111 
00133 class BOSS_Morphemes_DE {
00134 private:
00135         BOSS::Config & cl;
00136         MySQL_Handler * data_base;
00137 
00138         BOSS::FSA *pfsa;
00139         BOSS_MorphemeLexicon_DE lexicon;
00140         t_SSPairVector selectLong(t_SubStringEntryDList & slist) const;
00141         // Joerg
00142         bool is_vowel(const string & phone) const;
00143         void maximum_onset(BOSS::t_SVector & coda, t_SDeque & onset) const;
00144         inline string t_SVector_to_string(const BOSS::t_SVector & v) const {
00145                 string result("");
00146                 for (unsigned i=0; i<v.size(); i++) result+=v[i];
00147                 return result;
00148         }
00149         BOSS::t_SVector try_parse(const string & phones) const;
00150         t_SSPairVector join_stems(const t_SSPairVector & trans) const;
00151         BOSS::t_SVector syllable_border(const t_SSPairVector & joined_trans) const;
00152         BOSS::t_SVector morpheme_border(const t_SSPairVector & joined_trans, const BOSS::t_SVector & syl_chain) const;
00153         BOSS::t_SVector apply_morphonology(const t_SSPairVector & trans, const BOSS::t_SVector & final_trans) const;
00154 public:
00155         BOSS_Morphemes_DE(BOSS::Config & cl, MySQL_Handler * data_base);
00156         ~BOSS_Morphemes_DE();
00157         string operator () (const string & word) const;
00158 };
00159 
00160 inline BOSS::t_SVector operator + (BOSS::t_SVector va, const BOSS::t_SVector & vb)
00161 {
00162         for (unsigned i=0; i<vb.size(); i++) va.push_back(vb[i]);
00163         return va;
00164 }       
00165 
00166 inline BOSS::t_SVector operator + (BOSS::t_SVector va, const string & s)
00167 {
00168         va.push_back(s);
00169         return va;
00170 }
00171 
00172 inline BOSS::t_SVector operator + (const t_SDeque & d, BOSS::t_SVector v)
00173 {
00174         BOSS::t_SVector result;
00175         for (unsigned i=0; i<d.size(); i++) result.push_back(d[i]);
00176         return result+v;
00177 }
00178 
00179 inline BOSS::t_SVector operator + (BOSS::t_SVector v, const t_SDeque & d)
00180 {
00181         for (unsigned i=0; i<d.size(); i++) v.push_back(d[i]);
00182         return v;
00183 }
00184 
00185 inline ostream& operator << (ostream & out, const BOSS::t_SVector & v)
00186 {
00187         copy(v.begin(), v.end(), ostream_iterator<string>(out, ""));
00188         return out;
00189 }
00190 
00191 inline ostream& operator << (ostream & out, const t_SDeque & d)
00192 {
00193         copy(d.begin(), d.end(), ostream_iterator<string>(out, " "));
00194         return out;
00195 }
00196 
00197 #ifdef BONT
00198 
00199 inline istream & operator >> (istream & in, m_line & m) {
00200         return in >> m.orth >> m.trans >> m.m_class;
00201 }
00202 
00203 inline ostream & operator << (ostream & out, const m_line & m) {
00204         return out << m.orth << " " << m.trans << " " << m.m_class;
00205 }
00206 
00208 inline istream & operator >> (istream & in, m_Vector & v) {
00209         copy(istream_iterator< m_line >(in), istream_iterator< m_line >(), back_inserter(v));
00210         return in;
00211 }
00212 
00213 inline ostream & operator << (ostream & out, const m_Vector & v) {
00214         copy(v.begin(), v.end(), ostream_iterator< m_line >(out, "\n"));
00215         return out;
00216 }
00217 #endif
00218  // end of boss_transcription group
00220 #endif

Generated on Tue Dec 20 23:14:37 2005 for BOSS by doxygen 1.3.6