CocoSourcesCS 4

  1 /*-------------------------------------------------------------------------
  2 ParserGen.cs -- Generation of the Recursive Descent Parser
  3 Compiler Generator Coco/R,
  4 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
  5 extended by M. Loeberbauer & A. Woess, Univ. of Linz
  6 with improvements by Pat Terry, Rhodes University
  7 
  8 This program is free software; you can redistribute it and/or modify it 
  9 under the terms of the GNU General Public License as published by the 
 10 Free Software Foundation; either version 2, or (at your option) any 
 11 later version.
 12 
 13 This program is distributed in the hope that it will be useful, but 
 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
 15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
 16 for more details.
 17 
 18 You should have received a copy of the GNU General Public License along 
 19 with this program; if not, write to the Free Software Foundation, Inc., 
 20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 21 
 22 As an exception, it is allowed to write an extension of Coco/R that is
 23 used as a plugin in non-free software.
 24 
 25 If not otherwise stated, any source code generated by Coco/R (other than 
 26 Coco/R itself) does not fall under the GNU General Public License.
 27 -------------------------------------------------------------------------*/
 28 using System;
 29 using System.IO;
 30 using System.Collections;
 31 using System.Text;
 32 
 33 namespace at.jku.ssw.Coco {
 34 
 35 public class ParserGen {
 36 
 37     const int maxTerm = 3;        // sets of size < maxTerm are enumerated
 38     const char CR  = '
';
 39     const char LF  = '
';
 40     const int EOF = -1;
 41 
 42     const int tErr = 0;            // error codes
 43     const int altErr = 1;
 44     const int syncErr = 2;
 45     
 46     public Position usingPos; // "using" definitions from the attributed grammar
 47 
 48     int errorNr;      // highest parser error number
 49     Symbol curSy;     // symbol whose production is currently generated
 50     FileStream fram;  // parser frame file
 51     StreamWriter gen; // generated parser source file
 52     StringWriter err; // generated parser error messages
 53     ArrayList symSet = new ArrayList();
 54     
 55     Tab tab;          // other Coco objects
 56     TextWriter trace;
 57     Errors errors;
 58     Buffer buffer;
 59     
 60     void Indent (int n) {
 61         for (int i = 1; i <= n; i++) gen.Write('	');
 62     }
 63     
 64     
 65     bool Overlaps(BitArray s1, BitArray s2) {
 66         int len = s1.Count;
 67         for (int i = 0; i < len; ++i) {
 68             if (s1[i] && s2[i]) {
 69                 return true;
 70             }
 71         }
 72         return false;
 73     }
 74     
 75     // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning
 76     bool UseSwitch (Node p) {
 77         BitArray s1, s2;
 78         if (p.typ != Node.alt) return false;
 79         int nAlts = 0;
 80         s1 = new BitArray(tab.terminals.Count);
 81         while (p != null) {
 82             s2 = tab.Expected0(p.sub, curSy);
 83             // must not optimize with switch statement, if there are ll1 warnings
 84             if (Overlaps(s1, s2)) { return false; }
 85             s1.Or(s2);
 86             ++nAlts;
 87             // must not optimize with switch-statement, if alt uses a resolver expression
 88             if (p.sub.typ == Node.rslv) return false;
 89             p = p.down;
 90         }
 91         return nAlts > 5;
 92     }
 93 
 94     void CopySourcePart (Position pos, int indent) {
 95         // Copy text described by pos from atg to gen
 96         int ch, i;
 97         if (pos != null) {
 98             buffer.Pos = pos.beg; ch = buffer.Read();
 99             if (tab.emitLines) {
100                 gen.WriteLine();
101                 gen.WriteLine("#line {0} "{1}"", pos.line, tab.srcName);
102             }
103             Indent(indent);
104             while (buffer.Pos <= pos.end) {
105                 while (ch == CR || ch == LF) {  // eol is either CR or CRLF or LF
106                     gen.WriteLine(); Indent(indent);
107                     if (ch == CR) ch = buffer.Read(); // skip CR
108                     if (ch == LF) ch = buffer.Read(); // skip LF
109                     for (i = 1; i <= pos.col && (ch == ' ' || ch == '	'); i++) { 
110                         // skip blanks at beginning of line
111                         ch = buffer.Read();
112                     }
113                     if (buffer.Pos > pos.end) goto done;
114                 }
115                 gen.Write((char)ch);
116                 ch = buffer.Read();
117             }
118             done:
119             if (indent > 0) gen.WriteLine();
120         }
121     }
122 
123     void GenErrorMsg (int errTyp, Symbol sym) {
124         errorNr++;
125         err.Write("			case " + errorNr + ": s = "");
126         switch (errTyp) {
127             case tErr: 
128                 if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " expected");
129                 else err.Write(sym.name + " expected"); 
130                 break;
131             case altErr: err.Write("invalid " + sym.name); break;
132             case syncErr: err.Write("this symbol not expected in " + sym.name); break;
133         }
134         err.WriteLine(""; break;");
135     }
136     
137     int NewCondSet (BitArray s) {
138         for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets)
139             if (Sets.Equals(s, (BitArray)symSet[i])) return i;
140         symSet.Add(s.Clone());
141         return symSet.Count - 1;
142     }
143     
144     void GenCond (BitArray s, Node p) {
145         if (p.typ == Node.rslv) CopySourcePart(p.pos, 0);
146         else {
147             int n = Sets.Elements(s);
148             if (n == 0) gen.Write("false"); // happens if an ANY set matches no symbol
149             else if (n <= maxTerm)
150                 foreach (Symbol sym in tab.terminals) {
151                     if (s[sym.n]) {
152                         gen.Write("la.kind == {0}", sym.n);
153                         --n;
154                         if (n > 0) gen.Write(" || ");
155                     }
156                 }
157             else
158                 gen.Write("StartOf({0})", NewCondSet(s));
159         }
160     }
161         
162     void PutCaseLabels (BitArray s) {
163         foreach (Symbol sym in tab.terminals)
164             if (s[sym.n]) gen.Write("case {0}: ", sym.n);
165     }
166     
167     void GenCode (Node p, int indent, BitArray isChecked) {
168         Node p2;
169         BitArray s1, s2;
170         while (p != null) {
171             switch (p.typ) {
172                 case Node.nt: {
173                     Indent(indent);
174                     gen.Write(p.sym.name + "(");
175                     CopySourcePart(p.pos, 0);
176                     gen.WriteLine(");");
177                     break;
178                 }
179                 case Node.t: {
180                     Indent(indent);
181                     // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n
182                     if (isChecked[p.sym.n]) gen.WriteLine("Get();");
183                     else gen.WriteLine("Expect({0});", p.sym.n);
184                     break;
185                 }
186                 case Node.wt: {
187                     Indent(indent);
188                     s1 = tab.Expected(p.next, curSy);
189                     s1.Or(tab.allSyncSets);
190                     gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1));
191                     break;
192                 }
193                 case Node.any: {
194                     Indent(indent);
195                     int acc = Sets.Elements(p.set);
196                     if (tab.terminals.Count == (acc + 1) || (acc > 0 && Sets.Equals(p.set, isChecked))) {
197                         // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here
198                         gen.WriteLine("Get();");
199                     } else {
200                         GenErrorMsg(altErr, curSy);
201                         if (acc > 0) {
202                             gen.Write("if ("); GenCond(p.set, p); gen.WriteLine(") Get(); else SynErr({0});", errorNr);
203                         } else gen.WriteLine("SynErr({0}); // ANY node that matches no symbol", errorNr);
204                     }
205                     break;
206                 }
207                 case Node.eps: break; // nothing
208                 case Node.rslv: break; // nothing
209                 case Node.sem: {
210                     CopySourcePart(p.pos, indent);
211                     break;
212                 }
213                 case Node.sync: {
214                     Indent(indent);
215                     GenErrorMsg(syncErr, curSy);
216                     s1 = (BitArray)p.set.Clone();
217                     gen.Write("while (!("); GenCond(s1, p); gen.Write(")) {");
218                     gen.Write("SynErr({0}); Get();", errorNr); gen.WriteLine("}");
219                     break;
220                 }
221                 case Node.alt: {
222                     s1 = tab.First(p);
223                     bool equal = Sets.Equals(s1, isChecked);
224                     bool useSwitch = UseSwitch(p);
225                     if (useSwitch) { Indent(indent); gen.WriteLine("switch (la.kind) {"); }
226                     p2 = p;
227                     while (p2 != null) {
228                         s1 = tab.Expected(p2.sub, curSy);
229                         Indent(indent);
230                         if (useSwitch) { 
231                             PutCaseLabels(s1); gen.WriteLine("{");
232                         } else if (p2 == p) { 
233                             gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); 
234                         } else if (p2.down == null && equal) { gen.WriteLine("} else {");
235                         } else { 
236                             gen.Write("} else if (");  GenCond(s1, p2.sub); gen.WriteLine(") {"); 
237                         }
238                         GenCode(p2.sub, indent + 1, s1);
239                         if (useSwitch) {
240                             Indent(indent); gen.WriteLine("	break;");
241                             Indent(indent); gen.WriteLine("}");
242                         }
243                         p2 = p2.down;
244                     }
245                     Indent(indent);
246                     if (equal) {
247                         gen.WriteLine("}");
248                     } else {
249                         GenErrorMsg(altErr, curSy);
250                         if (useSwitch) {
251                             gen.WriteLine("default: SynErr({0}); break;", errorNr);
252                             Indent(indent); gen.WriteLine("}");
253                         } else {
254                             gen.Write("} "); gen.WriteLine("else SynErr({0});", errorNr);
255                         }
256                     }
257                     break;
258                 }
259                 case Node.iter: {
260                     Indent(indent);
261                     p2 = p.sub;
262                     gen.Write("while (");
263                     if (p2.typ == Node.wt) {
264                         s1 = tab.Expected(p2.next, curSy);
265                         s2 = tab.Expected(p.next, curSy);
266                         gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2));
267                         s1 = new BitArray(tab.terminals.Count);  // for inner structure
268                         if (p2.up || p2.next == null) p2 = null; else p2 = p2.next;
269                     } else {
270                         s1 = tab.First(p2);
271                         GenCond(s1, p2);
272                     }
273                     gen.WriteLine(") {");
274                     GenCode(p2, indent + 1, s1);
275                     Indent(indent); gen.WriteLine("}");
276                     break;
277                 }
278                 case Node.opt:
279                     s1 = tab.First(p.sub);
280                     Indent(indent);
281                     gen.Write("if ("); GenCond(s1, p.sub); gen.WriteLine(") {");
282                     GenCode(p.sub, indent + 1, s1);
283                     Indent(indent); gen.WriteLine("}");
284                     break;
285             }
286             if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) 
287                 isChecked.SetAll(false);  // = new BitArray(tab.terminals.Count);
288             if (p.up) break;
289             p = p.next;
290         }
291     }
292     
293     void GenTokens() {
294         foreach (Symbol sym in tab.terminals) {
295             if (Char.IsLetter(sym.name[0]))
296                 gen.WriteLine("	public const int _{0} = {1};", sym.name, sym.n);
297         }
298     }
299     
300     void GenPragmas() {
301         foreach (Symbol sym in tab.pragmas) {
302             gen.WriteLine("	public const int _{0} = {1};", sym.name, sym.n);
303         }
304     }
305 
306     void GenCodePragmas() {
307         foreach (Symbol sym in tab.pragmas) {
308             gen.WriteLine("				if (la.kind == {0}) {{", sym.n);
309             CopySourcePart(sym.semPos, 4);
310             gen.WriteLine("				}");
311         }
312     }
313 
314     void GenProductions() {
315         foreach (Symbol sym in tab.nonterminals) {
316             curSy = sym;
317             gen.Write("	void {0}(", sym.name);
318             CopySourcePart(sym.attrPos, 0);
319             gen.WriteLine(") {");
320             CopySourcePart(sym.semPos, 2);
321             GenCode(sym.graph, 2, new BitArray(tab.terminals.Count));
322             gen.WriteLine("	}"); gen.WriteLine();
323         }
324     }
325 
326     void InitSets() {
327         for (int i = 0; i < symSet.Count; i++) {
328             BitArray s = (BitArray)symSet[i];
329             gen.Write("		{");
330             int j = 0;
331             foreach (Symbol sym in tab.terminals) {
332                 if (s[sym.n]) gen.Write("_T,"); else gen.Write("_x,");
333                 ++j;
334                 if (j%4 == 0) gen.Write(" ");
335             }
336             if (i == symSet.Count-1) gen.WriteLine("_x}"); else gen.WriteLine("_x},");
337         }
338     }
339 
340     public void WriteParser () {
341         Generator g = new Generator(tab);
342         int oldPos = buffer.Pos;  // Pos is modified by CopySourcePart
343         symSet.Add(tab.allSyncSets);
344 
345         fram = g.OpenFrame("Parser.frame");
346         gen = g.OpenGen("Parser.cs");
347         err = new StringWriter();
348         foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym);
349         
350         g.GenCopyright();
351         g.SkipFramePart("-->begin");
352 
353         if (usingPos != null) { CopySourcePart(usingPos, 0); gen.WriteLine(); }
354         g.CopyFramePart("-->namespace");
355         /* AW open namespace, if it exists */
356         if (tab.nsName != null && tab.nsName.Length > 0) {
357             gen.WriteLine("namespace {0} {{", tab.nsName);
358             gen.WriteLine();
359         }
360         g.CopyFramePart("-->constants");
361         GenTokens(); /* ML 2002/09/07 write the token kinds */
362         gen.WriteLine("	public const int maxT = {0};", tab.terminals.Count-1);
363         GenPragmas(); /* ML 2005/09/23 write the pragma kinds */
364         g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0);
365         g.CopyFramePart("-->pragmas"); GenCodePragmas();
366         g.CopyFramePart("-->productions"); GenProductions();
367         g.CopyFramePart("-->parseRoot"); gen.WriteLine("		{0}();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine("		Expect(0);");
368         g.CopyFramePart("-->initialization"); InitSets();
369         g.CopyFramePart("-->errors"); gen.Write(err.ToString());
370         g.CopyFramePart(null);
371         /* AW 2002-12-20 close namespace, if it exists */
372         if (tab.nsName != null && tab.nsName.Length > 0) gen.Write("}");
373         gen.Close();
374         buffer.Pos = oldPos;
375     }
376     
377     public void WriteStatistics () {
378         trace.WriteLine();
379         trace.WriteLine("{0} terminals", tab.terminals.Count);
380         trace.WriteLine("{0} symbols", tab.terminals.Count + tab.pragmas.Count +
381                                        tab.nonterminals.Count);
382         trace.WriteLine("{0} nodes", tab.nodes.Count);
383         trace.WriteLine("{0} sets", symSet.Count);
384     }
385 
386     public ParserGen (Parser parser) {
387         tab = parser.tab;
388         errors = parser.errors;
389         trace = parser.trace;
390         buffer = parser.scanner.buffer;
391         errorNr = -1;
392         usingPos = null;
393     }
394 
395 } // end ParserGen
396 
397 } // end namespace