1 /*-------------------------------------------------------------------------
2 ParserGen.cs -- Generation of the Recursive Descent Parser
3 Compiler Generator Coco/R,
4 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
5 extended by M. Loeberbauer & A. Woess, Univ. of Linz
6 with improvements by Pat Terry, Rhodes University
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
22 As an exception, it is allowed to write an extension of Coco/R that is
23 used as a plugin in non-free software.
24
25 If not otherwise stated, any source code generated by Coco/R (other than
26 Coco/R itself) does not fall under the GNU General Public License.
27 -------------------------------------------------------------------------*/
28 using System;
29 using System.IO;
30 using System.Collections;
31 using System.Text;
32
33 namespace at.jku.ssw.Coco {
34
35 public class ParserGen {
36
37 const int maxTerm = 3; // sets of size < maxTerm are enumerated
38 const char CR = '
';
39 const char LF = '
';
40 const int EOF = -1;
41
42 const int tErr = 0; // error codes
43 const int altErr = 1;
44 const int syncErr = 2;
45
46 public Position usingPos; // "using" definitions from the attributed grammar
47
48 int errorNr; // highest parser error number
49 Symbol curSy; // symbol whose production is currently generated
50 FileStream fram; // parser frame file
51 StreamWriter gen; // generated parser source file
52 StringWriter err; // generated parser error messages
53 ArrayList symSet = new ArrayList();
54
55 Tab tab; // other Coco objects
56 TextWriter trace;
57 Errors errors;
58 Buffer buffer;
59
60 void Indent (int n) {
61 for (int i = 1; i <= n; i++) gen.Write(' ');
62 }
63
64
65 bool Overlaps(BitArray s1, BitArray s2) {
66 int len = s1.Count;
67 for (int i = 0; i < len; ++i) {
68 if (s1[i] && s2[i]) {
69 return true;
70 }
71 }
72 return false;
73 }
74
75 // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning
76 bool UseSwitch (Node p) {
77 BitArray s1, s2;
78 if (p.typ != Node.alt) return false;
79 int nAlts = 0;
80 s1 = new BitArray(tab.terminals.Count);
81 while (p != null) {
82 s2 = tab.Expected0(p.sub, curSy);
83 // must not optimize with switch statement, if there are ll1 warnings
84 if (Overlaps(s1, s2)) { return false; }
85 s1.Or(s2);
86 ++nAlts;
87 // must not optimize with switch-statement, if alt uses a resolver expression
88 if (p.sub.typ == Node.rslv) return false;
89 p = p.down;
90 }
91 return nAlts > 5;
92 }
93
94 void CopySourcePart (Position pos, int indent) {
95 // Copy text described by pos from atg to gen
96 int ch, i;
97 if (pos != null) {
98 buffer.Pos = pos.beg; ch = buffer.Read();
99 if (tab.emitLines) {
100 gen.WriteLine();
101 gen.WriteLine("#line {0} "{1}"", pos.line, tab.srcName);
102 }
103 Indent(indent);
104 while (buffer.Pos <= pos.end) {
105 while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF
106 gen.WriteLine(); Indent(indent);
107 if (ch == CR) ch = buffer.Read(); // skip CR
108 if (ch == LF) ch = buffer.Read(); // skip LF
109 for (i = 1; i <= pos.col && (ch == ' ' || ch == ' '); i++) {
110 // skip blanks at beginning of line
111 ch = buffer.Read();
112 }
113 if (buffer.Pos > pos.end) goto done;
114 }
115 gen.Write((char)ch);
116 ch = buffer.Read();
117 }
118 done:
119 if (indent > 0) gen.WriteLine();
120 }
121 }
122
123 void GenErrorMsg (int errTyp, Symbol sym) {
124 errorNr++;
125 err.Write(" case " + errorNr + ": s = "");
126 switch (errTyp) {
127 case tErr:
128 if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " expected");
129 else err.Write(sym.name + " expected");
130 break;
131 case altErr: err.Write("invalid " + sym.name); break;
132 case syncErr: err.Write("this symbol not expected in " + sym.name); break;
133 }
134 err.WriteLine(""; break;");
135 }
136
137 int NewCondSet (BitArray s) {
138 for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets)
139 if (Sets.Equals(s, (BitArray)symSet[i])) return i;
140 symSet.Add(s.Clone());
141 return symSet.Count - 1;
142 }
143
144 void GenCond (BitArray s, Node p) {
145 if (p.typ == Node.rslv) CopySourcePart(p.pos, 0);
146 else {
147 int n = Sets.Elements(s);
148 if (n == 0) gen.Write("false"); // happens if an ANY set matches no symbol
149 else if (n <= maxTerm)
150 foreach (Symbol sym in tab.terminals) {
151 if (s[sym.n]) {
152 gen.Write("la.kind == {0}", sym.n);
153 --n;
154 if (n > 0) gen.Write(" || ");
155 }
156 }
157 else
158 gen.Write("StartOf({0})", NewCondSet(s));
159 }
160 }
161
162 void PutCaseLabels (BitArray s) {
163 foreach (Symbol sym in tab.terminals)
164 if (s[sym.n]) gen.Write("case {0}: ", sym.n);
165 }
166
167 void GenCode (Node p, int indent, BitArray isChecked) {
168 Node p2;
169 BitArray s1, s2;
170 while (p != null) {
171 switch (p.typ) {
172 case Node.nt: {
173 Indent(indent);
174 gen.Write(p.sym.name + "(");
175 CopySourcePart(p.pos, 0);
176 gen.WriteLine(");");
177 break;
178 }
179 case Node.t: {
180 Indent(indent);
181 // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n
182 if (isChecked[p.sym.n]) gen.WriteLine("Get();");
183 else gen.WriteLine("Expect({0});", p.sym.n);
184 break;
185 }
186 case Node.wt: {
187 Indent(indent);
188 s1 = tab.Expected(p.next, curSy);
189 s1.Or(tab.allSyncSets);
190 gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1));
191 break;
192 }
193 case Node.any: {
194 Indent(indent);
195 int acc = Sets.Elements(p.set);
196 if (tab.terminals.Count == (acc + 1) || (acc > 0 && Sets.Equals(p.set, isChecked))) {
197 // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here
198 gen.WriteLine("Get();");
199 } else {
200 GenErrorMsg(altErr, curSy);
201 if (acc > 0) {
202 gen.Write("if ("); GenCond(p.set, p); gen.WriteLine(") Get(); else SynErr({0});", errorNr);
203 } else gen.WriteLine("SynErr({0}); // ANY node that matches no symbol", errorNr);
204 }
205 break;
206 }
207 case Node.eps: break; // nothing
208 case Node.rslv: break; // nothing
209 case Node.sem: {
210 CopySourcePart(p.pos, indent);
211 break;
212 }
213 case Node.sync: {
214 Indent(indent);
215 GenErrorMsg(syncErr, curSy);
216 s1 = (BitArray)p.set.Clone();
217 gen.Write("while (!("); GenCond(s1, p); gen.Write(")) {");
218 gen.Write("SynErr({0}); Get();", errorNr); gen.WriteLine("}");
219 break;
220 }
221 case Node.alt: {
222 s1 = tab.First(p);
223 bool equal = Sets.Equals(s1, isChecked);
224 bool useSwitch = UseSwitch(p);
225 if (useSwitch) { Indent(indent); gen.WriteLine("switch (la.kind) {"); }
226 p2 = p;
227 while (p2 != null) {
228 s1 = tab.Expected(p2.sub, curSy);
229 Indent(indent);
230 if (useSwitch) {
231 PutCaseLabels(s1); gen.WriteLine("{");
232 } else if (p2 == p) {
233 gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {");
234 } else if (p2.down == null && equal) { gen.WriteLine("} else {");
235 } else {
236 gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {");
237 }
238 GenCode(p2.sub, indent + 1, s1);
239 if (useSwitch) {
240 Indent(indent); gen.WriteLine(" break;");
241 Indent(indent); gen.WriteLine("}");
242 }
243 p2 = p2.down;
244 }
245 Indent(indent);
246 if (equal) {
247 gen.WriteLine("}");
248 } else {
249 GenErrorMsg(altErr, curSy);
250 if (useSwitch) {
251 gen.WriteLine("default: SynErr({0}); break;", errorNr);
252 Indent(indent); gen.WriteLine("}");
253 } else {
254 gen.Write("} "); gen.WriteLine("else SynErr({0});", errorNr);
255 }
256 }
257 break;
258 }
259 case Node.iter: {
260 Indent(indent);
261 p2 = p.sub;
262 gen.Write("while (");
263 if (p2.typ == Node.wt) {
264 s1 = tab.Expected(p2.next, curSy);
265 s2 = tab.Expected(p.next, curSy);
266 gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2));
267 s1 = new BitArray(tab.terminals.Count); // for inner structure
268 if (p2.up || p2.next == null) p2 = null; else p2 = p2.next;
269 } else {
270 s1 = tab.First(p2);
271 GenCond(s1, p2);
272 }
273 gen.WriteLine(") {");
274 GenCode(p2, indent + 1, s1);
275 Indent(indent); gen.WriteLine("}");
276 break;
277 }
278 case Node.opt:
279 s1 = tab.First(p.sub);
280 Indent(indent);
281 gen.Write("if ("); GenCond(s1, p.sub); gen.WriteLine(") {");
282 GenCode(p.sub, indent + 1, s1);
283 Indent(indent); gen.WriteLine("}");
284 break;
285 }
286 if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync)
287 isChecked.SetAll(false); // = new BitArray(tab.terminals.Count);
288 if (p.up) break;
289 p = p.next;
290 }
291 }
292
293 void GenTokens() {
294 foreach (Symbol sym in tab.terminals) {
295 if (Char.IsLetter(sym.name[0]))
296 gen.WriteLine(" public const int _{0} = {1};", sym.name, sym.n);
297 }
298 }
299
300 void GenPragmas() {
301 foreach (Symbol sym in tab.pragmas) {
302 gen.WriteLine(" public const int _{0} = {1};", sym.name, sym.n);
303 }
304 }
305
306 void GenCodePragmas() {
307 foreach (Symbol sym in tab.pragmas) {
308 gen.WriteLine(" if (la.kind == {0}) {{", sym.n);
309 CopySourcePart(sym.semPos, 4);
310 gen.WriteLine(" }");
311 }
312 }
313
314 void GenProductions() {
315 foreach (Symbol sym in tab.nonterminals) {
316 curSy = sym;
317 gen.Write(" void {0}(", sym.name);
318 CopySourcePart(sym.attrPos, 0);
319 gen.WriteLine(") {");
320 CopySourcePart(sym.semPos, 2);
321 GenCode(sym.graph, 2, new BitArray(tab.terminals.Count));
322 gen.WriteLine(" }"); gen.WriteLine();
323 }
324 }
325
326 void InitSets() {
327 for (int i = 0; i < symSet.Count; i++) {
328 BitArray s = (BitArray)symSet[i];
329 gen.Write(" {");
330 int j = 0;
331 foreach (Symbol sym in tab.terminals) {
332 if (s[sym.n]) gen.Write("_T,"); else gen.Write("_x,");
333 ++j;
334 if (j%4 == 0) gen.Write(" ");
335 }
336 if (i == symSet.Count-1) gen.WriteLine("_x}"); else gen.WriteLine("_x},");
337 }
338 }
339
340 public void WriteParser () {
341 Generator g = new Generator(tab);
342 int oldPos = buffer.Pos; // Pos is modified by CopySourcePart
343 symSet.Add(tab.allSyncSets);
344
345 fram = g.OpenFrame("Parser.frame");
346 gen = g.OpenGen("Parser.cs");
347 err = new StringWriter();
348 foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym);
349
350 g.GenCopyright();
351 g.SkipFramePart("-->begin");
352
353 if (usingPos != null) { CopySourcePart(usingPos, 0); gen.WriteLine(); }
354 g.CopyFramePart("-->namespace");
355 /* AW open namespace, if it exists */
356 if (tab.nsName != null && tab.nsName.Length > 0) {
357 gen.WriteLine("namespace {0} {{", tab.nsName);
358 gen.WriteLine();
359 }
360 g.CopyFramePart("-->constants");
361 GenTokens(); /* ML 2002/09/07 write the token kinds */
362 gen.WriteLine(" public const int maxT = {0};", tab.terminals.Count-1);
363 GenPragmas(); /* ML 2005/09/23 write the pragma kinds */
364 g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0);
365 g.CopyFramePart("-->pragmas"); GenCodePragmas();
366 g.CopyFramePart("-->productions"); GenProductions();
367 g.CopyFramePart("-->parseRoot"); gen.WriteLine(" {0}();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine(" Expect(0);");
368 g.CopyFramePart("-->initialization"); InitSets();
369 g.CopyFramePart("-->errors"); gen.Write(err.ToString());
370 g.CopyFramePart(null);
371 /* AW 2002-12-20 close namespace, if it exists */
372 if (tab.nsName != null && tab.nsName.Length > 0) gen.Write("}");
373 gen.Close();
374 buffer.Pos = oldPos;
375 }
376
377 public void WriteStatistics () {
378 trace.WriteLine();
379 trace.WriteLine("{0} terminals", tab.terminals.Count);
380 trace.WriteLine("{0} symbols", tab.terminals.Count + tab.pragmas.Count +
381 tab.nonterminals.Count);
382 trace.WriteLine("{0} nodes", tab.nodes.Count);
383 trace.WriteLine("{0} sets", symSet.Count);
384 }
385
386 public ParserGen (Parser parser) {
387 tab = parser.tab;
388 errors = parser.errors;
389 trace = parser.trace;
390 buffer = parser.scanner.buffer;
391 errorNr = -1;
392 usingPos = null;
393 }
394
395 } // end ParserGen
396
397 } // end namespace