// $Id: Parser.cs 83 2003-06-21 08:03:35Z mdupont $
// PxButton | build | csc /o /doc:Euler.xml *.cs |
using System;
using System.Collections;
using System.Text;
// for stack trace!
using System.Diagnostics;
using System.Reflection;

/// <summary>
/// N3 parser. Turns an N3 string into a graph of Euler nodes linked through
/// their subj / obj / near fields.
/// </summary>
/// <remarks>Original author: Jos De Roo.</remarks>
public class Parser {
  internal String str; // N3 string
  internal int pos = 0; // tokenizer position
  internal ArrayList vt = null; // quantified variables table
  internal Euler r = null; // root Euler object
  internal String u = null; // base URI of the RDF resource

  // Every character in ST is a one-character token of its own (see next()).
  internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
  internal const String LOGa = "a";
  internal const String LOGe = "=";
  internal const String LOGi = "=>";

  // NOTE(review): these two values are empty in the text this file was recovered
  // from. They look like angle-bracketed URIs that were lost in extraction
  // (presumably the log:implies / log:notImplies terms) - confirm against the
  // upstream sources before relying on them. Left exactly as found.
  internal const String LOGimplies = "";
  internal const String LOGnotImplies = "";

  // NOTE(review): the declarations from DPO onwards were truncated in the
  // recovered text: the DPO literal was left unterminated and the constants
  // used by parse() and list() were missing altogether. The values below are
  // reconstructed from the standard namespace URIs, written in the <...> form
  // this parser uses for resolved URIs. Confirm against the upstream sources;
  // other constants may also have been lost here.
  internal const String DPO = "<http://www.daml.org/2001/03/daml+oil#>";
  internal const String RDFfirst = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#first>";
  internal const String RDFrest = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#rest>";
  internal const String RDFnil = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#nil>";
  internal const String RDFList = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#List>";
  internal const String XSDstring = "<http://www.w3.org/2001/XMLSchema#string>";

  // Number of parsers created so far; used only to label debug traces.
  static int instancecount = 0;
  int instance = 0;

  /// <summary>constructs an N3 parser with no input attached</summary>
  public Parser() {
    instance = instancecount++;
  }

  /// <summary>constructs an N3 parser</summary>
  /// <param name="s">the N3 string</param>
  /// <param name="vartab">quantified variables table</param>
  /// <param name="root">Euler object</param>
  /// <param name="uri">URI of the RDF resource</param>
  public Parser(String s, ArrayList vartab, Euler root, String uri) {
    instance = instancecount++;
    str = s;
    vt = vartab;
    r = root;
    u = uri;
  }

  /// <summary>N3 triple parse method</summary>
  /// <returns>Euler object, or null when no token is left</returns>
  public virtual Euler Parse() {
    String nt = tokenize();
    if (nt == null) return null;
    return parse(true, nt);
  }

  /// <summary>N3 node parse method</summary>
  /// <param name="nt">next token</param>
  /// <returns>Euler object</returns>
  public virtual Euler Parse(String nt) {
    Console.Error.WriteLine (" public virtual Euler Parse(String nt)");
    return parse(false, nt);
  }

  /// <summary>
  /// Writes the current call stack through Debug.WriteLine, one line per frame.
  /// </summary>
  private void dumpStack() {
    StackTrace trace = new System.Diagnostics.StackTrace(true);
    for (int i = 0; i < trace.FrameCount; i++) {
      StackFrame sf = trace.GetFrame(i);
      Debug.WriteLine(" File: " + sf.GetFileName() + " Line: " + sf.GetFileLineNumber() + " Method: " + sf.GetMethod());
    }
  }

  /// <summary>
  /// Core recursive parse routine. With b == true a subject is parsed first
  /// (a "{...}" formula, a "[...]" node, a "(...)" list or a single term) and
  /// ";" / "," continuations are followed afterwards; with b == false only the
  /// term nt itself is turned into a node.
  /// </summary>
  /// <param name="b">true to parse a statement, false to parse a single node</param>
  /// <param name="nt">next token</param>
  /// <returns>
  /// the node built so far; a NullReferenceException raised while parsing is
  /// logged and the partially filled node is returned
  /// </returns>
  internal Euler parse(bool b, String nt) {
    Console.Error.WriteLine ("internal Euler parse(bool b, String nt)");
    Console.Error.WriteLine ("parser stack" + instance);
    dumpStack();
    Console.Error.WriteLine ("parser end of stack");
    Euler e = new Euler();
    Console.Error.WriteLine ("new euler");
    try {
      if (b) {
        Console.Error.WriteLine ("ifb");
        if (nt == null) {
          Console.Error.WriteLine ("if nt == null");
          e.subj = null;
        }
        else if (nt.Equals("{")) {
          Console.Error.WriteLine ("if nt == {");
          Console.Error.WriteLine ("before tokenize");
          nt = tokenize();
          if (nt.Equals(".")) {
            Console.Error.WriteLine ("if nt == .");
            Console.Error.WriteLine ("going to recurse");
            e.subj = parse(false, "{}");
            Console.Error.WriteLine ("after recurse");
          }
          else {
            Console.Error.WriteLine ("else");
            Console.Error.WriteLine ("going to recurse2");
            e.subj = parse(true, nt);
            Console.Error.WriteLine ("after recurse2");
          }
          Euler el = e.subj;
          Console.Error.WriteLine ("before tokenize2");
          nt = tokenize();
          Console.Error.WriteLine ("after tokenize2");
          // Append further statements of the formula at the end of the near chain.
          while (el.near != null) {
            el = el.near;
          }
          while (!nt.Equals("}")) {
            Console.Error.WriteLine ("going to recurse 3");
            el.near = parse(true, nt);
            Console.Error.WriteLine ("after recurse 3");
            while (el.near != null) {
              el = el.near;
            }
            nt = tokenize();
          }
        }
        else if (nt.Equals("[")) {
          Console.Error.WriteLine ("recurse 4");
          e.subj = parse(true, null);
          Console.Error.WriteLine ("~recurse 4");
          if (e.subj.verb == null) r.verb = ";]";
          nt = tokenize();
          if (!nt.Equals("]")) Console.Error.WriteLine("** (p1) expecting ] at " + e + " but got " + nt);
        }
        else if (nt.Equals("(")) {
          list(e);
          e.subj = e.obj;
          e.obj = null;
        }
        else {
          Console.Error.WriteLine ("recurse 5");
          e.subj = parse(false, nt);
          Console.Error.WriteLine ("~recurse 5");
          if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.GetHashCode() + '>';
        }
        nt = tokenize();
      }
      Console.Error.WriteLine ("after ifb");
      //// ------------------------
      if (nt == null || nt.Equals(";")) {
        Console.Error.WriteLine ("nt = ;");
        e.bound = true;
        Console.Error.WriteLine ("Parser:Return1");
        return e;
      }
      else if (e.subj != null && nt.StartsWith("_:")) {
        // verb handler!
        Console.Error.WriteLine ("nt =~ _:");
        e.cverb = nt;
        if (!vt.Contains(nt)) vt.Add(nt);
      }
      else if (nt.Equals("[")) {
        // verb handler!
        Console.Error.WriteLine ("nt = [");
        String a = "_:" + e.GetHashCode();
        if (!vt.Contains(a)) vt.Add(a);
        Console.Error.WriteLine ("recurse 6");
        Euler ap = parse(true, a);
        Console.Error.WriteLine ("~recurse 6");
        e.cverb = a;
        if (ap.verb != null) r.verb = ";]";
        nt = tokenize();
        if (!nt.Equals("]")) Console.Error.WriteLine("** (p2) expecting ] at " + e + " but got " + nt);
        e.near = ap;
      }
      else if (nt.Equals("(")) {
        Console.Error.WriteLine ("nt = (");
        list(e);
        Console.Error.WriteLine ("Parser:Return2");
        return e.obj;
      }
      else {
        Console.Error.WriteLine ("else e.cverb = nt;");
        e.cverb = nt;
      }
      //------------------------------
      Console.Error.WriteLine ("ok, copy verb");
      e.verb = e.cverb;
      // A trailing "@@" is the marker token() appends for "is ... of".
      if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);
      // s1 is the part of the verb before the first double quote, s2 the rest.
      String s1 = e.verb;
      String s2 = "";
      // Console.Error.WriteLine("e.verb index of " + e.verb);
      if (e.verb.IndexOf('"') != -1) {
        s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
        s2 = e.verb.Substring(e.verb.IndexOf('"'));
      }
      if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe)
          && s1[0] != '(' && s1[0] != '<' && s1[0] != '[' && s1[0] != '_'
          && s1[0] != '"' && s1[0] != '\'' && s1.IndexOf(':') != -1) {
        // Prefixed name: expand the prefix through the namespace tables of r.
        String pf = s1.Substring(0, s1.IndexOf(':') + 1);
        String pg = (String) r.hshtNsp[pf]; // TODO check
        if (pg != null) pf = pg;
        String qf = (String) r.hshtNs[pf];
        if (qf == null) {
          Console.Error.WriteLine("** no @prefix " + pf + " found, taking <" + u + "#>");
          qf = "<" + u + "#>";
          r.hshtNs[pf] = qf;
        }
        StringBuilder sb = new StringBuilder(pf);
        // Console.Error.WriteLine("e.cverb.IndexOf " + e.cverb);
        sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
        e.cverb = sb.ToString();
        // Insert the local name just before the closing '>' of the namespace.
        sb = new StringBuilder(qf);
        sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
        e.verb = sb.ToString() + s2;
      }
      else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == -1) {
        // <...> without a colon: resolve against the base URI.
        e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;
      }
      //TODO see ParseSpecials();
      if (e.verb.StartsWith("?") && !vt.Contains(e.verb)) vt.Add(e.verb);
      // The lookup of e.varid in vt is disabled in this revision:
      // Console.Error.WriteLine("TODO: This broken");
      // Console.Error.WriteLine("count " + count);
      // Console.Error.WriteLine("e.varid = vt.IndexOf(e.verb); " + vt +":"+ e.verb);
      // e.varid = vt.IndexOf(e.verb);
      if (e.varid == -1) e.bound = true;
      if (nt.Equals(".")) {
        Console.Error.WriteLine ("going to recurse 7");
        e.obj = parse(false, "");
        Console.Error.WriteLine ("~ recurse 7");
        e.verb = "";
        e.cverb = "";
        e.bound = true;
        Console.Error.WriteLine ("Parser:Return3");
        return e;
      }
      // "^^" datatype suffix directly after the term. Both characters are
      // bounds-checked so a single trailing '^' cannot index past the input.
      if (str != null && pos + 1 < str.Length && str[pos] == '^' && str[pos + 1] == '^') {
        pos = pos + 2;
        Console.Error.WriteLine ("going to recurse 8");
        Euler et = parse(false, tokenize());
        Console.Error.WriteLine ("~going to recurse 8");
        if (!et.verb.Equals(XSDstring)) {
          e.subj = e.copy();
          e.verb = e.cverb = "^^";
          e.bound = true;
          e.obj = et;
          String.Intern(e.obj.verb);
          if (e.subj.bound) {
            //TODO Datatype.Compare(e.obj.verb, r.getLit(e.subj), r.getLit(e.subj));
          }
        }
      }
      //---------------------------------
      Console.Error.WriteLine ("before if b");
      if (b) {
        // The object-parsing code below is commented out in this revision.
        // Console.Error.WriteLine ("if b");
        // nt = tokenize();
        // if (nt.Equals("{")) {
        //   nt = tokenize();
        //   Console.Error.WriteLine ("going to recurse 9");
        //   if (nt.Equals(".")) e.obj = parse(false, "{}");
        //   else e.obj = parse(true, nt);
        //   Console.Error.WriteLine ("~recurse 9");
        //   Euler el = e.obj;
        //   nt = tokenize();
        //   while (nt != null && !nt.Equals("}")) {
        //     Console.Error.WriteLine ("going to recurse 10");
        //     el.near = parse(true, nt);
        //     Console.Error.WriteLine ("~recurse 10");
        //     el = el.near;
        //     nt = tokenize();
        //   }
        // }
        // else if (nt.Equals("[")) {
        //   Console.Error.WriteLine ("going to recurse 11");
        //   e.obj = parse(true, null);
        //   Console.Error.WriteLine ("~recurse 11");
        //   if (e.obj.verb != null) r.verb = ";]";
        //   nt = tokenize();
        //   if (!nt.Equals("]")) Console.Error.WriteLine("** (p3) expecting ] at " + e + " but got " + nt);
        // }
        // else if (nt.Equals("(")) list(e);
        // else {
        //   Console.Error.WriteLine ("recurse 12");
        //   e.obj = parse(false, nt);
        //   Console.Error.WriteLine ("~recurse 12");
        //   if (e.obj.verb.Equals(OWLFunctionalProperty))
        //   {
        //     int x= e.subj.verb;
        //     r.hshtMto[x] = r;
        //   }
        //   if (e.obj.verb.Equals(OWLInverseFunctionalProperty)) r.hshtOtm[e.subj.verb] = r;
        // }
        // nt = tokenize();
        // if (nt.EndsWith("\"\"\"")) { e.obj.cverb = e.obj.cverb + nt; e.obj.verb = e.obj.cverb; nt = tokenize(); }
        Euler el2 = e;
        while (nt.Equals(";") || nt.Equals(",")) {
          while (el2.near != null) el2 = el2.near;
          if (nt.Equals(";")) {
            nt = tokenize();
            if (nt.Equals("]")) {
              r.verb = ";]";
              nt = ";";
              break;
            }
            else if (nt.Equals(".")) break;
          }
          else nt = el2.cverb; // "," repeats the previous verb
          Console.Error.WriteLine ("going to recurse 13");
          el2.near = parse(false, nt);
          Console.Error.WriteLine ("~ recurse 13");
          el2.near.subj = e.subj;
          nt = tokenize();
          if (nt.Equals("{")) {
            nt = tokenize();
            Console.Error.WriteLine ("going to recurse 14");
            if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
            else el2.near.obj = parse(true, nt);
            Console.Error.WriteLine ("~recurse 14");
            Euler ef = el2.near.obj;
            nt = tokenize();
            while (nt != null && !nt.Equals("}")) {
              Console.Error.WriteLine ("going to recurse 15");
              ef.near = parse(true, nt);
              Console.Error.WriteLine ("~recurse 15");
              ef = ef.near;
              nt = tokenize();
            }
          }
          else if (nt.Equals("[")) {
            Console.Error.WriteLine ("going to recurse 16");
            el2.near.obj = parse(true, null);
            Console.Error.WriteLine ("~recurse 16");
            if (el2.near.obj.verb != null) r.verb = ";]";
            nt = tokenize();
            if (!nt.Equals("]")) Console.Error.WriteLine("** (p4) expecting ] at " + e + " but got " + nt);
          }
          else if (nt.Equals("(")) list(el2.near);
          else {
            Console.Error.WriteLine ("going to recurse 17");
            Euler returnv = parse(false, nt);
            Console.Error.WriteLine ("~recurse 17~");
            Console.Error.WriteLine (" before set");
            el2.near.obj = returnv;
            Console.Error.WriteLine (" after set");
            // if (el2.near.obj.verb.Equals(OWLFunctionalProperty)) r.hshtMto[el2.near.subj.verb] = r;
            // if (el2.near.obj.verb.Equals(OWLInverseFunctionalProperty)) r.hshOtm[el2.near.subj.verb] = r;
          }
          nt = tokenize();
          if (nt.EndsWith("\"\"\"")) {
            el2.near.obj.cverb = el2.near.obj.cverb + nt;
            el2.near.obj.verb = el2.near.obj.cverb;
            nt = tokenize();
          }
          swap(el2.near);
        }
        if (!nt.Equals(".") && !nt.Equals(";")) Console.Error.WriteLine("** (p5) expecting . or ; at " + e + " but got " + nt);
      }
      Console.Error.WriteLine ("after if b");
      swap(e);
      e.far = r;
      Console.Error.WriteLine ("parser before return stack" );
      Console.Error.WriteLine ("parser instance" + instance);
      dumpStack();
      Console.Error.WriteLine ("parser before return");
      return e;
    }
    catch (NullReferenceException exc) {
      // Best effort: running out of tokens surfaces here as a null dereference.
      Console.Error.WriteLine ("catch");
      Console.Error.WriteLine(exc.StackTrace);
      Console.Error.WriteLine ("before return exception");
      return e;
    }
  }

  /// <summary>
  /// Returns the next significant token, skipping everything token() reports
  /// as null (whitespace, comments, prefix declarations).
  /// </summary>
  /// <returns>the token, or null once the input is exhausted</returns>
  internal String tokenize() {
    String nt = token();
    while (pos < str.Length && nt == null) nt = token();
    return nt;
  }

  /// <summary>
  /// Reads one raw lexical chunk at pos: either a run of characters that are
  /// not in ST, or a single ST character.
  /// </summary>
  /// <returns>the chunk; an empty string when pos is already at end of input</returns>
  internal String next() {
    int start = pos;
    // Console.Error.WriteLine("ST.IndexOf(str[pos]) " + ST + ":" +str[pos] );
    while (pos < str.Length && ST.IndexOf(str[pos]) < 0) {
      pos++;
      // Console.Error.WriteLine("ST.IndexOf(str[pos]) " + ST + ":" +str[pos] );
    }
    // Nothing consumed means we sit on a delimiter, which is a token by itself.
    // The bounds check keeps a call at end of input from indexing past str.
    if (start == pos && pos < str.Length && ST.IndexOf(str[pos]) >= 0) pos++;
    Console.Error.WriteLine ("Parser::next going to return");
    return str.Substring(start, pos - start);
  }

  /// <summary>
  /// Assembles one N3 token from raw chunks: quoted strings, &lt;...&gt; URIs,
  /// "=&gt;", "is ... of" / "has ... of", numbers with a fraction, blank node
  /// labels. Comments, whitespace and @prefix / bind declarations yield null.
  /// r.verb is used as the memory of the last token: the markers ".}" and ";]"
  /// make a "." / ";" come out before a closing "}" / "]" that lacks one.
  /// </summary>
  /// <returns>the token, or null when nothing significant was read</returns>
  internal String token() {
    Console.Error.WriteLine("Parser::Token()" );
    if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
    if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
    if (pos >= str.Length) return null;
    String t = null;
    String nt = next();
    Console.Error.WriteLine("token nt.IndexOf #" + nt);
    if (nt.IndexOf('#') != -1) {
      // Comment: keep what precedes '#', drop the rest of the line.
      t = nt.Substring(0, nt.IndexOf('#'));
      String v = r.verb;
      while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
      r.verb = v;
      if (t.Equals("")) return null;
    }
    else if (nt.Equals("@prefix") || nt.Equals("bind")) {
      String nsc = tokenize();
      if (nsc.Equals("default")) nsc = ":";
      String nsd = nsc;
      String nsu = tokenize();
      // Console.Error.WriteLine("nsu.IndexOf #" + nsu);
      if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == -1) nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
      // A prefix already bound to a different URI gets "ns" prepended until free.
      String nsv = (String) r.hshtNs[nsd];
      while (nsv != null && !nsu.Equals(nsv)) {
        nsd = "ns" + nsd;
        nsv = (String) r.hshtNs[nsd];
      }
      r.hshtNsp[nsd] = nsd; // TODO check
      r.hshtNsp[nsc] = nsd; // TODO check
      r.hshtNs[nsd] = nsu;
      tokenize();
      return null;
    }
    else if (nt.Equals("\"")) {
      StringBuilder sb = new StringBuilder("\"");
      // Two more quotes open a """ literal; guarded so a quote near the end
      // of the input does not index past it.
      if (pos + 1 < str.Length && str[pos] == '"' && str[pos + 1] == '"') {
        sb.Append(next());
        sb.Append(next());
        while (true) {
          sb.Append(next());
          if (sb.ToString().EndsWith("\"\"\"")) break;
          if (pos >= str.Length) {
            // Unterminated literal: stop instead of reading past the input.
            Console.Error.WriteLine("** (t1) expecting \"\"\" at " + sb);
            break;
          }
        }
        t = sb.ToString();
      }
      else {
        nt = next();
        while (!nt.Equals("\"")) {
          if (nt.Equals("\\")) {
            // Backslash: take the following chunk verbatim.
            sb.Append(nt);
            nt = next();
          }
          sb.Append(nt);
          if (pos >= str.Length) {
            Console.Error.WriteLine("** (t1) expecting \" at " + sb);
            break;
          }
          nt = next();
        }
        sb.Append("\"");
        // Console.Error.WriteLine("-@" + str[pos]);
        // A '-' or '@' suffix right after the closing quote is appended in lower case.
        if (pos < str.Length && "-@".IndexOf(str[pos]) != -1) sb.Append(token().ToLower());
        t = sb.ToString();
      }
    }
    else if (nt.Equals("'")) {
      StringBuilder sb = new StringBuilder("'");
      nt = next();
      while (!nt.Equals("'")) {
        sb.Append(nt);
        if (pos >= str.Length) {
          Console.Error.WriteLine("** (t2) expecting ' at " + sb);
          break;
        }
        nt = next();
      }
      t = sb.Append("'").ToString();
    }
    else if (nt.Equals("<")) {
      StringBuilder sb = new StringBuilder("<");
      nt = next();
      while (!nt.Equals(">")) {
        sb.Append(nt);
        if (pos >= str.Length) {
          Console.Error.WriteLine("** (t2) expecting > at " + sb);
          break;
        }
        nt = next();
      }
      t = sb.Append(">").ToString();
    }
    else if (nt.Equals("=")) {
      // "=>" is one token; a lone "=" at end of input stays "=".
      if (pos < str.Length && str[pos] == '>') t = nt + next();
      else t = nt;
    }
    else if (nt.Equals("is")) {
      t = tokenize();
      nt = tokenize();
      if (!nt.Equals("of")) Console.Error.WriteLine("** (t4) expecting \"of\" but got " + nt);
      t = t + "@@"; // marker consumed by parse() and swap()
    }
    else if (nt.Equals("has")) {
      t = tokenize();
      nt = tokenize();
      if (!nt.Equals("of")) Console.Error.WriteLine("** (t5) expecting \"of\" but got " + nt);
    }
    else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n") || nt.Equals("-") || nt.Equals(">")) return null;
    else if (nt.StartsWith("_:") && !nt.EndsWith("_" + Euler.doc)) t = nt + "_" + Euler.doc;
    else t = nt;
    // Console.Error.WriteLine("ST . t " + ST + t);
    if (pos < str.Length && ST.IndexOf(t) == -1 && t[0] != '"' && str[pos] == '"') t = t + token();
    // Integer followed by '.' and a digit: glue the fraction on. The digit
    // look-ahead keeps a statement-terminating '.' (as in "42.") from being
    // swallowed into the number.
    if (pos + 1 < str.Length && str[pos] == '.' && Char.IsDigit(str[pos + 1])) {
      try {
        Console.Error.WriteLine ("token call to parse" + t);
        long ts = Int64.Parse(t);
        Console.Error.WriteLine ("token parse returned");
        t = ts + next() + next();
      }
      catch (Exception) {
        Console.Error.WriteLine ("Token Caught exeception in parsing integer");
      }
    }
    Console.Error.WriteLine ("near end of Token()");
    if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
      r.verb = ".}";
      Console.Error.WriteLine ("Token:return .");
      return ".";
    }
    if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
      r.verb = ";]";
      Console.Error.WriteLine ("Token:return ;");
      return ";";
    }
    Console.Error.WriteLine ("Token:return r.verb = t;");
    return r.verb = t;
  }

  /// <summary>
  /// Handles a node whose cverb carries the "@@" marker. Without a subject the
  /// object becomes the subject and the verb term is parsed into the object;
  /// with both present, subject and object are exchanged and the marker removed.
  /// </summary>
  /// <param name="e">node to adjust; null is ignored</param>
  internal void swap(Euler e) {
    Console.Error.WriteLine ("Parser swap");
    if (e == null) return;
    if (e.subj == null && e.cverb.EndsWith("@@")) {
      e.subj = e.obj;
      Console.Error.WriteLine ("swap going to call parse");
      e.obj = parse(false, e.cverb.Substring(0, e.cverb.Length - 2));
      Console.Error.WriteLine ("swap ~ call parse");
      e.verb = e.cverb = "^^";
    }
    else if (e.subj != null && e.obj != null && e.cverb.EndsWith("@@")) {
      Euler el = e.subj;
      e.subj = e.obj;
      e.obj = el;
      e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
    }
  }

  /// <summary>
  /// Parses the items of a "( ... )" list after the opening parenthesis and
  /// hangs the resulting RDFfirst / RDFrest / RDFList chain under e.obj.
  /// The list ends at ")" (RDFnil) or at an "@name" tail, which becomes the
  /// variable "?name".
  /// </summary>
  /// <param name="e">node that receives the list in its obj field</param>
  internal void list(Euler e) {
    Console.Error.WriteLine ("parser_list");
    Euler el = e;
    while (true) {
      String nt = tokenize();
      if (nt.StartsWith("@")) {
        Console.Error.WriteLine ("list going to call parse");
        el.obj = parse(false, '?' + nt.Substring(1));
        Console.Error.WriteLine ("list ~call parse");
        nt = tokenize();
        if (!nt.Equals(")")) Console.Error.WriteLine("** (p6) expecting ) at " + el + " but got " + nt);
        break;
      }
      else if (nt.Equals(")")) {
        el.obj = parse(false, RDFnil);
        break;
      }
      else el.obj = parse(false, RDFfirst);
      if (nt.Equals("[")) {
        el.obj.obj = parse(true, null);
        tokenize();
      }
      else el.obj.obj = parse(false, nt);
      el.obj.near = parse(false, RDFrest);
      el.obj.near.near = parse(false, LOGa);
      el.obj.near.near.obj = parse(false, RDFList);
      el = el.obj.near;
    }
  }

  /// <summary>
  /// Resolves s against the base URI u. When resolution fails s is kept as is;
  /// the failure is logged unless u starts with "file:". A trailing ".n3#" is
  /// shortened to "#".
  /// </summary>
  /// <param name="s">URI reference, without the surrounding angle brackets</param>
  /// <returns>the resolved URI string</returns>
  internal String toURI(String s) {
    try {
      s = new Uri(new Uri(u), s).ToString();
    }
    catch (Exception e) {
      if (!u.StartsWith("file:")) Console.Error.WriteLine(u + " " + s + " " + e);
    }
    if (s.EndsWith(".n3#")) s = s.Substring(0, s.Length - 4) + "#";
    return s;
  }
}