// $Id: Parser.cs 1295 2007-05-11 16:52:51Z josd $

namespace Eulersharp {
    using System;
    using System.Collections;
    using System.Globalization;
    using System.Text;
    using System.Xml;

    /// <summary>
    /// N3 parser: a hand-written tokenizer plus recursive-descent parser that turns an
    /// N3 string into a chain of <c>Euler</c> nodes.
    /// </summary>
    /// <remarks>
    /// Jos De Roo.
    /// The parser deliberately relies on <see cref="NullReferenceException"/> to stop at
    /// the end of the input: <c>tokenize()</c> returns <c>null</c> there, the next
    /// <c>nt.Equals(...)</c> throws, and <c>parse()</c> catches it and returns what it has.
    /// The tokenizer also keeps its "previous token" state in <c>r.verb</c>.
    /// </remarks>
    public class Parser {
        internal String str; // N3 string
        internal int pos = 0; // tokenizer position
        internal int line = 1; // line counter
        internal ArrayList vt = null; // quantified variables table
        internal Euler r = null; // root Euler object
        internal String u = null; // base URI of the RDF resource
        // Characters that terminate a token and are themselves one-character tokens.
        internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
        internal const String LOGa = "a";
        internal const String LOGe = "=";
        internal const String LOGi = "=>";
        // NOTE(review): the text under review is damaged from here up to the second
        // constructor. The literal assigned to DPO is cut off after its opening quote and
        // has swallowed the doc comment and declaration of the parameterless constructor;
        // the declarations of ONT, OWLequivalentTo, OWLsameIndividualAs, OWLsameClassAs,
        // OWLsamePropertyAs, OWLdifferentIndividualFrom, OWLUnambiguousProperty and
        // OWLUniqueProperty, all of which parse() reads, are missing. The damaged text is
        // reproduced unchanged below: restore it from version control, do not guess the URIs.
        internal const String DPO = "constructs an N3 parser public Parser() { }

        /// <summary>constructs an N3 parser</summary>
        /// <param name="s">the N3 string</param>
        /// <param name="vartab">quantified variables table</param>
        /// <param name="root">Euler object</param>
        /// <param name="uri">URI of the RDF resource</param>
        public Parser(String s, ArrayList vartab, Euler root, String uri) {
            str = s;
            vt = vartab;
            r = root;
            u = uri;
        }

        /// <summary>N3 triple parse method</summary>
        /// <returns>Euler object, or <c>null</c> when no token is left in the input</returns>
        public virtual Euler Parse() {
            String nt = tokenize();
            if (nt == null) return null;
            return parse(true, nt);
        }

        /// <summary>N3 node parse method</summary>
        /// <param name="nt">next token</param>
        /// <returns>Euler object</returns>
        public virtual Euler Parse(String nt) {
            return parse(false, nt);
        }

        /// <summary>
        /// Writes a diagnostic to standard error in the parser's fixed format
        /// "** Parser error ** &lt;base URI&gt; line &lt;line&gt;: &lt;msg&gt;".
        /// </summary>
        private void report(String msg) {
            Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": " + msg);
        }

        /// <summary>
        /// Returns the input character at index <paramref name="i"/>, or '\0' when the
        /// index lies past the end of the input. Used for lookahead so that a construct
        /// at the very end of the input cannot raise IndexOutOfRangeException.
        /// </summary>
        private char peek(int i) {
            return i < str.Length ? str[i] : '\0';
        }

        /// <summary>True for the four whitespace characters the tokenizer knows.</summary>
        private static bool isBlank(char c) {
            return c == ' ' || c == '\t' || c == '\r' || c == '\n';
        }

        /// <summary>
        /// True when <paramref name="c"/> following '.', '!' or '^' means that character
        /// is not glued to a further term: whitespace, ']', '}' or '#'.
        /// </summary>
        private static bool endsTerm(char c) {
            return isBlank(c) || c == ']' || c == '}' || c == '#';
        }

        /// <summary>
        /// Core recursive parse routine.
        /// </summary>
        /// <param name="b">
        /// true to parse a whole statement (subject, verb, object and any ';' / ','
        /// continuations) starting at token <paramref name="nt"/>; false to build a
        /// single node from <paramref name="nt"/>
        /// </param>
        /// <param name="nt">next token; may be null when <paramref name="b"/> is true</param>
        /// <returns>
        /// the parsed Euler node; on a NullReferenceException (normally end of input) the
        /// exception is written to standard error and the partially built node is returned
        /// </returns>
        internal Euler parse(bool b, String nt) {
            Euler e = new Euler();
            try {
                if (u != null && u.StartsWith("_:")) e.src = Euler.Ekb;
                else e.src = "<" + u + ">";
                e.line = line;

                // ---- subject (statement mode only) ----
                if (b) {
                    if (nt == null) e.subj = null;
                    else if (nt.Equals("{")) {
                        nt = tokenize();
                        if (nt.Equals(".")) e.subj = parse(false, "{}");
                        else e.subj = parse(true, nt);
                        Euler el = e.subj;
                        while (el.near != null) el = el.near;
                        nt = tokenize();
                        while (!nt.Equals("}")) {
                            el.near = parse(true, nt);
                            while (el.near != null) el = el.near;
                            nt = tokenize();
                        }
                    }
                    else if (nt.Equals("[")) {
                        e.subj = parse(true, null);
                        // NOTE(review): the object-position branches below test
                        // "verb != null" before pushing back ";]"; this one tests
                        // "== null". Left as found - confirm which is intended.
                        if (e.subj.verb == null) r.verb = ";]";
                        nt = tokenize();
                        if (!nt.Equals("]")) report("(p1) expecting ] at " + e + " but got " + nt);
                        int cpos = pos;
                        nt = tokenize();
                        if (nt.Equals(".")) {
                            e.obj = parse(false, "");
                            e.verb = "";
                            e.cverb = "";
                            e.bound = true;
                            return e;
                        }
                        else pos = cpos; // not a bare "[ ... ] ." - rewind the lookahead
                    }
                    else if (nt.Equals("(")) {
                        list(e);
                        e.subj = e.obj;
                        e.obj = null;
                        int cpos = pos;
                        nt = tokenize();
                        if (nt.Equals(".")) {
                            e.obj = parse(false, "");
                            e.verb = "";
                            e.cverb = "";
                            e.bound = true;
                            return e;
                        }
                        else pos = cpos; // not a bare "( ... ) ." - rewind the lookahead
                    }
                    else {
                        e.subj = parse(false, nt);
                        if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.uid + '>';
                    }
                    path(e.subj);
                    if (nt != null && nt.Equals("@forAll")) {
                        e.subj = parse(false, "this");
                        nt = "log:forAll";
                    }
                    else if (nt != null && nt.Equals("@forSome")) {
                        e.subj = parse(false, "this");
                        nt = "log:forSome";
                    }
                    else nt = tokenize();
                }

                // ---- verb / node token ----
                if (nt == null || nt.Equals(";")) {
                    e.bound = true;
                    return e;
                }
                else if (e.subj != null && nt.StartsWith("_:")) {
                    e.cverb = nt;
                    if (!vt.Contains(nt)) vt.Add(nt);
                }
                else if (nt.Equals("[")) {
                    String a = "_:e" + e.uid;
                    if (!vt.Contains(a)) vt.Add(a);
                    Euler ap = parse(true, a);
                    e.cverb = a;
                    if (ap.verb != null) r.verb = ";]";
                    nt = tokenize();
                    if (!nt.Equals("]")) report("(p2) expecting ] at " + e + " but got " + nt);
                    if (ap.verb.Equals(Euler.OWLsameAs)) {
                        e.cverb = ap.obj.cverb;
                        e.vv = true;
                    }
                    else e.near = ap;
                }
                else if (nt.Equals("(")) {
                    list(e);
                    return e.obj;
                }
                else e.cverb = nt;

                // "_@" is appended by token() for "has x"; "@@" for "is x of".
                if (e.cverb.EndsWith("_@")) {
                    e.vv = true;
                    e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
                }
                e.verb = e.cverb;
                if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);

                // ---- <uri> form: resolve it and rewrite as prefix:name ----
                if (r != null && e.verb.Length > 0 && e.verb[0] == '<') {
                    String nsq = toURI(e.verb.Substring(1, e.verb.Length - 2));
                    int nsh = nsq.LastIndexOf('#');
                    int nss = nsq.LastIndexOf('/');
                    String nsu = '<' + (nsh != -1 ? nsq.Substring(0, nsh + 1) : nsq.Substring(0, nss + 1)) + '>';
                    String nsn = nsh != -1 ? nsq.Substring(nsh + 1) : nsq.Substring(nss + 1);
                    String nsp = null;
                    for (IEnumerator enr = r.ext.ns.Keys.GetEnumerator(); enr.MoveNext(); ) {
                        String nsx = (String)enr.Current;
                        String nsy = (String)r.ext.ns[nsx];
                        if (nsu.Equals(nsy)) nsp = nsx;
                    }
                    if (nsp == null) {
                        // no prefix registered for this namespace: take the first free "nsp<i>:"
                        int i = 0;
                        while (true) {
                            nsp = "nsp" + i + ':';
                            if (r.ext.ns[nsp] == null) break;
                            i++;
                        }
                        r.ext.ns[nsp] = nsu;
                    }
                    e.verb = e.cverb = nsp + nsn;
                }

                // ---- prefix:name form: expand to the full <uri> in e.verb ----
                // s1 is the part before the first '"', s2 the rest (from the quote on).
                String s1 = e.verb;
                String s2 = "";
                if (e.verb.IndexOf('"') != -1) {
                    s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
                    s2 = e.verb.Substring(e.verb.IndexOf('"'));
                }
                // a bare word that is neither a numeral nor a registered keyword gets the default prefix
                if (r != null && s1.Length > 0 && s1.IndexOf(':') == -1 && s1[0] != '(' && s1[0] != '<'
                        && s1[0] != '[' && s1[0] != '_' && s1[0] != '"' && s1[0] != '?'
                        && !r.isNumeral(s1) && r.ext.kw[s1] == null) s1 = ":" + s1;
                if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe) && s1[0] != '('
                        && s1[0] != '<' && s1[0] != '[' && s1[0] != '_' && s1[0] != '"'
                        && s1.IndexOf(':') != -1) {
                    String pf = s1.Substring(0, s1.IndexOf(':') + 1);
                    String pg = (String) r.ext.nsp[pf];
                    if (pg != null) pf = pg;
                    String qf = (String) r.ext.ns[pf];
                    if (qf == null) {
                        report("no @prefix " + pf + " found for " + s1 + ", taking <" + u + "#>");
                        qf = "<" + u + "#>";
                        r.ext.ns[pf] = qf;
                    }
                    StringBuilder sb = new StringBuilder(pf);
                    sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
                    e.cverb = sb.ToString();
                    sb = new StringBuilder(qf);
                    // insert the local name just before the closing '>' of the namespace
                    sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
                    e.verb = sb.ToString() + s2;
                }
                else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == -1)
                    e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;

                // ---- "?var@x" and "literal"@lang: split at the last '@' into subj "@" obj ----
                // peek() instead of str[...]: a tagged literal may be the last thing in the input.
                if ((e.verb.Length > 0 && e.verb[0] == '?' && e.verb.IndexOf('@') != -1)
                        || (e.verb.IndexOf("\"@") != -1 && peek(pos) != '^' && peek(pos + 1) != '^')) {
                    r.ext.kw[e.verb.Substring(e.verb.LastIndexOf('@') + 1)] = r;
                    e.subj = parse(false, e.verb.Substring(0, e.verb.LastIndexOf('@')));
                    e.obj = parse(false, e.verb.Substring(e.verb.LastIndexOf('@') + 1));
                    e.verb = e.cverb = "@";
                }

                // ---- shorthand and legacy vocabulary mapping ----
                if (e.verb.Equals(LOGa)) e.verb = Euler.RDFtype;
                if (e.verb.Equals(LOGe)) e.verb = Euler.OWLsameAs;
                if (e.verb.Equals(LOGi)) e.verb = Euler.LOGimplies;
                if (e.verb.StartsWith(DPO)) e.verb = Euler.OWL + e.verb.Substring(DPO.Length);
                if (e.verb.StartsWith(ONT)) e.verb = Euler.OWL + e.verb.Substring(ONT.Length);
                if (e.verb.Equals(OWLequivalentTo)) e.verb = Euler.OWLsameAs;
                if (e.verb.Equals(OWLsameIndividualAs)) e.verb = Euler.OWLsameAs;
                if (e.verb.Equals(OWLsameClassAs)) e.verb = Euler.OWLequivalentClass;
                if (e.verb.Equals(OWLsamePropertyAs)) e.verb = Euler.OWLequivalentProperty;
                if (e.verb.Equals(OWLdifferentIndividualFrom)) e.verb = Euler.OWLdifferentFrom;
                if (e.verb.Equals(OWLUnambiguousProperty)) e.verb = Euler.OWLInverseFunctionalProperty;
                if (e.verb.Equals(OWLUniqueProperty)) e.verb = Euler.OWLFunctionalProperty;
                if (e.verb.Equals(Euler.RDFfirst)) e.cverb = "rdf:first";
                if (e.verb.Equals(Euler.RDFrest)) e.cverb = "rdf:rest";
                if (e.verb.Equals(Euler.RDFnil)) e.cverb = "()";

                // ---- variable bookkeeping ----
                if (e.verb.StartsWith("?") && vt != null && !vt.Contains(e.verb)) vt.Add(e.verb);
                if (vt != null && vt.Count > 0) e.varid = vt.IndexOf(e.verb);
                if (e.varid == -1) e.bound = true;

                // A "." followed by whitespace - or by the end of the input - terminates the
                // statement. The original indexed str[pos] unchecked and threw
                // IndexOutOfRangeException when the "." was the last character.
                if (nt.Equals(".") && (pos >= str.Length || isBlank(str[pos]))) {
                    e.obj = parse(false, "");
                    e.verb = "";
                    e.cverb = "";
                    e.bound = true;
                    return e;
                }

                // ---- "^^" datatype suffix ----
                if (!nt.Equals(Euler.RDFfirst) && str != null && pos < str.Length
                        && str[pos] == '^' && peek(pos + 1) == '^') {
                    pos = pos + 2;
                    Euler et = parse(false, tokenize());
                    // xsd:string-typed literals are kept as plain nodes
                    if (!et.verb.Equals(Euler.XSDstring)) {
                        e.subj = e.copy();
                        e.verb = e.cverb = "^^";
                        e.bound = true;
                        e.obj = et;
                    }
                }

                // ---- object and ';' / ',' continuations (statement mode only) ----
                if (b) {
                    nt = tokenize();
                    if (nt.Equals("{")) {
                        nt = tokenize();
                        if (nt.Equals(".")) e.obj = parse(false, "{}");
                        else e.obj = parse(true, nt);
                        Euler el = e.obj;
                        while (el.near != null) el = el.near;
                        nt = tokenize();
                        while (nt != null && !nt.Equals("}")) {
                            el.near = parse(true, nt);
                            while (el.near != null) el = el.near;
                            nt = tokenize();
                        }
                    }
                    else if (nt.Equals("[")) {
                        e.obj = parse(true, null);
                        if (e.obj.verb != null) r.verb = ";]";
                        nt = tokenize();
                        if (!nt.Equals("]")) report("(p3) expecting ] at " + e + " but got " + nt);
                    }
                    else if (nt.Equals("(")) list(e);
                    else e.obj = parse(false, nt);
                    path(e.obj);
                    nt = tokenize();
                    if (nt == null) return e;
                    if (nt.EndsWith("\"\"\"")) {
                        e.obj.cverb = e.obj.cverb + nt;
                        e.obj.verb = e.obj.cverb;
                        nt = tokenize();
                    }
                    Euler el2 = e;
                    while (nt.Equals(";") || nt.Equals(",")) {
                        while (el2.near != null) el2 = el2.near;
                        if (nt.Equals(";")) {
                            nt = tokenize();
                            if (nt.Equals("]")) {
                                r.verb = ";]";
                                nt = ";";
                                break;
                            }
                            else if (nt.Equals(".")) break;
                        }
                        else nt = el2.cverb; // ',' repeats the previous verb
                        el2.near = parse(false, nt);
                        el2.near.subj = e.subj;
                        nt = tokenize();
                        if (nt.Equals("{")) {
                            nt = tokenize();
                            if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
                            else el2.near.obj = parse(true, nt);
                            Euler ef = el2.near.obj;
                            while (ef.near != null) ef = ef.near;
                            nt = tokenize();
                            while (nt != null && !nt.Equals("}")) {
                                ef.near = parse(true, nt);
                                while (ef.near != null) ef = ef.near;
                                nt = tokenize();
                            }
                        }
                        else if (nt.Equals("[")) {
                            el2.near.obj = parse(true, null);
                            if (el2.near.obj.verb != null) r.verb = ";]";
                            nt = tokenize();
                            if (!nt.Equals("]")) report("(p4) expecting ] at " + e + " but got " + nt);
                        }
                        else if (nt.Equals("(")) list(el2.near);
                        else el2.near.obj = parse(false, nt);
                        nt = tokenize();
                        if (nt == null) return e;
                        if (nt.EndsWith("\"\"\"")) {
                            el2.near.obj.cverb = el2.near.obj.cverb + nt;
                            el2.near.obj.verb = el2.near.obj.cverb;
                            nt = tokenize();
                        }
                        swap(el2.near);
                    }
                    if (!nt.Equals(".") && !nt.Equals(";"))
                        report("(p5) expecting . or ; at " + e + " but got " + nt);
                }
                swap(e);
                if (e.verb.Equals(Euler.LOGimplies) && e.subj != null && e.subj.verb.Equals("{}")) {
                    e.obj.vv = true;
                    e = e.obj;
                }
                e.far = r;
                return e;
            }
            catch (NullReferenceException exc) {
                // Deliberate best effort: running off the end of the input surfaces here.
                Console.Error.WriteLine(exc);
                return e;
            }
        }

        /// <summary>
        /// Returns the next significant token, skipping the null results token() gives for
        /// whitespace, comments and directives.
        /// </summary>
        /// <returns>the token, or null once the input is exhausted</returns>
        internal String tokenize() {
            String nt = token();
            while (pos < str.Length && nt == null) nt = token();
            return nt;
        }

        /// <summary>
        /// Reads one raw lexeme at <c>pos</c>: either a run of characters not in
        /// <c>ST</c>, or a single <c>ST</c> character. Advances <c>pos</c> and increments
        /// <c>line</c> when the new position sits on a '\n'.
        /// </summary>
        /// <returns>the lexeme; the empty string when <c>pos</c> is at the end of the input</returns>
        internal String next() {
            int start = pos;
            while (pos < str.Length && ST.IndexOf(str[pos]) < 0) pos++;
            if (pos < str.Length && start == pos && ST.IndexOf(str[pos]) >= 0) pos++;
            if (pos < str.Length && str[pos] == '\n') line++;
            return str.Substring(start, pos - start);
        }

        /// <summary>
        /// Produces one token, assembling strings, IRIs, "=>", "is..of" / "has" forms and
        /// handling comments, @prefix / PREFIX, @keywords and a few SPARQL keywords.
        /// </summary>
        /// <remarks>
        /// <c>r.verb</c> holds the token returned last. When "}" or "]" arrives without a
        /// preceding "." or ";", the missing terminator is returned first and the marker
        /// ".}" or ";]" is stored in <c>r.verb</c>, so that the next call hands out the bracket.
        /// </remarks>
        /// <returns>the token, or null for input that yields no token (or at end of input)</returns>
        internal String token() {
            if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
            if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
            if (pos >= str.Length) return null;
            String t = null;
            String nt = next();
            if (nt.IndexOf('#') != -1) {
                // comment: keep what precedes '#', then skip to the end of the line
                t = nt.Substring(0, nt.IndexOf('#'));
                String v = r.verb;
                while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
                r.verb = v;
                if (t.Equals("")) return null;
            }
            else if (nt.Equals("SELECT")) {
                // the rest of the line becomes one parenthesised token
                StringBuilder sb = new StringBuilder("(");
                nt = tokenize();
                while (!nt.Equals("\r") && !nt.Equals("\n") && pos < str.Length) {
                    sb.Append(nt);
                    nt = next();
                }
                t = sb.Append(')').ToString();
            }
            else if (nt.Equals("CONSTRUCT")) {
                t = tokenize();
            }
            else if (nt.Equals("FROM")) {
                // TODO
                nt = tokenize();
                if (nt.Equals("NAMED")) tokenize();
                t = tokenize();
            }
            else if (nt.Equals("UNION")) {
                t = ".";
            }
            else if (nt.Equals("OPTIONAL")) {
                // TODO
                t = tokenize();
            }
            else if (nt.Equals("FILTER")) {
                // TODO
                t = tokenize();
            }
            else if (nt.Equals("@prefix") || nt.Equals("PREFIX")) {
                String nsc = tokenize();
                if (nsc.Equals("default")) nsc = ":";
                String nsd = null;
                String nsu = tokenize();
                if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == -1)
                    nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
                // reuse a prefix that is already bound to this namespace
                for (IEnumerator enr = r.ext.ns.Keys.GetEnumerator(); enr.MoveNext(); ) {
                    String nsx = (String)enr.Current;
                    String nsy = (String)r.ext.ns[nsx];
                    if (nsu.Equals(nsy)) nsd = nsx;
                }
                if (nsd == null) {
                    // declared prefix already bound to another namespace: try "p<i><prefix>"
                    nsd = nsc;
                    String nsv = (String)r.ext.ns[nsd];
                    int i = 0;
                    while (nsv != null && !nsu.Equals(nsv)) {
                        nsd = "p" + i + nsc;
                        nsv = (String)r.ext.ns[nsd];
                        i++;
                    }
                }
                r.ext.nsp[nsc] = nsd;
                r.ext.ns[nsd] = nsu;
                if (nt.Equals("@prefix")) tokenize(); // consume the token after the namespace (for @prefix only)
                return null;
            }
            else if (nt.Equals("@keywords")) {
                while (nt != null && !nt.Equals(".") && pos < str.Length) {
                    nt = next();
                    if (!nt.Equals(" ") && !nt.Equals("\t") && !nt.Equals("\r") && !nt.Equals("\n")
                            && !nt.Equals(",") && !nt.Equals(".")) r.ext.kw[nt] = r;
                }
                t = tokenize();
            }
            else if (nt.Equals("\"")) {
                StringBuilder sb = new StringBuilder("\"");
                // peek(): a quote may be the last character of the input
                if (peek(pos) == '"' && peek(pos + 1) == '"') {
                    // """long string"""
                    sb.Append(next()).Append(next());
                    while (pos < str.Length
                            && (str[pos] != '"' || peek(pos + 1) != '"' || peek(pos + 2) != '"'))
                        sb.Append(next());
                    // the scan only reaches the end of the input when the closing """ is missing
                    if (pos >= str.Length) report("(t1) expecting \"\"\" at " + sb);
                    sb.Append(next()).Append(next()).Append(next());
                }
                else {
                    nt = next();
                    while (!nt.Equals("\"")) {
                        if (nt.Equals("\\")) {
                            // keep the backslash and take the escaped lexeme verbatim
                            sb.Append(nt);
                            nt = next();
                        }
                        sb.Append(nt);
                        if (pos >= str.Length) {
                            report("(t1) expecting \" at " + sb);
                            break;
                        }
                        nt = next();
                    }
                    sb.Append("\"");
                }
                if (pos < str.Length && str[pos] == '@') {
                    // language tag: everything from the first '-' on is upper-cased
                    String lang = token();
                    int i = lang.IndexOf('-');
                    // invariant casing: the tag must not depend on the current culture
                    if (i != -1) sb.Append(lang.Substring(0, i) + lang.Substring(i).ToUpperInvariant());
                    else sb.Append(lang);
                }
                t = sb.ToString();
            }
            else if (nt.Equals("'")) {
                StringBuilder sb = new StringBuilder("'");
                nt = next();
                while (!nt.Equals("'")) {
                    sb.Append(nt);
                    if (pos >= str.Length) {
                        report("(t2) expecting ' at " + sb);
                        break;
                    }
                    nt = next();
                }
                t = sb.Append("'").ToString();
            }
            else if (nt.Equals("<")) {
                StringBuilder sb = new StringBuilder("<");
                nt = next();
                while (!nt.Equals(">")) {
                    sb.Append(nt);
                    if (pos >= str.Length) {
                        // was also labelled (t2), colliding with the single-quote diagnostic
                        report("(t3) expecting > at " + sb);
                        break;
                    }
                    nt = next();
                }
                t = sb.Append(">").ToString();
            }
            else if (nt.Equals("=")) {
                // "=>" arrives as "=" followed by the delimiter '>'
                if (peek(pos) == '>') t = nt + next();
                else t = nt;
            }
            else if (nt.Equals("is") || nt.Equals("@is")) {
                t = tokenize();
                nt = tokenize();
                if (!(nt.Equals("of") || nt.Equals("@of")))
                    report("(t4) expecting \"of\" but got " + nt);
                t = t + "@@";
            }
            else if (nt.Equals("has") || nt.Equals("@has")) {
                t = tokenize() + "_@";
                int cpos = pos;
                nt = tokenize();
                if (!(nt.Equals("of") || nt.Equals("@of"))) pos = cpos; // optional "of": rewind if absent
            }
            else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n")
                    || nt.Equals("-") || nt.Equals(">")) return null;
            else if (nt.StartsWith("_:") && !nt.EndsWith("_")) t = nt + "_" + Euler.doc + "_";
            else t = nt;

            // a non-delimiter token immediately followed by a quote absorbs the string token
            if (pos < str.Length && ST.IndexOf(t) == -1 && t[0] != '"' && str[pos] == '"') t = t + token();
            // a '.' glued to the following characters belongs to the token
            while (pos < str.Length - 1 && str[pos] == '.' && !endsTerm(str[pos + 1])) {
                t = t + next() + next();
            }
            if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
                r.verb = ".}";
                return ".";
            }
            if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
                r.verb = ";]";
                return ";";
            }
            return r.verb = t;
        }

        /// <summary>
        /// Resolves the "@@" marker that token() appends for "is x of". Without a subject
        /// the node becomes a "!" node with the former object as subject; with subject and
        /// object present the two are exchanged and the marker is stripped from
        /// <c>cverb</c>.
        /// </summary>
        /// <param name="e">node to fix up; null is ignored</param>
        internal void swap(Euler e) {
            // the original guarded only its first branch against null
            if (e == null) return;
            if (e.subj == null && e.cverb.EndsWith("@@")) {
                e.subj = e.obj;
                e.obj = parse(false, e.cverb.Substring(0, e.cverb.Length - 2));
                e.verb = e.cverb = "!";
            }
            else if (e.subj != null && e.obj != null && e.cverb.EndsWith("@@")) {
                Euler el = e.subj;
                e.subj = e.obj;
                e.obj = el;
                e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
            }
        }

        /// <summary>
        /// Parses the remainder of a "( ... )" list (the "(" is already consumed) into an
        /// rdf:first / rdf:rest chain hanging off <c>e.obj</c>, ending in rdf:nil or, for
        /// an "@name" token, in the variable "?name".
        /// </summary>
        /// <param name="e">node whose <c>obj</c> receives the list</param>
        internal void list(Euler e) {
            Euler el = e;
            while (true) {
                String nt = tokenize();
                if (nt.StartsWith("@")) {
                    el.obj = parse(false, '?' + nt.Substring(1));
                    nt = tokenize();
                    if (!nt.Equals(")")) report("(p6) expecting ) at " + el + " but got " + nt);
                    break;
                }
                else if (nt.Equals(")")) {
                    el.obj = parse(false, Euler.RDFnil);
                    break;
                }
                else el.obj = parse(false, Euler.RDFfirst);
                if (nt.Equals("{")) {
                    nt = tokenize();
                    if (nt.Equals(".")) el.obj.obj = parse(false, "{}");
                    else el.obj.obj = parse(true, nt);
                    Euler er = el.obj.obj;
                    while (er.near != null) er = er.near;
                    nt = tokenize();
                    while (!nt.Equals("}")) {
                        er.near = parse(true, nt);
                        while (er.near != null) er = er.near;
                        nt = tokenize();
                    }
                }
                else if (nt.Equals("[")) {
                    el.obj.obj = parse(true, null);
                    tokenize(); // the token after the nested node is consumed unchecked
                }
                else el.obj.obj = parse(false, nt);
                path(el.obj.obj);
                el.obj.near = parse(false, Euler.RDFrest);
                el = el.obj.near;
            }
            path(e.obj);
        }

        /// <summary>
        /// Handles the path operators "!" and "^" directly after a node: the node is
        /// copied into its own <c>subj</c>, its verb becomes the operator and the next
        /// token is parsed as its <c>obj</c>. Repeats for chained operators.
        /// </summary>
        /// <param name="e">node just parsed</param>
        internal void path(Euler e) {
            if (str != null && pos < str.Length - 1 && str[pos] == '!' && !endsTerm(str[pos + 1])) {
                pos = pos + 1;
                e.subj = e.copy();
                e.verb = e.cverb = "!";
                e.bound = true;
                e.obj = parse(false, tokenize());
                e.near = null;
                path(e);
            }
            // a single '^' only; "^^" is the datatype marker handled in parse()
            if (str != null && pos < str.Length - 1 && str[pos] == '^' && str[pos + 1] != '^'
                    && !endsTerm(str[pos + 1])) {
                pos = pos + 1;
                e.subj = e.copy();
                e.verb = e.cverb = "^";
                e.bound = true;
                e.obj = parse(false, tokenize());
                e.near = null;
                path(e);
            }
        }

        /// <summary>
        /// Resolves a URI reference against the base URI <c>u</c>.
        /// </summary>
        /// <remarks>
        /// An empty or "#..." reference is prefixed with <c>u</c>. Unless the reference
        /// contains '?', it is then resolved against <c>u</c> cut after its last '/'; a
        /// resolution failure is reported and the reference is kept as it was. A trailing
        /// ".n3#" is shortened to "#". With no base URI (null or empty) nothing is resolved.
        /// </remarks>
        /// <param name="s">URI reference without the angle brackets</param>
        /// <returns>the resolved URI string</returns>
        internal String toURI(String s) {
            if (s.Equals("") || s.StartsWith("#")) s = u + s;
            // u != null: parse() calls this without checking the base URI first
            if (u != null && !u.Equals("") && s.IndexOf('?') == -1) {
                try {
                    if (s.EndsWith("#"))
                        s = new Uri(new Uri(u.Substring(0, u.LastIndexOf('/') + 1)), s.Substring(0, s.Length - 1)).ToString() + '#';
                    else if (!u.StartsWith("_:"))
                        s = new Uri(new Uri(u.Substring(0, u.LastIndexOf('/') + 1)), s).ToString();
                }
                catch (Exception e) {
                    report("found " + s + "\n" + e);
                }
            }
            if (s.EndsWith(".n3#")) s = s.Substring(0, s.Length - 4) + "#";
            return s;
        }
    }
}