// $Id: ParserNew.cs 1295 2007-05-11 16:52:51Z josd $
// PxButton | build | csc /o /doc:Euler.xml *.cs |

using System;
using System.Collections;
using System.Globalization;
using System.Text;
using System.Xml;

/// <summary>
/// N3 parser. Tokenizes the N3 text held in <c>str</c> starting at <c>pos</c> and
/// builds a graph of <c>Euler</c> nodes linked through their <c>subj</c>,
/// <c>verb</c>, <c>obj</c>, <c>near</c> and <c>far</c> fields.
/// </summary>
/// <remarks>
/// Author: Jos De Roo.
/// The tokenizer keeps the most recently returned token in <c>r.verb</c>; the
/// values ".}" and ";]" are used there as markers meaning "a closing bracket is
/// still owed to the caller" (see <see cref="token"/>).
/// </remarks>
public class Parser {
    internal String str; // N3 string
    internal int pos = 0; // tokenizer position
    internal ArrayList vt = null; // quantified variables table
    internal Euler r = null; // root Euler object
    internal String u = null; // base URI of the RDF resource

    // Characters that terminate a token; each of them is also a token on its own.
    internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
    internal const String LOGa = "a";
    internal const String LOGe = "=";
    internal const String LOGi = "=>";

    // NOTE(review): in the copy of this file that was reviewed, angle-bracketed text
    // had been stripped. The literal values of LOGimplies and DPO, and the complete
    // declarations from ONT down to OWLFunctionalProperty, were therefore missing
    // and have been reconstructed from the public W3C/DAML vocabularies and from how
    // parse() and list() use these names. ONT in particular is a best guess.
    // Confirm every value below against the upstream Euler sources.
    internal const String LOGimplies = "<http://www.w3.org/2000/10/swap/log#implies>";
    internal const String DPO = "<http://www.daml.org/2001/03/daml+oil#";
    internal const String ONT = "<http://www.w3.org/2001/10/daml+oil#"; // TODO confirm
    internal const String OWL = "<http://www.w3.org/2002/07/owl#";
    internal const String RDFtype = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>";
    internal const String RDFfirst = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#first>";
    internal const String RDFrest = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#rest>";
    internal const String RDFnil = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#nil>";
    internal const String XSDstring = "<http://www.w3.org/2001/XMLSchema#string>";
    internal const String OWLsameAs = OWL + "sameAs>";
    internal const String OWLequivalentTo = OWL + "equivalentTo>";
    internal const String OWLsameIndividualAs = OWL + "sameIndividualAs>";
    internal const String OWLsameClassAs = OWL + "sameClassAs>";
    internal const String OWLequivalentClass = OWL + "equivalentClass>";
    internal const String OWLsamePropertyAs = OWL + "samePropertyAs>";
    internal const String OWLequivalentProperty = OWL + "equivalentProperty>";
    internal const String OWLdifferentIndividualFrom = OWL + "differentIndividualFrom>";
    internal const String OWLdifferentFrom = OWL + "differentFrom>";
    internal const String OWLUnambiguousProperty = OWL + "UnambiguousProperty>";
    internal const String OWLInverseFunctionalProperty = OWL + "InverseFunctionalProperty>";
    internal const String OWLUniqueProperty = OWL + "UniqueProperty>";
    internal const String OWLFunctionalProperty = OWL + "FunctionalProperty>";

    /// <summary>constructs an N3 parser</summary>
    /// <remarks>
    /// Leaves <c>str</c> and <c>r</c> unset; the tokenizer dereferences both, so
    /// they must be assigned before parsing.
    /// </remarks>
    public Parser() {
    }

    /// <summary>constructs an N3 parser</summary>
    /// <param name="s">the N3 string</param>
    /// <param name="vartab">quantified variables table</param>
    /// <param name="root">Euler object</param>
    /// <param name="uri">URI of the RDF resource</param>
    public Parser(String s, ArrayList vartab, Euler root, String uri) {
        str = s;
        vt = vartab;
        r = root;
        u = uri;
    }

    /// <summary>N3 triple parse method</summary>
    /// <returns>Euler object, or null when no further token is available</returns>
    public virtual Euler Parse() {
        String nt = tokenize();
        if (nt == null) return null;
        return parse(true, nt);
    }

    /// <summary>N3 node parse method</summary>
    /// <param name="nt">next token</param>
    /// <returns>Euler object</returns>
    public virtual Euler Parse(String nt) {
        return parse(false, nt);
    }

    /// <summary>
    /// Parses a full statement (subject, verb, object, plus any ";" / ","
    /// continuations) when <paramref name="b"/> is true, or a single node whose
    /// text is <paramref name="nt"/> when it is false.
    /// </summary>
    /// <param name="b">true to parse a statement, false to parse one node</param>
    /// <param name="nt">
    /// the current token; with <paramref name="b"/> true, null means that there is
    /// no explicit subject (used for the contents of "[ ... ]")
    /// </param>
    /// <returns>
    /// the Euler node that was built. Running out of tokens surfaces as a
    /// NullReferenceException on a null token, which is caught here: the stack
    /// trace is written to stderr and the partially built node is returned.
    /// </returns>
    internal Euler parse(bool b, String nt) {
        Euler e = new Euler();
        try {
            // ---- subject (statements only) ----
            if (b) {
                if (nt == null) e.subj = null;
                else if (nt.Equals("{")) {
                    // formula: statements are chained through .near until "}"
                    nt = tokenize();
                    if (nt.Equals(".")) e.subj = parse(false, "{}");
                    else e.subj = parse(true, nt);
                    Euler el = e.subj;
                    nt = tokenize();
                    while (el.near != null) el = el.near;
                    while (!nt.Equals("}")) {
                        el.near = parse(true, nt);
                        while (el.near != null) el = el.near;
                        nt = tokenize();
                    }
                }
                else if (nt.Equals("[")) {
                    e.subj = parse(true, null);
                    if (e.subj.verb == null) r.verb = ";]";
                    nt = tokenize();
                    if (!nt.Equals("]")) Console.Error.WriteLine("** (p1) expecting ] at " + e + " but got " + nt);
                }
                else if (nt.Equals("(")) {
                    // list() stores the list head in e.obj; move it to the subject slot
                    list(e);
                    e.subj = e.obj;
                    e.obj = null;
                }
                else {
                    e.subj = parse(false, nt);
                    if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.GetHashCode() + '>';
                }
                path(e.subj);
                nt = tokenize();
            }

            // ---- verb (or, for a single node, the node itself) ----
            if (nt == null || nt.Equals(";")) {
                e.bound = true;
                return e;
            }
            else if (e.subj != null && nt.StartsWith("_:")) {
                e.cverb = nt;
                if (vt != null && !vt.Contains(nt)) vt.Add(nt);
            }
            else if (nt.Equals("[")) {
                // anonymous node in verb position: give it a generated "_:" label
                String a = "_:" + e.GetHashCode();
                if (vt != null && !vt.Contains(a)) vt.Add(a);
                Euler ap = parse(true, a);
                e.cverb = a;
                if (ap.verb != null) r.verb = ";]";
                nt = tokenize();
                if (!nt.Equals("]")) Console.Error.WriteLine("** (p2) expecting ] at " + e + " but got " + nt);
                e.near = ap;
            }
            else if (nt.Equals("(")) {
                list(e);
                return e.obj;
            }
            else e.cverb = nt;

            e.verb = e.cverb;
            // a trailing "@@" is appended by token() for "is ... of"; swap() acts on it later
            if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);

            // s1 is the part before the first double quote, s2 the rest (from the quote on)
            String s1 = e.verb;
            String s2 = "";
            if (e.verb.IndexOf('"') != -1) {
                s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
                s2 = e.verb.Substring(e.verb.IndexOf('"'));
            }

            if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe)
                    && s1[0] != '(' && s1[0] != '<' && s1[0] != '[' && s1[0] != '_'
                    && s1[0] != '"' && s1[0] != '\'' && s1.IndexOf(':') != -1) {
                // prefixed name: expand the prefix through the tables on the root object
                String pf = s1.Substring(0, s1.IndexOf(':') + 1);
                String pg = (String) r.nsp[pf];
                if (pg != null) pf = pg;
                String qf = (String) r.hshtNs[pf];
                if (qf == null) {
                    Console.Error.WriteLine("** no @prefix " + pf + " found, taking <" + u + "#>");
                    qf = "<" + u + "#>";
                    r.hshtNs[pf] = qf;
                }
                StringBuilder sb = new StringBuilder(pf);
                sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
                e.cverb = sb.ToString();
                // insert the local name just before the closing '>' of the namespace
                sb = new StringBuilder(qf);
                sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
                e.verb = sb.ToString() + s2;
            }
            else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == -1)
                // "<...>" without a colon is resolved against the base URI
                e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;

            // '...' holds a number: normalise it. The invariant culture is used for both
            // parsing and formatting so the result does not depend on the machine locale.
            if (e.verb.StartsWith("'"))
                e.verb = "'" + Double.Parse(e.verb.Substring(1, e.verb.Length - 2), CultureInfo.InvariantCulture).ToString(CultureInfo.InvariantCulture) + "'";

            // keyword shorthands and legacy vocabulary rewriting
            if (e.verb.Equals(LOGa)) e.verb = RDFtype;
            if (e.verb.Equals(LOGe)) e.verb = OWLsameAs;
            if (e.verb.Equals(LOGi)) e.verb = LOGimplies;
            if (e.verb.StartsWith(DPO)) e.verb = OWL + e.verb.Substring(DPO.Length);
            if (e.verb.StartsWith(ONT)) e.verb = OWL + e.verb.Substring(ONT.Length);
            if (e.verb.Equals(OWLequivalentTo)) e.verb = OWLsameAs;
            if (e.verb.Equals(OWLsameIndividualAs)) e.verb = OWLsameAs;
            if (e.verb.Equals(OWLsameClassAs)) e.verb = OWLequivalentClass;
            if (e.verb.Equals(OWLsamePropertyAs)) e.verb = OWLequivalentProperty;
            if (e.verb.Equals(OWLdifferentIndividualFrom)) e.verb = OWLdifferentFrom;
            if (e.verb.Equals(OWLUnambiguousProperty)) e.verb = OWLInverseFunctionalProperty;
            if (e.verb.Equals(OWLUniqueProperty)) e.verb = OWLFunctionalProperty;

            // register "?x" variables; anything not in the variable table counts as bound
            if (vt != null && e.verb.StartsWith("?") && !vt.Contains(e.verb)) vt.Add(e.verb);
            if (vt != null && vt.Count > 0) e.varid = vt.IndexOf(e.verb);
            if (e.varid == -1) e.bound = true;

            // "." followed by blank, newline or end of input ends the statement here
            if (nt.Equals(".") && (pos >= str.Length || str[pos] == ' ' || str[pos] == '\n')) {
                e.obj = parse(false, "");
                e.verb = "";
                e.cverb = "";
                e.bound = true;
                return e;
            }

            // "^^datatype" suffix; needs two characters of look-ahead
            if (str != null && pos < str.Length - 1 && str[pos] == '^' && str[pos + 1] == '^') {
                pos = pos + 2;
                Euler et = parse(false, tokenize());
                if (!et.verb.Equals(XSDstring)) {
                    e.subj = e.copy();
                    e.verb = e.cverb = "^^";
                    e.bound = true;
                    e.obj = et;
                    String.Intern(e.obj.verb);
                    if (e.subj.bound) Datatype.Compare(e.obj.verb, r.getLit(e.subj), r.getLit(e.subj));
                }
            }

            // ---- object and ";" / "," continuations (statements only) ----
            if (b) {
                nt = tokenize();
                if (nt.Equals("{")) {
                    nt = tokenize();
                    if (nt.Equals(".")) e.obj = parse(false, "{}");
                    else e.obj = parse(true, nt);
                    Euler el = e.obj;
                    nt = tokenize();
                    while (nt != null && !nt.Equals("}")) {
                        el.near = parse(true, nt);
                        el = el.near;
                        nt = tokenize();
                    }
                }
                else if (nt.Equals("[")) {
                    e.obj = parse(true, null);
                    if (e.obj.verb != null) r.verb = ";]";
                    nt = tokenize();
                    if (!nt.Equals("]")) Console.Error.WriteLine("** (p3) expecting ] at " + e + " but got " + nt);
                }
                else if (nt.Equals("(")) list(e);
                else e.obj = parse(false, nt);
                path(e.obj);
                nt = tokenize();
                if (nt.EndsWith("\"\"\"")) {
                    // a further chunk ending in """ is glued onto the object text
                    e.obj.cverb = e.obj.cverb + nt;
                    e.obj.verb = e.obj.cverb;
                    nt = tokenize();
                }

                Euler el2 = e;
                while (nt.Equals(";") || nt.Equals(",")) {
                    while (el2.near != null) el2 = el2.near;
                    if (nt.Equals(";")) {
                        nt = tokenize();
                        if (nt.Equals("]")) {
                            r.verb = ";]";
                            nt = ";";
                            break;
                        }
                        else if (nt.Equals(".")) break;
                    }
                    else nt = el2.cverb; // "," repeats the previous verb
                    el2.near = parse(false, nt);
                    el2.near.subj = e.subj;
                    nt = tokenize();
                    if (nt.Equals("{")) {
                        nt = tokenize();
                        if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
                        else el2.near.obj = parse(true, nt);
                        Euler ef = el2.near.obj;
                        nt = tokenize();
                        while (nt != null && !nt.Equals("}")) {
                            ef.near = parse(true, nt);
                            ef = ef.near;
                            nt = tokenize();
                        }
                    }
                    else if (nt.Equals("[")) {
                        el2.near.obj = parse(true, null);
                        if (el2.near.obj.verb != null) r.verb = ";]";
                        nt = tokenize();
                        if (!nt.Equals("]")) Console.Error.WriteLine("** (p4) expecting ] at " + e + " but got " + nt);
                    }
                    else if (nt.Equals("(")) list(el2.near);
                    else el2.near.obj = parse(false, nt);
                    nt = tokenize();
                    if (nt.EndsWith("\"\"\"")) {
                        el2.near.obj.cverb = el2.near.obj.cverb + nt;
                        el2.near.obj.verb = el2.near.obj.cverb;
                        nt = tokenize();
                    }
                    swap(el2.near);
                }
                if (!nt.Equals(".") && !nt.Equals(";")) Console.Error.WriteLine("** (p5) expecting . or ; at " + e + " but got " + nt);
            }
            swap(e);
            e.far = r;
            return e;
        }
        catch (NullReferenceException exc) {
            // Reached when a token that was needed is null (end of input) or when a
            // sub-parse produced no node; the partially built node is returned.
            Console.Error.WriteLine(exc.StackTrace);
            return e;
        }
    }

    /// <summary>
    /// Returns the next significant token, skipping everything for which
    /// <see cref="token"/> returns null (whitespace, comments, prefix declarations).
    /// </summary>
    /// <returns>the token, or null once the input is exhausted</returns>
    internal String tokenize() {
        String nt = token();
        while (pos < str.Length && nt == null) nt = token();
        return nt;
    }

    /// <summary>
    /// Reads one raw lexeme at <c>pos</c>: either a run of characters that are not
    /// in <see cref="ST"/>, or a single character that is.
    /// </summary>
    /// <returns>the lexeme; the empty string when <c>pos</c> is at the end of the input</returns>
    internal String next() {
        int start = pos;
        while (pos < str.Length && ST.IndexOf(str[pos]) < 0) pos++;
        // Nothing consumed and not at the end: the current character is a separator,
        // which forms a one-character lexeme.
        if (start == pos && pos < str.Length) pos++;
        return str.Substring(start, pos - start);
    }

    /// <summary>
    /// Produces one token: assembles string, number and URI literals, handles
    /// comments, "@prefix"/"bind" declarations and the "is ... of" / "has ... of"
    /// forms, and records the result in <c>r.verb</c>.
    /// </summary>
    /// <remarks>
    /// When "}" or "]" is met and the previous token was not "." or ";"
    /// respectively, the terminator is returned first and <c>r.verb</c> is set to
    /// ".}" or ";]" so that the following call returns the bracket itself.
    /// </remarks>
    /// <returns>the token, or null when the lexeme read was not significant</returns>
    internal String token() {
        if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
        if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
        if (pos >= str.Length) return null;
        String t = null;
        String nt = next();
        if (nt.IndexOf('#') != -1) {
            // comment: keep what preceded '#', skip to the end of the line
            t = nt.Substring(0, nt.IndexOf('#'));
            String v = r.verb;
            while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
            r.verb = v;
            if (t.Equals("")) return null;
        }
        else if (nt.Equals("@prefix") || nt.Equals("bind")) {
            String nsc = tokenize();
            if (nsc.Equals("default")) nsc = ":";
            String nsd = nsc;
            String nsu = tokenize();
            if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == -1)
                nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
            // if the prefix is already bound to a different namespace, derive a fresh
            // internal prefix by prepending "ns" until it is free or matches
            String nsv = (String) r.hshtNs[nsd];
            while (nsv != null && !nsu.Equals(nsv)) {
                nsd = "ns" + nsd;
                nsv = (String) r.hshtNs[nsd];
            }
            r.nsp[nsd] = nsd;
            r.nsp[nsc] = nsd;
            r.hshtNs[nsd] = nsu;
            tokenize(); // consume the token that follows the namespace
            return null;
        }
        else if (nt.Equals("\"")) {
            StringBuilder sb = new StringBuilder("\"");
            if (isQuoteAt(pos) && isQuoteAt(pos + 1)) {
                // long string: """ ... """
                sb.Append(next());
                sb.Append(next());
                while (true) {
                    if (pos >= str.Length) {
                        Console.Error.WriteLine("** (t0) expecting \"\"\" at " + sb);
                        break;
                    }
                    sb.Append(next());
                    // NOTE(review): the stop condition also requires the second character
                    // after the closing quotes not to be a quote; kept as in the original.
                    if (endsWithTripleQuote(sb) && !isQuoteAt(pos) && !isQuoteAt(pos + 1)) break;
                }
                t = sb.ToString();
            }
            else {
                nt = next();
                while (!nt.Equals("\"")) {
                    if (nt.Equals("\\")) {
                        // keep the backslash and take the escaped lexeme verbatim
                        sb.Append(nt);
                        nt = next();
                    }
                    sb.Append(nt);
                    if (pos >= str.Length) {
                        Console.Error.WriteLine("** (t1) expecting \" at " + sb);
                        break;
                    }
                    nt = next();
                }
                sb.Append("\"");
                String lang = "";
                if (pos < str.Length && str[pos] == '@') {
                    // language tag: the part from the first '-' on is upper-cased
                    lang = token();
                    int i = lang.IndexOf('-');
                    if (i != -1) sb.Append(lang.Substring(0, i) + lang.Substring(i).ToUpperInvariant());
                    else sb.Append(lang);
                }
                t = sb.ToString();
            }
        }
        else if (nt.Equals("'")) {
            StringBuilder sb = new StringBuilder("'");
            nt = next();
            while (!nt.Equals("'")) {
                sb.Append(nt);
                if (pos >= str.Length) {
                    Console.Error.WriteLine("** (t2) expecting ' at " + sb);
                    break;
                }
                nt = next();
            }
            t = sb.Append("'").ToString();
        }
        else if (nt.Equals("<")) {
            StringBuilder sb = new StringBuilder("<");
            nt = next();
            while (!nt.Equals(">")) {
                sb.Append(nt);
                if (pos >= str.Length) {
                    Console.Error.WriteLine("** (t3) expecting > at " + sb);
                    break;
                }
                nt = next();
            }
            t = sb.Append(">").ToString();
        }
        else if (nt.Equals("=")) {
            // "=" directly followed by ">" forms "=>"
            if (pos < str.Length && str[pos] == '>') t = nt + next();
            else t = nt;
        }
        else if (nt.Equals("is")) {
            t = tokenize();
            nt = tokenize();
            if (!nt.Equals("of")) Console.Error.WriteLine("** (t4) expecting \"of\" but got " + nt);
            t = t + "@@"; // marks the inverse direction; consumed by parse() and swap()
        }
        else if (nt.Equals("has")) {
            t = tokenize();
            nt = tokenize();
            if (!nt.Equals("of")) Console.Error.WriteLine("** (t5) expecting \"of\" but got " + nt);
        }
        else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n") || nt.Equals("-") || nt.Equals(">"))
            return null;
        else if (nt.StartsWith("_:") && !nt.EndsWith("_" + Euler.doc))
            t = nt + "_" + Euler.doc; // suffix blank-node labels with Euler.doc
        else t = nt;

        // a token that is not itself a separator and is directly followed by a quote
        // absorbs the string literal that follows
        if (pos < str.Length && t != null && ST.IndexOf(t, StringComparison.Ordinal) == -1 && t[0] != '"' && str[pos] == '"')
            t = t + token();

        // "<t>.<x>" where the dot is not a terminator: try to read it as a decimal number
        if (pos < str.Length - 1 && str[pos] == '.' && str[pos + 1] != ' ' && str[pos + 1] != '\n'
                && str[pos + 1] != ']' && str[pos + 1] != '}' && str[pos + 1] != '#') {
            int opos = pos;
            try {
                t = XmlConvert.ToDouble(t + next() + next()).ToString(CultureInfo.InvariantCulture);
            }
            catch (Exception) {
                pos = opos; // not a number: rewind and leave the dot for the caller
            }
        }

        if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
            r.verb = ".}";
            return ".";
        }
        if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
            r.verb = ";]";
            return ";";
        }
        return r.verb = t;
    }

    /// <summary>Tells whether position <paramref name="i"/> is inside the input and holds a double quote.</summary>
    private bool isQuoteAt(int i) {
        return i < str.Length && str[i] == '"';
    }

    /// <summary>Tells whether <paramref name="sb"/> currently ends with three double quotes.</summary>
    private static bool endsWithTripleQuote(StringBuilder sb) {
        int n = sb.Length;
        return n >= 3 && sb[n - 1] == '"' && sb[n - 2] == '"' && sb[n - 3] == '"';
    }

    /// <summary>
    /// Resolves the "@@" marker that <see cref="token"/> appends for "is ... of":
    /// without a subject the object becomes the subject and the verb text is parsed
    /// as the new object; with both present, subject and object are exchanged.
    /// </summary>
    /// <param name="e">the node to adjust; null is ignored</param>
    internal void swap(Euler e) {
        if (e == null) return;
        if (e.subj == null && e.cverb.EndsWith("@@")) {
            e.subj = e.obj;
            e.obj = parse(false, e.cverb.Substring(0, e.cverb.Length - 2));
            e.verb = e.cverb = ".";
        }
        else if (e.subj != null && e.obj != null && e.cverb.EndsWith("@@")) {
            Euler el = e.subj;
            e.subj = e.obj;
            e.obj = el;
            e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
        }
    }

    /// <summary>
    /// Parses the items of a "( ... )" list (the opening parenthesis has already
    /// been consumed) into a chain of RDFfirst / RDFrest nodes hanging off
    /// <c>e.obj</c> and ending in RDFnil, or in a "?name" node when an item
    /// starting with "@" is met.
    /// </summary>
    /// <param name="e">the node whose <c>obj</c> receives the list head</param>
    internal void list(Euler e) {
        Euler el = e;
        while (true) {
            String nt = tokenize();
            if (nt.StartsWith("@")) {
                el.obj = parse(false, '?' + nt.Substring(1));
                nt = tokenize();
                if (!nt.Equals(")")) Console.Error.WriteLine("** (p6) expecting ) at " + el + " but got " + nt);
                break;
            }
            else if (nt.Equals(")")) {
                el.obj = parse(false, RDFnil);
                break;
            }
            else el.obj = parse(false, RDFfirst);
            if (nt.Equals("[")) {
                el.obj.obj = parse(true, null);
                tokenize(); // consume the token that follows the bracketed item
            }
            else el.obj.obj = parse(false, nt);
            path(el.obj.obj);
            el.obj.near = parse(false, RDFrest);
            el = el.obj.near;
        }
        path(e.obj);
    }

    /// <summary>
    /// Handles path suffixes on a node: while the input continues with "." or "^"
    /// directly followed by something other than blank, newline, "]", "}" or "#"
    /// (and, for "^", not a second "^"), the node is rewritten in place into a
    /// node with that verb whose subject is a copy of the old node and whose
    /// object is the next parsed token.
    /// </summary>
    /// <param name="e">the node to extend</param>
    internal void path(Euler e) {
        if (str != null && pos < str.Length - 1 && str[pos] == '.' && str[pos + 1] != ' ' && str[pos + 1] != '\n'
                && str[pos + 1] != ']' && str[pos + 1] != '}' && str[pos + 1] != '#') {
            pos = pos + 1;
            e.subj = e.copy();
            e.verb = e.cverb = ".";
            e.bound = true;
            e.obj = parse(false, tokenize());
            String.Intern(e.obj.verb);
            path(e);
        }
        if (str != null && pos < str.Length - 1 && str[pos] == '^' && str[pos + 1] != '^' && str[pos + 1] != ' '
                && str[pos + 1] != '\n' && str[pos + 1] != ']' && str[pos + 1] != '}' && str[pos + 1] != '#') {
            pos = pos + 1;
            e.subj = e.copy();
            e.verb = e.cverb = "^";
            e.bound = true;
            e.obj = parse(false, tokenize());
            String.Intern(e.obj.verb);
            path(e);
        }
    }

    /// <summary>
    /// Resolves a URI reference against the base URI <c>u</c>.
    /// </summary>
    /// <remarks>
    /// "" and "#" stand for the base itself. Nothing is resolved when the base
    /// starts with "file:". A resolution failure is reported on stderr and the
    /// reference is returned as it was. A trailing ".n3#" is shortened to "#".
    /// Callers in this class only invoke this when <c>u</c> is not null.
    /// </remarks>
    /// <param name="s">the reference, without the surrounding angle brackets</param>
    /// <returns>the resolved URI text</returns>
    internal String toURI(String s) {
        if (s.Equals("") || s.Equals("#")) s = u;
        if (!u.StartsWith("file:")) {
            try {
                if (s.EndsWith("#"))
                    // resolve without the '#' against the base's directory, then put '#' back
                    s = new Uri(new Uri(u.Substring(0, u.LastIndexOf('/') + 1)), s.Substring(0, s.Length - 1)).ToString() + '#';
                else s = new Uri(new Uri(u), s).ToString();
            }
            catch (Exception e) {
                Console.Error.WriteLine(u + " " + s + " " + e);
            }
        }
        if (s.EndsWith(".n3#")) s = s.Substring(0, s.Length - 4) + "#";
        return s;
    }
}