// $Id: Parser.cs 1295 2007-05-11 16:52:51Z josd $
namespace Eulersharp {
using System;
using System.Collections;
using System.Globalization;
using System.Text;
using System.Xml;
/// N3 parser
/// Jos De Roo
public class Parser {
// Parser state shared by the tokenizer (next/token/tokenize) and the parser (parse/list/path).
internal String str; // N3 string being parsed
internal int pos = 0; // tokenizer position: index in str of the next unread character
internal int line = 1; // line counter, starts at 1 and is advanced by next()
internal ArrayList vt = null; // quantified variables table; parse() adds "_:" labels and "?" variables to it
internal Euler r = null; // root Euler object; r.verb holds the last token, r.ext holds the ns/nsp/kw tables
internal String u = null; // base URI of the RDF resource
// Characters that delimit tokens; next() returns each of them as a one-character token.
internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
// Verb shorthands; parse() maps them to Euler.RDFtype, Euler.OWLsameAs and Euler.LOGimplies.
internal const String LOGa = "a";
internal const String LOGe = "=";
internal const String LOGi = "=>";
// NOTE(review): the declaration below is truncated in this copy of the file: the string
// literal is unterminated, and the constants ONT, OWLequivalentTo, OWLsameIndividualAs,
// OWLsameClassAs, OWLsamePropertyAs, OWLdifferentIndividualFrom, OWLUnambiguousProperty and
// OWLUniqueProperty that parse() references are not declared anywhere in the visible source.
// Restore these declarations from the upstream file before building.
internal const String DPO = "constructs an N3 parser
/// <summary>Constructs an N3 parser without input; all fields keep their declared initial values.</summary>
public Parser() { }
/// <summary>Constructs an N3 parser over the given text.</summary>
/// <param name="s">the N3 string</param>
/// <param name="vartab">quantified variables table</param>
/// <param name="root">root Euler object</param>
/// <param name="uri">base URI of the RDF resource</param>
public Parser(String s, ArrayList vartab, Euler root, String uri) {
    this.u = uri;
    this.r = root;
    this.vt = vartab;
    this.str = s;
}
/// <summary>
/// N3 triple parse method: reads the next token and parses a statement that starts with it.
/// </summary>
/// <returns>the parsed Euler object, or null when the tokenizer has no token left</returns>
public virtual Euler Parse() {
    String first = tokenize();
    return first == null ? null : parse(true, first);
}
/// <summary>N3 node parse method: parses a single node (not a whole statement).</summary>
/// <param name="nt">next token, i.e. the token the node is built from</param>
/// <returns>the Euler object for that node</returns>
public virtual Euler Parse(String nt) {
    Euler node = parse(false, nt);
    return node;
}
/// <summary>
/// Recursive worker behind both Parse overloads. Builds one Euler object starting at token nt.
/// With b set it reads a whole statement: subject, verb, object and any continuations
/// introduced by a semicolon or comma, which are chained through the near field.
/// With b cleared it only turns nt into a single node (filling verb and cverb).
/// </summary>
/// <param name="b">true to parse a full statement starting at its subject, false to parse one node</param>
/// <param name="nt">token to start from; null together with b == true means "no subject token" (used for the inside of square brackets)</param>
/// <returns>
/// the Euler object that was built (in two cases its obj is returned instead, see the inline notes);
/// when a NullReferenceException occurs it is written to Console.Error and the partially filled object is returned
/// </returns>
internal Euler parse(bool b, String nt) {
Euler e = new Euler();
try {
// Origin of the node: Euler.Ekb when the base URI starts with "_:", otherwise the base URI in angle brackets.
if (u != null && u.StartsWith("_:")) e.src = Euler.Ekb;
else e.src = "<" + u + ">";
e.line = line;
// ---- subject (statement mode only) ----
if (b) {
if (nt == null) e.subj = null;
else if (nt.Equals("{")) {
// Subject is a { ... } formula. token() reports a "}" as "." first, so a "." right
// after "{" means the formula is empty and is represented by the node "{}".
nt = tokenize();
if (nt.Equals(".")) e.subj = parse(false, "{}");
else e.subj = parse(true, nt);
// Append the remaining statements of the formula to the end of the near chain.
Euler el = e.subj;
while (el.near != null) el = el.near;
nt = tokenize();
// NOTE(review): unlike the object-side loops below there is no nt != null test here.
while (!nt.Equals("}")) {
el.near = parse(true, nt);
while (el.near != null) el = el.near;
nt = tokenize();
}
}
else if (nt.Equals("[")) {
// Subject is a [ ... ] node: its content is parsed as a statement without a subject token.
e.subj = parse(true, null);
// NOTE(review): the three other "[" branches in this method test verb != null here — confirm == null is intended.
if (e.subj.verb == null) r.verb = ";]";
nt = tokenize();
if (!nt.Equals("]")) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p1) expecting ] at " + e + " but got " + nt);
// Look one token ahead; pos is restored below if the token is not ".".
int cpos = pos;
nt = tokenize();
if (nt.Equals(".")) {
// "[ ... ] ." : return a statement with an empty verb and an empty-token object.
e.obj = parse(false, "");
e.verb = "";
e.cverb = "";
e.bound = true;
return e;
}
else pos = cpos;
}
else if (nt.Equals("(")) {
// Subject is a ( ... ) list. list() stores its result in e.obj, so move it to subj.
list(e);
e.subj = e.obj;
e.obj = null;
// Same one-token look-ahead as for "[" above.
int cpos = pos;
nt = tokenize();
if (nt.Equals(".")) {
e.obj = parse(false, "");
e.verb = "";
e.cverb = "";
e.bound = true;
return e;
}
else pos = cpos;
}
else {
// Plain subject token; "this" gets a fragment URI built from the base URI and e.uid.
e.subj = parse(false, nt);
if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.uid + '>';
}
// Apply any "!" or "^" path steps that directly follow the subject.
path(e.subj);
// @forAll / @forSome as subject token are rewritten to "this log:forAll" / "this log:forSome";
// in every other case the verb token is read from the input.
if (nt != null && nt.Equals("@forAll")) {
e.subj = parse(false, "this");
nt = "log:forAll";
}
else if (nt != null && nt.Equals("@forSome")) {
e.subj = parse(false, "this");
nt = "log:forSome";
}
else nt = tokenize();
}
// ---- verb, or the single node when b is false ----
if (nt == null || nt.Equals(";")) {
// No token or ";" : nothing more to read for this node.
e.bound = true;
return e;
}
else if (e.subj != null && nt.StartsWith("_:")) {
// "_:" label in a statement that has a subject: register it in the variable table.
e.cverb = nt;
if (!vt.Contains(nt)) vt.Add(nt);
}
else if (nt.Equals("[")) {
// "[" here: a generated label "_:e" + uid stands for the node and is used as
// the subject token of the bracket's content.
String a = "_:e" + e.uid;
if (!vt.Contains(a)) vt.Add(a);
Euler ap = parse(true, a);
e.cverb = a;
if (ap.verb != null) r.verb = ";]";
nt = tokenize();
if (!nt.Equals("]")) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p2) expecting ] at " + e + " but got " + nt);
// When the content's verb equals Euler.OWLsameAs the node takes over the object's compact
// form; otherwise the content statement is attached through near.
if (ap.verb.Equals(Euler.OWLsameAs)) {
e.cverb = ap.obj.cverb;
e.vv = true;
}
else e.near = ap;
}
else if (nt.Equals("(")) {
// "(" here: parse the list and return it directly (returns e.obj, not e).
list(e);
return e.obj;
}
else e.cverb = nt;
// token() appends "_@" for has/@has: strip it and set vv.
if (e.cverb.EndsWith("_@")) {
e.vv = true;
e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
}
e.verb = e.cverb;
// token() appends "@@" for is/@is ... of: verb drops the marker, cverb keeps it for swap().
if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);
// Token written as a URI in angle brackets: split it after the last '#' (or, without '#',
// the last '/') into namespace and local name and rewrite it as prefix + local name.
if (r != null && e.verb.Length > 0 && e.verb[0] == '<') {
String nsq = toURI(e.verb.Substring(1, e.verb.Length - 2));
int nsh = nsq.LastIndexOf('#');
int nss = nsq.LastIndexOf('/');
String nsu = '<' + (nsh != -1 ? nsq.Substring(0, nsh + 1) : nsq.Substring(0, nss + 1)) + '>';
String nsn = nsh != -1 ? nsq.Substring(nsh + 1) : nsq.Substring(nss + 1);
String nsp = null;
// Look for a prefix already bound to this namespace (the last match in enumeration order wins).
for (IEnumerator enr = r.ext.ns.Keys.GetEnumerator(); enr.MoveNext(); ) {
String nsx = (String)enr.Current;
String nsy = (String)r.ext.ns[nsx];
if (nsu.Equals(nsy)) nsp = nsx;
}
// None found: bind the first unused prefix of the form "nsp" + i + ':' to it.
if (nsp == null) {
int i = 0;
while (true) {
nsp = "nsp" + i + ':';
if (r.ext.ns[nsp] == null) break;
i++;
}
r.ext.ns[nsp] = nsu;
}
e.verb = e.cverb = nsp + nsn;
}
// s1 is the part before the first double quote, s2 the rest (empty when there is no quote).
String s1 = e.verb;
String s2 = "";
if (e.verb.IndexOf('"') != -1) {
s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
s2 = e.verb.Substring(e.verb.IndexOf('"'));
}
// A name without ':' that does not start with one of the special characters, is not a
// numeral and is not in the keyword table gets the ":" prefix.
if (r != null && s1.Length > 0 && s1.IndexOf(':') == -1 &&
s1[0] != '(' && s1[0] != '<' && s1[0] != '[' &&
s1[0] != '_' && s1[0] != '"' && s1[0] != '?' &&
!r.isNumeral(s1) && r.ext.kw[s1] == null) s1 = ":" + s1;
// Prefixed name: map the prefix through nsp (when it has an entry) and look up its
// namespace in ns. cverb becomes prefix + local name, verb the expanded URI followed by s2.
if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe) &&
s1[0] != '(' && s1[0] != '<' && s1[0] != '[' &&
s1[0] != '_' && s1[0] != '"' && s1.IndexOf(':') != - 1) {
String pf = s1.Substring(0, s1.IndexOf(':') + 1);
String pg = (String) r.ext.nsp[pf];
if (pg != null) pf = pg;
String qf = (String) r.ext.ns[pf];
if (qf == null) {
// Unknown prefix: report it, then bind it to the base URI followed by '#'.
Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": no @prefix " + pf + " found for " + s1 + ", taking <" + u + "#>");
qf = "<" + u + "#>";
r.ext.ns[pf] = qf;
}
StringBuilder sb = new StringBuilder(pf);
sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
e.cverb = sb.ToString();
// Insert the local name just before the last character of the namespace string.
sb = new StringBuilder(qf);
sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
e.verb = sb.ToString() + s2;
}
// Angle-bracket token without any ':' : resolve it through toURI().
else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == - 1)
e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;
// A "?" token containing '@', or a token containing a quote followed by '@' (subject to the
// two '^' tests), is split at its last '@' into an "@" statement; the part after the '@'
// is also entered in the keyword table.
// NOTE(review): str[pos] and str[pos + 1] are read without a bounds check.
if (e.verb.Length > 0 && e.verb[0] == '?' && e.verb.IndexOf('@') != -1 ||
e.verb.IndexOf("\"@") != -1 && str[pos] != '^' && str[pos + 1] != '^') {
r.ext.kw[e.verb.Substring(e.verb.LastIndexOf('@') + 1)] = r;
e.subj = parse(false, e.verb.Substring(0, e.verb.LastIndexOf('@')));
e.obj = parse(false, e.verb.Substring(e.verb.LastIndexOf('@') + 1));
e.verb = e.cverb = "@";
}
// Map the verb shorthands and the DPO / ONT / OWL* constants onto the Euler vocabulary constants.
if (e.verb.Equals(LOGa)) e.verb = Euler.RDFtype;
if (e.verb.Equals(LOGe)) e.verb = Euler.OWLsameAs;
if (e.verb.Equals(LOGi)) e.verb = Euler.LOGimplies;
if (e.verb.StartsWith(DPO)) e.verb = Euler.OWL + e.verb.Substring(DPO.Length);
if (e.verb.StartsWith(ONT)) e.verb = Euler.OWL + e.verb.Substring(ONT.Length);
if (e.verb.Equals(OWLequivalentTo)) e.verb = Euler.OWLsameAs;
if (e.verb.Equals(OWLsameIndividualAs)) e.verb = Euler.OWLsameAs;
if (e.verb.Equals(OWLsameClassAs)) e.verb = Euler.OWLequivalentClass;
if (e.verb.Equals(OWLsamePropertyAs)) e.verb = Euler.OWLequivalentProperty;
if (e.verb.Equals(OWLdifferentIndividualFrom)) e.verb = Euler.OWLdifferentFrom;
if (e.verb.Equals(OWLUnambiguousProperty)) e.verb = Euler.OWLInverseFunctionalProperty;
if (e.verb.Equals(OWLUniqueProperty)) e.verb = Euler.OWLFunctionalProperty;
// Fixed compact forms for the list vocabulary.
if (e.verb.Equals(Euler.RDFfirst)) e.cverb = "rdf:first";
if (e.verb.Equals(Euler.RDFrest)) e.cverb = "rdf:rest";
if (e.verb.Equals(Euler.RDFnil)) e.cverb = "()";
// Register "?" variables; varid is the index in the variable table and a node with
// varid -1 is marked bound.
if (e.verb.StartsWith("?") && vt != null && !vt.Contains(e.verb)) vt.Add(e.verb);
if (vt != null && vt.Count > 0) e.varid = vt.IndexOf(e.verb);
if (e.varid == - 1) e.bound = true;
// A "." token followed by whitespace ends the statement here: return with an empty verb.
// NOTE(review): str[pos] is read without checking pos against str.Length.
if (nt.Equals(".") && (str[pos] == ' ' || str[pos] == '\t' || str[pos] == '\r' || str[pos] == '\n')) {
e.obj = parse(false, "");
e.verb = "";
e.cverb = "";
e.bound = true;
return e;
}
// "^^" directly after the token: parse the following token as datatype node and, unless
// its verb equals Euler.XSDstring, wrap the node in a "^^" statement.
// NOTE(review): str[pos + 1] is read without a bounds check.
if (!nt.Equals(Euler.RDFfirst) && str != null && pos < str.Length && str[pos] == '^' && str[pos + 1] == '^') {
pos = pos + 2;
Euler et = parse(false, tokenize());
if (!et.verb.Equals(Euler.XSDstring)) {
e.subj = e.copy();
e.verb = e.cverb = "^^";
e.bound = true;
e.obj = et;
}
}
// ---- object and continuations (statement mode only) ----
if (b) {
nt = tokenize();
if (nt.Equals("{")) {
// Object is a { ... } formula; handled like the subject formula above.
nt = tokenize();
if (nt.Equals(".")) e.obj = parse(false, "{}");
else e.obj = parse(true, nt);
Euler el = e.obj;
while (el.near != null) el = el.near;
nt = tokenize();
while (nt != null && !nt.Equals("}")) {
el.near = parse(true, nt);
while (el.near != null) el = el.near;
nt = tokenize();
}
}
else if (nt.Equals("[")) {
// Object is a [ ... ] node.
e.obj = parse(true, null);
if (e.obj.verb != null) r.verb = ";]";
nt = tokenize();
if (!nt.Equals("]")) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p3) expecting ] at " + e + " but got " + nt);
}
else if (nt.Equals("(")) list(e);
else e.obj = parse(false, nt);
path(e.obj);
nt = tokenize();
if (nt == null) return e;
// A following token that ends in three double quotes is appended to the object's text.
if (nt.EndsWith("\"\"\"")) {
e.obj.cverb = e.obj.cverb + nt;
e.obj.verb = e.obj.cverb;
nt = tokenize();
}
// Continuations: after ";" a new verb token is read, after "," the previous compact verb
// is reused. Each extra statement gets e.subj as subject and is appended to the near chain.
Euler el2 = e;
while (nt.Equals(";") || nt.Equals(",")) {
while (el2.near != null) el2 = el2.near;
if (nt.Equals(";")) {
nt = tokenize();
if (nt.Equals("]")) {
// ";" directly before "]" : set r.verb to ";]" again so that token() hands out
// the "]" once more, and stop with nt = ";".
r.verb = ";]";
nt = ";";
break;
}
else if (nt.Equals(".")) break;
}
else nt = el2.cverb;
el2.near = parse(false, nt);
el2.near.subj = e.subj;
// Object of the continuation: same three forms as for the first object.
nt = tokenize();
if (nt.Equals("{")) {
nt = tokenize();
if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
else el2.near.obj = parse(true, nt);
Euler ef = el2.near.obj;
while (ef.near != null) ef = ef.near;
nt = tokenize();
while (nt != null && !nt.Equals("}")) {
ef.near = parse(true, nt);
while (ef.near != null) ef = ef.near;
nt = tokenize();
}
}
else if (nt.Equals("[")) {
el2.near.obj = parse(true, null);
if (el2.near.obj.verb != null) r.verb = ";]";
nt = tokenize();
if (!nt.Equals("]")) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p4) expecting ] at " + e + " but got " + nt);
}
else if (nt.Equals("(")) list(el2.near);
else el2.near.obj = parse(false, nt);
nt = tokenize();
if (nt == null) return e;
if (nt.EndsWith("\"\"\"")) {
el2.near.obj.cverb = el2.near.obj.cverb + nt;
el2.near.obj.verb = el2.near.obj.cverb;
nt = tokenize();
}
// Handle an "@@" marker on the continuation's verb.
swap(el2.near);
}
// The statement has to end with "." or ";" ; anything else is reported but not fatal.
if (!nt.Equals(".") && !nt.Equals(";"))
Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p5) expecting . or ; at " + e + " but got " + nt);
}
// Handle an "@@" marker on this node's verb.
swap(e);
// A statement whose verb is Euler.LOGimplies and whose subject is the node "{}" is
// replaced by its object, flagged with vv.
if (e.verb.Equals(Euler.LOGimplies) && e.subj != null && e.subj.verb.Equals("{}")) {
e.obj.vv = true;
e = e.obj;
}
e.far = r;
return e;
}
catch (NullReferenceException exc) {
// Best effort: print the exception and return whatever has been filled in so far.
Console.Error.WriteLine(exc);
return e;
}
}
/// <summary>
/// Returns the next token, calling token() again for as long as it yields null
/// and unread input remains.
/// </summary>
/// <returns>the next token, or null when token() yields null and the input is used up</returns>
internal String tokenize() {
    String result;
    do {
        result = token();
    } while (result == null && pos < str.Length);
    return result;
}
/// <summary>
/// Reads the next raw lexeme from str at pos: either a run of characters that are not in ST,
/// or, when the current character is in ST, that single character.
/// Increments line when the first unread character after the lexeme is a line feed.
/// </summary>
/// <returns>the lexeme (empty when pos is already at the end of str)</returns>
internal String next() {
    int begin = pos;
    int end = str.Length;
    // Consume characters up to the first delimiter.
    for (; pos < end; pos++) {
        if (ST.IndexOf(str[pos]) >= 0) break;
    }
    // Nothing consumed although input is left: the current character is a delimiter, so take it alone.
    if (pos == begin && pos < end) pos++;
    if (pos < end && str[pos] == '\n') line++;
    return str.Substring(begin, pos - begin);
}
/// <summary>
/// Produces the next token from the lexemes delivered by next(). Handles comments, directives,
/// quoted strings, angle-bracket URIs and the is/of and has keywords.
/// r.verb is used as memory of the previous token: a closing brace is delivered as "." followed
/// by the brace, and a closing square bracket as ";" followed by the bracket.
/// </summary>
/// <returns>
/// the token, or null when the consumed input produced no token (whitespace, a comment,
/// a prefix declaration) or when the input is used up
/// </returns>
internal String token() {
// Second half of a "}" or "]" that was announced as "." or ";" by an earlier step.
if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
if (pos >= str.Length) return null;
int start = pos;
String t = null;
String nt = next();
if (nt.IndexOf('#') != - 1) {
// Lexeme contains '#': keep the text before it and skip lexemes until a line feed lexeme
// or the end of the input. r.verb is saved and restored around the skip.
t = nt.Substring(0, nt.IndexOf('#'));
String v = r.verb;
while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
r.verb = v;
if (t.Equals("")) return null;
}
// The following upper-case keywords are presumably SPARQL query keywords — TODO confirm.
else if (nt.Equals("SELECT")) {
// Collect the lexemes up to the end of the line into one token wrapped in parentheses.
StringBuilder sb = new StringBuilder("(");
nt = tokenize();
while (!nt.Equals("\r") && !nt.Equals("\n") && pos < str.Length) {
sb.Append(nt);
nt = next();
}
t = sb.Append(')').ToString();
}
else if (nt.Equals("CONSTRUCT")) {
// Keyword is dropped; the token after it is returned.
t = tokenize();
}
else if (nt.Equals("FROM")) {
// TODO
// One token is read and discarded (two when the first is NAMED); the token after that is returned.
nt = tokenize();
if (nt.Equals("NAMED")) tokenize();
t = tokenize();
}
else if (nt.Equals("UNION")) {
t = ".";
}
else if (nt.Equals("OPTIONAL")) {
// TODO
t = tokenize();
}
else if (nt.Equals("FILTER")) {
// TODO
t = tokenize();
}
else if (nt.Equals("@prefix") || nt.Equals("PREFIX")) {
// Prefix declaration: nsc is the declared prefix ("default" is read as ":"), nsu the namespace.
String nsc = tokenize();
if (nsc.Equals("default")) nsc = ":";
String nsd = null;
String nsu = tokenize();
// A namespace in angle brackets without any ':' is resolved through toURI().
if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == - 1)
nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
// Reuse a prefix that is already bound to the same namespace (last match in enumeration order).
for (IEnumerator enr = r.ext.ns.Keys.GetEnumerator(); enr.MoveNext(); ) {
String nsx = (String)enr.Current;
String nsy = (String)r.ext.ns[nsx];
if (nsu.Equals(nsy)) nsd = nsx;
}
// Otherwise use the declared prefix; while that name is bound to a different namespace,
// try "p" + i + prefix instead.
if (nsd == null) {
nsd = nsc;
String nsv = (String)r.ext.ns[nsd];
int i = 0;
while (nsv != null && !nsu.Equals(nsv)) {
nsd = "p" + i + nsc;
nsv = (String)r.ext.ns[nsd];
i++;
}
}
// nsp maps the declared prefix to the prefix actually used; ns maps that prefix to the namespace.
r.ext.nsp[nsc] = nsd;
r.ext.ns[nsd] = nsu;
// For the @prefix form one more token is consumed (presumably the closing ".").
if (nt.Equals("@prefix")) tokenize();
return null;
}
else if (nt.Equals("@keywords")) {
// Enter every lexeme up to the "." into the keyword table, skipping whitespace and commas.
while (nt != null && !nt.Equals(".") && pos < str.Length) {
nt = next();
if (!nt.Equals(" ") && !nt.Equals("\t") && !nt.Equals("\r") && !nt.Equals("\n") && !nt.Equals(",") && !nt.Equals(".")) r.ext.kw[nt] = r;
}
t = tokenize();
}
else if (nt.Equals("\"")) {
// Double-quoted string; the quotes are kept in the token.
StringBuilder sb = new StringBuilder("\"");
// Two more quotes directly after the first: string delimited by three quotes, copied up to
// and including the closing three quotes.
// NOTE(review): str[pos], str[pos + 1] and str[pos + 2] are read without bounds checks.
if (str[pos] == '"' && str[pos + 1] == '"') {
sb.Append(next()).Append(next());
while (pos < str.Length && (str[pos] != '"' || str[pos + 1] != '"' || str[pos + 2] != '"')) sb.Append(next());
sb.Append(next()).Append(next()).Append(next());
}
else {
nt = next();
while (!nt.Equals("\"")) {
// A backslash is copied together with the lexeme after it, so a quote that
// follows a backslash does not end the string.
if (nt.Equals("\\")) {
sb.Append(nt);
nt = next();
}
sb.Append(nt);
if (pos >= str.Length) {
Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (t1) expecting \" at " + sb);
break;
}
nt = next();
}
sb.Append("\"");
}
// '@' directly after the string: the following token is appended, with everything from
// its first '-' onwards converted to upper case.
if (pos < str.Length && str[pos] == '@') {
String lang = token();
int i = lang.IndexOf('-');
if (i != -1) sb.Append(lang.Substring(0, i) + lang.Substring(i).ToUpper());
else sb.Append(lang);
}
t = sb.ToString();
}
else if (nt.Equals("'")) {
// Single-quoted text, copied up to the next single quote; the quotes are kept.
StringBuilder sb = new StringBuilder("'");
nt = next();
while (!nt.Equals("'")) {
sb.Append(nt);
if (pos >= str.Length) {
Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (t2) expecting ' at " + sb);
break;
}
nt = next();
}
t = sb.Append("'").ToString();
}
else if (nt.Equals("<")) {
// Text in angle brackets, copied up to the next '>' ; the brackets are kept.
StringBuilder sb = new StringBuilder("<");
nt = next();
while (!nt.Equals(">")) {
sb.Append(nt);
if (pos >= str.Length) {
// NOTE(review): the tag (t2) is also used by the single-quote branch above.
Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (t2) expecting > at " + sb);
break;
}
nt = next();
}
t = sb.Append(">").ToString();
}
else if (nt.Equals("=")) {
// "=" directly followed by '>' becomes the token "=>".
// NOTE(review): str[pos] is read without a bounds check.
if (str[pos] == '>') t = nt + next();
else t = nt;
}
else if (nt.Equals("is") || nt.Equals("@is")) {
// is <token> of : the token is returned with "@@" appended.
t = tokenize();
nt = tokenize();
if (!(nt.Equals("of") || nt.Equals("@of"))) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (t4) expecting \"of\" but got " + nt);
t = t + "@@";
}
else if (nt.Equals("has") || nt.Equals("@has")) {
// has <token> [of] : the token is returned with "_@" appended; a following "of" is
// consumed, any other token is left unread by restoring pos.
t = tokenize() + "_@";
int cpos = pos;
nt = tokenize();
if (!(nt.Equals("of") || nt.Equals("@of"))) pos = cpos;
}
// Whitespace and the lexemes "-" and ">" yield no token.
else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n") ||
nt.Equals("-") || nt.Equals(">"))
return null;
// A "_:" label that does not already end with "_" gets "_" + Euler.doc + "_" appended.
else if (nt.StartsWith("_:") && !nt.EndsWith("_")) t = nt + "_" + Euler.doc + "_";
else t = nt;
// A token that does not occur in ST, does not start with a double quote and is directly
// followed by one absorbs the following token.
if (pos < str.Length && ST.IndexOf(t) == -1 && t[0] != '"' && str[pos] == '"') t = t + token();
// A '.' directly followed by something other than whitespace, ']', '}' or '#' belongs to
// the token: append the '.' and the lexeme after it.
while (pos < str.Length - 1 && str[pos] == '.' &&
str[pos + 1] != ' ' && str[pos + 1] != '\t' && str[pos + 1] != '\r' && str[pos + 1] != '\n' &&
str[pos + 1] != ']' && str[pos + 1] != '}' && str[pos + 1] != '#') {
t = t + next() + next();
}
// "}" while the previous token was not "." : deliver "." now and the "}" on the next call.
if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
r.verb = ".}";
return ".";
}
// "]" while the previous token was not ";" : deliver ";" now and the "]" on the next call.
if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
r.verb = ";]";
return ";";
}
// Remember the token as the previous token and return it.
return r.verb = t;
}
/// <summary>
/// Resolves the "@@" marker that token() appends for "is ... of": subject and object of the
/// statement change places.
/// </summary>
/// <param name="e">statement to adjust; null or a statement whose cverb has no "@@" suffix is left untouched</param>
internal void swap(Euler e) {
    // The original only guarded the first branch against null; guard both.
    if (e == null) return;
    // Ordinal test so that the two characters removed below are exactly the "@@" that matched.
    if (!e.cverb.EndsWith("@@", StringComparison.Ordinal)) return;
    String unmarked = e.cverb.Substring(0, e.cverb.Length - 2);
    if (e.subj == null) {
        // No subject: the object becomes the subject and the unmarked verb is parsed as the
        // new object of a "!" statement.
        e.subj = e.obj;
        e.obj = parse(false, unmarked);
        e.verb = e.cverb = "!";
    }
    else if (e.obj != null) {
        // Subject and object present: exchange them and drop the marker from cverb.
        Euler formerSubject = e.subj;
        e.subj = e.obj;
        e.obj = formerSubject;
        e.cverb = unmarked;
    }
}
/// <summary>
/// Parses the items of a parenthesised list; the opening parenthesis has already been consumed.
/// The list is built as a chain of Euler.RDFfirst nodes (item in obj) linked through
/// Euler.RDFrest nodes in near, ending in an Euler.RDFnil node or, after a token starting with
/// '@', in a node for '?' + the rest of that token. The head is stored in e.obj.
/// </summary>
/// <param name="e">node whose obj receives the list</param>
internal void list(Euler e) {
    Euler cell = e;
    for (;;) {
        String tok = tokenize();
        if (tok.StartsWith("@")) {
            // Token starting with '@': ends the list with a '?' node; a ")" has to follow.
            cell.obj = parse(false, '?' + tok.Substring(1));
            tok = tokenize();
            if (!tok.Equals(")")) Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": (p6) expecting ) at " + cell + " but got " + tok);
            break;
        }
        if (tok.Equals(")")) {
            cell.obj = parse(false, Euler.RDFnil);
            break;
        }
        cell.obj = parse(false, Euler.RDFfirst);
        Euler head = cell.obj;
        if (tok.Equals("{")) {
            // Item is a formula: first statement (or "{}" when a "." follows directly),
            // then the remaining statements appended to the end of its near chain.
            tok = tokenize();
            head.obj = tok.Equals(".") ? parse(false, "{}") : parse(true, tok);
            Euler tail = head.obj;
            while (tail.near != null) tail = tail.near;
            for (tok = tokenize(); !tok.Equals("}"); tok = tokenize()) {
                tail.near = parse(true, tok);
                while (tail.near != null) tail = tail.near;
            }
        }
        else if (tok.Equals("[")) {
            // Item in square brackets; the token after its content is consumed unchecked.
            head.obj = parse(true, null);
            tokenize();
        }
        else {
            head.obj = parse(false, tok);
        }
        path(head.obj);
        // Link to the next cell and continue there.
        head.near = parse(false, Euler.RDFrest);
        cell = head.near;
    }
    path(e.obj);
}
/// <summary>
/// Applies path steps that directly follow a node in the input. For "!" and then for "^"
/// (a single one, not "^^") the node is turned in place into a statement whose subject is a
/// copy of the old node, whose verb is the operator and whose object is the next token;
/// further steps are handled recursively.
/// </summary>
/// <param name="e">node to extend in place</param>
internal void path(Euler e) {
    if (pathStepAhead('!')) applyPathStep(e, "!");
    if (pathStepAhead('^') && str[pos + 1] != '^') applyPathStep(e, "^");
}

// True when str[pos] is op and the character after it is none of: space, tab, CR, LF, ']', '}', '#'.
private bool pathStepAhead(char op) {
    if (str == null || pos >= str.Length - 1) return false;
    if (str[pos] != op) return false;
    return " \t\r\n]}#".IndexOf(str[pos + 1]) < 0;
}

// Consumes the operator character and rewrites e into "copy-of-e op next-token", then looks for more steps.
private void applyPathStep(Euler e, String op) {
    pos = pos + 1;
    e.subj = e.copy();
    e.verb = e.cverb = op;
    e.bound = true;
    e.obj = parse(false, tokenize());
    e.near = null;
    path(e);
}
/// <summary>
/// Resolves a URI reference against the base URI u.
/// An empty reference or one starting with '#' is appended to the base URI. Any reference
/// without '?' is then resolved against the base URI up to and including its last '/'.
/// A result ending in ".n3#" has the ".n3" removed.
/// </summary>
/// <param name="s">URI reference without the surrounding angle brackets</param>
/// <returns>
/// the resolved URI; when resolution fails the error is written to Console.Error and the
/// unresolved text is returned; when there is no base URI (null or empty) nothing is resolved
/// </returns>
internal String toURI(String s) {
    // parse() can get here without a base URI (u is null after the default constructor);
    // treat that like an empty base instead of throwing NullReferenceException.
    String baseUri = u ?? "";
    if (s.Length == 0 || s.StartsWith("#", StringComparison.Ordinal)) s = baseUri + s;
    if (baseUri.Length > 0 && s.IndexOf('?') == -1) {
        try {
            String directory = baseUri.Substring(0, baseUri.LastIndexOf('/') + 1);
            if (s.EndsWith("#", StringComparison.Ordinal)) {
                // Resolve without the trailing '#' and put it back afterwards.
                s = new Uri(new Uri(directory), s.Substring(0, s.Length - 1)).ToString() + '#';
            }
            else if (!baseUri.StartsWith("_:", StringComparison.Ordinal)) {
                s = new Uri(new Uri(directory), s).ToString();
            }
        }
        catch (Exception e) {
            // Best effort: report the problem and continue with the text as it is.
            Console.Error.WriteLine("** Parser error ** " + u + " line " + line + ": found " + s + "\n" + e);
        }
    }
    if (s.EndsWith(".n3#", StringComparison.Ordinal)) s = s.Substring(0, s.Length - 4) + "#";
    return s;
}
}
}