// $Id: ParserNew.cs 1295 2007-05-11 16:52:51Z josd $
// PxButton | build | csc /o /doc:Euler.xml *.cs |
using System;
using System.Collections;
using System.Text;
using System.Xml;
/// <summary>N3 parser.</summary>
/// <remarks>Author: Jos De Roo.</remarks>
public class Parser {
// Parser state. These fields are read and mutated directly by the parsing methods of this class.
internal String str; // N3 string
// Index into str of the next character next() will read.
internal int pos = 0; // tokenizer position
// Variable names ("_:" blank nodes, "?" variables) are appended here while parsing.
internal ArrayList vt = null; // quantified variables table
// Besides being the root, r.verb carries the tokenizer's last-token state and
// r.nsp / r.hshtNs hold the prefix tables filled by @prefix handling.
internal Euler r = null; // root Euler object
// Used to resolve relative angle-bracketed references and undeclared prefixes.
internal String u = null; // base URI of the RDF resource
// Token delimiters: next() stops at any of these and returns each one as a one-character token.
internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
// N3 shorthand verbs; parse() replaces them with RDFtype, OWLsameAs and LOGimplies respectively.
internal const String LOGa = "a";
internal const String LOGe = "=";
internal const String LOGi = "=>";
// NOTE(review): this value is empty, so parse() rewrites "=>" to "". It presumably held an
// angle-bracketed URI that was lost when the file was extracted -- confirm against the upstream source.
internal const String LOGimplies = "";
// NOTE(review): the literal below is unterminated and is fused with what looks like the doc comment
// of the default constructor; the file cannot compile as-is. Constants referenced elsewhere in this
// class (RDFtype, OWLsameAs, OWL, ONT, XSDstring, RDFnil, RDFfirst, RDFrest, OWLequivalentTo, ...)
// are not declared in the visible source and were presumably lost at this point -- restore from upstream.
internal const String DPO = "constructs an N3 parser
/// <summary>
/// Constructs an N3 parser and leaves every field at its declared default.
/// </summary>
public Parser() { }
/// <summary>Constructs an N3 parser over the given input.</summary>
/// <param name="s">the N3 string</param>
/// <param name="vartab">quantified variables table</param>
/// <param name="root">root Euler object</param>
/// <param name="uri">base URI of the RDF resource</param>
public Parser(String s, ArrayList vartab, Euler root, String uri) {
    this.u = uri;
    this.r = root;
    this.vt = vartab;
    this.str = s;
}
/// <summary>
/// N3 triple parse method: fetches the next token and parses a full statement starting from it.
/// </summary>
/// <returns>the Euler object for the statement, or null when tokenize() produced no token</returns>
public virtual Euler Parse() {
    String first = tokenize();
    return first == null ? null : parse(true, first);
}
/// <summary>N3 node parse method: builds a single node from an already-read token.</summary>
/// <param name="nt">next token</param>
/// <returns>the Euler object produced by parse(false, nt)</returns>
public virtual Euler Parse(String nt) {
    Euler node = this.parse(false, nt);
    return node;
}
/// <summary>
/// Core recursive routine. In statement mode it reads subject, verb and object
/// (plus any ';' / ',' continuations, chained through <c>near</c>); in node mode
/// it turns a single token into a node whose <c>verb</c> holds the expanded term
/// and whose <c>cverb</c> holds the compact form.
/// </summary>
/// <param name="b">true to parse a whole statement whose subject starts at <paramref name="nt"/>;
/// false to build one node from <paramref name="nt"/></param>
/// <param name="nt">the starting token; null in statement mode means "no subject"</param>
/// <returns>
/// The populated Euler object. When the verb token is "(", the list built by list() is returned instead.
/// A NullReferenceException raised while parsing is logged to stderr and the partially built object is returned.
/// </returns>
internal Euler parse(bool b, String nt) {
    Euler e = new Euler();
    try {
        if (b) {
            // ---- subject ----
            if (nt == null) e.subj = null;
            else if (nt.Equals("{")) {
                // Formula: parse the statements up to "}" and chain them through near.
                nt = tokenize();
                if (nt.Equals(".")) e.subj = parse(false, "{}");
                else e.subj = parse(true, nt);
                Euler el = e.subj;
                nt = tokenize();
                while (el.near != null) el = el.near;
                while (!nt.Equals("}")) {
                    el.near = parse(true, nt);
                    while (el.near != null) el = el.near;
                    nt = tokenize();
                }
            }
            else if (nt.Equals("[")) {
                e.subj = parse(true, null);
                // NOTE(review): this tests "== null" while the other three "[" branches below
                // test "!= null"; left unchanged -- confirm which is intended.
                if (e.subj.verb == null) r.verb = ";]";
                nt = tokenize();
                if (!nt.Equals("]")) Console.Error.WriteLine("** (p1) expecting ] at " + e + " but got " + nt);
            }
            else if (nt.Equals("(")) {
                // list() stores its result in e.obj; move it to the subject slot.
                list(e);
                e.subj = e.obj;
                e.obj = null;
            }
            else {
                e.subj = parse(false, nt);
                if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.GetHashCode() + '>';
            }
            path(e.subj);
            nt = tokenize();
        }

        // ---- verb (or, in node mode, the node itself) ----
        if (nt == null || nt.Equals(";")) {
            e.bound = true;
            return e;
        }
        else if (e.subj != null && nt.StartsWith("_:")) {
            e.cverb = nt;
            // vt is null after the default constructor; guard it the same way the varid lookup below does.
            if (vt != null && !vt.Contains(nt)) vt.Add(nt);
        }
        else if (nt.Equals("[")) {
            // Anonymous node: invent a blank-node label and parse the bracket body with it as subject.
            String a = "_:" + e.GetHashCode();
            if (vt != null && !vt.Contains(a)) vt.Add(a);
            Euler ap = parse(true, a);
            e.cverb = a;
            if (ap.verb != null) r.verb = ";]";
            nt = tokenize();
            if (!nt.Equals("]")) Console.Error.WriteLine("** (p2) expecting ] at " + e + " but got " + nt);
            e.near = ap;
        }
        else if (nt.Equals("(")) {
            list(e);
            return e.obj;
        }
        else e.cverb = nt;

        e.verb = e.cverb;
        // token() appends "@@" to mark an "is ... of" verb; swap() acts on the marker kept in cverb.
        if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);

        // Split off anything from the first double quote so prefix expansion only sees the leading part.
        String s1 = e.verb;
        String s2 = "";
        if (e.verb.IndexOf('"') != -1) {
            s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
            s2 = e.verb.Substring(e.verb.IndexOf('"'));
        }

        if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe) &&
            s1[0] != '(' && s1[0] != '<' && s1[0] != '[' &&
            s1[0] != '_' && s1[0] != '"' && s1[0] != '\'' && s1.IndexOf(':') != -1) {
            // Prefixed name: map the prefix through r.nsp, then expand it with r.hshtNs.
            String pf = s1.Substring(0, s1.IndexOf(':') + 1);
            String pg = (String) r.nsp[pf];
            if (pg != null) pf = pg;
            String qf = (String) r.hshtNs[pf];
            if (qf == null) {
                Console.Error.WriteLine("** no @prefix " + pf + " found, taking <" + u + "#>");
                qf = "<" + u + "#>";
                r.hshtNs[pf] = qf;
            }
            StringBuilder sb = new StringBuilder(pf);
            sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
            e.cverb = sb.ToString();
            // Insert the local name just before the closing '>' of the namespace URI.
            sb = new StringBuilder(qf);
            sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
            e.verb = sb.ToString() + s2;
        }
        else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == -1)
            // Angle-bracketed reference without a scheme: resolve against the base URI.
            e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;

        // Normalise single-quoted numbers. The invariant culture is used for both parsing and
        // formatting so that "1.5" is not rejected or rewritten as "1,5" under a comma-decimal locale.
        if (e.verb.StartsWith("'"))
            e.verb = "'" + Double.Parse(e.verb.Substring(1, e.verb.Length - 2),
                System.Globalization.CultureInfo.InvariantCulture)
                .ToString(System.Globalization.CultureInfo.InvariantCulture) + "'";

        // Rewrite shorthand verbs and older vocabulary terms to their current equivalents.
        if (e.verb.Equals(LOGa)) e.verb = RDFtype;
        if (e.verb.Equals(LOGe)) e.verb = OWLsameAs;
        if (e.verb.Equals(LOGi)) e.verb = LOGimplies;
        if (e.verb.StartsWith(DPO)) e.verb = OWL + e.verb.Substring(DPO.Length);
        if (e.verb.StartsWith(ONT)) e.verb = OWL + e.verb.Substring(ONT.Length);
        if (e.verb.Equals(OWLequivalentTo)) e.verb = OWLsameAs;
        if (e.verb.Equals(OWLsameIndividualAs)) e.verb = OWLsameAs;
        if (e.verb.Equals(OWLsameClassAs)) e.verb = OWLequivalentClass;
        if (e.verb.Equals(OWLsamePropertyAs)) e.verb = OWLequivalentProperty;
        if (e.verb.Equals(OWLdifferentIndividualFrom)) e.verb = OWLdifferentFrom;
        if (e.verb.Equals(OWLUnambiguousProperty)) e.verb = OWLInverseFunctionalProperty;
        if (e.verb.Equals(OWLUniqueProperty)) e.verb = OWLFunctionalProperty;

        // Register "?" variables and record the variable index; a term not in the table is bound.
        if (vt != null && e.verb.StartsWith("?") && !vt.Contains(e.verb)) vt.Add(e.verb);
        if (vt != null && vt.Count > 0) e.varid = vt.IndexOf(e.verb);
        if (e.varid == -1) e.bound = true;

        // A "." in verb position followed by whitespace ends the statement right after the subject.
        // End of input is treated the same way instead of indexing past the end of str.
        if (nt.Equals(".") && str != null &&
            (pos >= str.Length || str[pos] == ' ' || str[pos] == '\n')) {
            e.obj = parse(false, "");
            e.verb = "";
            e.cverb = "";
            e.bound = true;
            return e;
        }

        // "^^" datatype suffix. Both characters must exist before they are inspected.
        if (str != null && pos < str.Length - 1 && str[pos] == '^' && str[pos + 1] == '^') {
            pos = pos + 2;
            Euler et = parse(false, tokenize());
            if (!et.verb.Equals(XSDstring)) {
                // Wrap the literal: the literal becomes the subject of a "^^" node whose object is the datatype.
                e.subj = e.copy();
                e.verb = e.cverb = "^^";
                e.bound = true;
                e.obj = et;
                // Keep the interned instance; the original discarded the return value.
                e.obj.verb = String.Intern(e.obj.verb);
                if (e.subj.bound) Datatype.Compare(e.obj.verb, r.getLit(e.subj), r.getLit(e.subj));
            }
        }

        if (b) {
            // ---- object ----
            nt = tokenize();
            if (nt.Equals("{")) {
                nt = tokenize();
                if (nt.Equals(".")) e.obj = parse(false, "{}");
                else e.obj = parse(true, nt);
                Euler el = e.obj;
                nt = tokenize();
                while (nt != null && !nt.Equals("}")) {
                    el.near = parse(true, nt);
                    el = el.near;
                    nt = tokenize();
                }
            }
            else if (nt.Equals("[")) {
                e.obj = parse(true, null);
                if (e.obj.verb != null) r.verb = ";]";
                nt = tokenize();
                if (!nt.Equals("]")) Console.Error.WriteLine("** (p3) expecting ] at " + e + " but got " + nt);
            }
            else if (nt.Equals("(")) list(e);
            else e.obj = parse(false, nt);
            path(e.obj);
            nt = tokenize();
            // A following token that ends in three double quotes is glued onto the object.
            if (nt.EndsWith("\"\"\"")) {
                e.obj.cverb = e.obj.cverb + nt;
                e.obj.verb = e.obj.cverb;
                nt = tokenize();
            }

            // ---- ';' (new verb) and ',' (same verb) continuations, chained through near ----
            Euler el2 = e;
            while (nt.Equals(";") || nt.Equals(",")) {
                while (el2.near != null) el2 = el2.near;
                if (nt.Equals(";")) {
                    nt = tokenize();
                    if (nt.Equals("]")) {
                        // "; ]" : hand the bracket back to the tokenizer and finish this statement.
                        r.verb = ";]";
                        nt = ";";
                        break;
                    }
                    else if (nt.Equals(".")) break;
                }
                else nt = el2.cverb; // ',' repeats the verb of the last element in the chain
                el2.near = parse(false, nt);
                el2.near.subj = e.subj;
                nt = tokenize();
                if (nt.Equals("{")) {
                    nt = tokenize();
                    if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
                    else el2.near.obj = parse(true, nt);
                    Euler ef = el2.near.obj;
                    nt = tokenize();
                    while (nt != null && !nt.Equals("}")) {
                        ef.near = parse(true, nt);
                        ef = ef.near;
                        nt = tokenize();
                    }
                }
                else if (nt.Equals("[")) {
                    el2.near.obj = parse(true, null);
                    if (el2.near.obj.verb != null) r.verb = ";]";
                    nt = tokenize();
                    if (!nt.Equals("]")) Console.Error.WriteLine("** (p4) expecting ] at " + e + " but got " + nt);
                }
                else if (nt.Equals("(")) list(el2.near);
                else el2.near.obj = parse(false, nt);
                nt = tokenize();
                if (nt.EndsWith("\"\"\"")) {
                    el2.near.obj.cverb = el2.near.obj.cverb + nt;
                    el2.near.obj.verb = el2.near.obj.cverb;
                    nt = tokenize();
                }
                swap(el2.near);
            }
            if (!nt.Equals(".") && !nt.Equals(";"))
                Console.Error.WriteLine("** (p5) expecting . or ; at " + e + " but got " + nt);
        }
        swap(e);
        e.far = r;
        return e;
    }
    catch (NullReferenceException exc) {
        // Typically a null token at unexpected end of input: log and return what was built so far.
        Console.Error.WriteLine(exc.StackTrace);
        return e;
    }
}
/// <summary>
/// Returns the next significant token, calling token() again for as long as it
/// yields null and input remains.
/// </summary>
/// <returns>the token, or null when the input ran out first</returns>
internal String tokenize() {
    String found;
    for (found = token(); pos < str.Length && found == null; found = token()) {
        // keep asking until a token turns up or the input ends
    }
    return found;
}
/// <summary>
/// Reads one raw lexeme from str at pos: either a run of characters that are not in ST,
/// or a single ST character. Advances pos past what was read.
/// </summary>
/// <returns>the lexeme just read</returns>
/// <remarks>
/// Must not be called with pos at the end of str: the delimiter check indexes str[pos]
/// and throws IndexOutOfRangeException in that case.
/// </remarks>
internal String next() {
    int begin = pos;
    for (; pos < str.Length; pos++) {
        if (ST.IndexOf(str[pos]) >= 0) break;
    }
    bool nothingConsumed = (pos == begin);
    // Nothing consumed means the current character is itself a delimiter: take exactly that one.
    if (nothingConsumed && ST.IndexOf(str[pos]) >= 0) {
        pos++;
    }
    int length = pos - begin;
    return str.Substring(begin, length);
}
/// <summary>
/// Produces the next N3 token by combining raw lexemes from next(): quoted strings,
/// angle-bracketed URIs, "=>" and "is/has ... of" phrases each become a single token.
/// Comments, whitespace and @prefix / bind directives produce no token.
/// </summary>
/// <returns>
/// The token, or null when nothing was produced (end of input, whitespace, a comment,
/// or a prefix directive). A "}" or "]" is reported as "." or ";" first, and the
/// bracket itself is delivered by the following call.
/// </returns>
/// <remarks>
/// r.verb doubles as tokenizer state: it holds the last token returned, or ".}" / ";]"
/// while a closing bracket is pending. All look-ahead on str is bounds-checked so that
/// input ending in a quote, "=", or "." no longer throws IndexOutOfRangeException.
/// </remarks>
internal String token() {
    // Deliver a closing bracket deferred by the previous call.
    if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
    if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
    if (pos >= str.Length) return null;
    String t = null;
    String nt = next();
    if (nt.IndexOf('#') != -1) {
        // Comment: keep whatever preceded '#', then skip to the end of the line.
        t = nt.Substring(0, nt.IndexOf('#'));
        String v = r.verb;
        while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
        r.verb = v;
        if (t.Equals("")) return null;
    }
    else if (nt.Equals("@prefix") || nt.Equals("bind")) {
        String nsc = tokenize();
        if (nsc.Equals("default")) nsc = ":";
        String nsd = nsc;
        String nsu = tokenize();
        if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == -1)
            nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
        // If the prefix is already bound to a different URI, derive a fresh name by prepending "ns".
        String nsv = (String) r.hshtNs[nsd];
        while (nsv != null && !nsu.Equals(nsv)) {
            nsd = "ns" + nsd;
            nsv = (String) r.hshtNs[nsd];
        }
        r.nsp[nsd] = nsd;
        r.nsp[nsc] = nsd;
        r.hshtNs[nsd] = nsu;
        tokenize(); // consume the token that follows the URI
        return null;
    }
    else if (nt.Equals("\"")) {
        StringBuilder sb = new StringBuilder("\"");
        if (pos + 1 < str.Length && str[pos] == '"' && str[pos + 1] == '"') {
            // Triple-quoted string: collect lexemes until a closing """ that is not followed by more quotes.
            sb.Append(next());
            sb.Append(next());
            while (true) {
                if (pos >= str.Length) {
                    Console.Error.WriteLine("** (t1) expecting \"\"\" at " + sb);
                    break;
                }
                sb.Append(next());
                // A position past the end of input counts as "not a quote".
                if (sb.ToString().EndsWith("\"\"\"") &&
                    (pos >= str.Length || str[pos] != '"') &&
                    (pos + 1 >= str.Length || str[pos + 1] != '"')) break;
            }
            t = sb.ToString();
        }
        else {
            while (true) {
                if (pos >= str.Length) {
                    Console.Error.WriteLine("** (t1) expecting \" at " + sb);
                    break;
                }
                nt = next();
                if (nt.Equals("\"")) break;
                if (nt.Equals("\\")) {
                    // Keep the backslash and take the following lexeme verbatim.
                    sb.Append(nt);
                    if (pos >= str.Length) {
                        Console.Error.WriteLine("** (t1) expecting \" at " + sb);
                        break;
                    }
                    nt = next();
                }
                sb.Append(nt);
            }
            sb.Append("\"");
            if (pos < str.Length && str[pos] == '@') {
                // Language tag: the part from the first '-' onwards is upper-cased.
                // Invariant casing keeps the result independent of the machine locale.
                String lang = token();
                int i = lang.IndexOf('-');
                if (i != -1)
                    sb.Append(lang.Substring(0, i) +
                        lang.Substring(i).ToUpper(System.Globalization.CultureInfo.InvariantCulture));
                else sb.Append(lang);
            }
            t = sb.ToString();
        }
    }
    else if (nt.Equals("'")) {
        StringBuilder sb = new StringBuilder("'");
        while (true) {
            if (pos >= str.Length) {
                Console.Error.WriteLine("** (t2) expecting ' at " + sb);
                break;
            }
            nt = next();
            if (nt.Equals("'")) break;
            sb.Append(nt);
        }
        t = sb.Append("'").ToString();
    }
    else if (nt.Equals("<")) {
        StringBuilder sb = new StringBuilder("<");
        while (true) {
            if (pos >= str.Length) {
                Console.Error.WriteLine("** (t2) expecting > at " + sb);
                break;
            }
            nt = next();
            if (nt.Equals(">")) break;
            sb.Append(nt);
        }
        t = sb.Append(">").ToString();
    }
    else if (nt.Equals("=")) {
        if (pos < str.Length && str[pos] == '>') t = nt + next();
        else t = nt;
    }
    else if (nt.Equals("is")) {
        // "is P of": return P marked with "@@".
        t = tokenize();
        nt = tokenize();
        if (!nt.Equals("of")) Console.Error.WriteLine("** (t4) expecting \"of\" but got " + nt);
        t = t + "@@";
    }
    else if (nt.Equals("has")) {
        // "has P of": return P unmarked.
        t = tokenize();
        nt = tokenize();
        if (!nt.Equals("of")) Console.Error.WriteLine("** (t5) expecting \"of\" but got " + nt);
    }
    else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n") ||
             nt.Equals("-") || nt.Equals(">"))
        return null;
    else if (nt.StartsWith("_:") && !nt.EndsWith("_" + Euler.doc)) t = nt + "_" + Euler.doc;
    else t = nt;

    // A non-delimiter token immediately followed by a double quote absorbs the quoted string.
    if (pos < str.Length && ST.IndexOf(t) == -1 && t[0] != '"' && str[pos] == '"') t = t + token();

    // A '.' directly followed by a non-terminator may be a decimal point: try to read
    // "<t>.<next>" as a number. The look-ahead needs two characters, hence Length - 1
    // (the same bound path() uses); a '.' that is the last character is left for the next call.
    if (pos < str.Length - 1 && str[pos] == '.' &&
        str[pos + 1] != ' ' && str[pos + 1] != '\n' &&
        str[pos + 1] != ']' && str[pos + 1] != '}' && str[pos + 1] != '#') {
        int opos = pos;
        try {
            // Format with the invariant culture so the token keeps '.' as decimal separator.
            t = XmlConvert.ToDouble(t + next() + next())
                .ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
        catch (Exception) {
            // Not a number: rewind and keep t unchanged.
            pos = opos;
        }
    }

    // A closing bracket not preceded by its terminator: report the terminator now
    // and defer the bracket to the next call.
    if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
        r.verb = ".}";
        return ".";
    }
    if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
        r.verb = ";]";
        return ";";
    }
    return r.verb = t;
}
/// <summary>
/// Resolves an "is ... of" verb (cverb ending in "@@") by exchanging subject and object.
/// </summary>
/// <param name="e">the statement node to adjust; null is ignored</param>
/// <remarks>
/// Without a subject, the object becomes the subject, the verb (minus "@@") is parsed into
/// the object and the verb becomes ".". With both subject and object present they are
/// exchanged and the "@@" marker is removed from cverb.
/// </remarks>
internal void swap(Euler e) {
    // The original tested e != null only in the first branch, so a null argument still
    // threw in the else-if. Bail out once, up front.
    if (e == null) return;
    if (e.subj == null && e.cverb.EndsWith("@@")) {
        e.subj = e.obj;
        e.obj = parse(false, e.cverb.Substring(0, e.cverb.Length - 2));
        e.verb = e.cverb = ".";
    }
    else if (e.subj != null && e.obj != null && e.cverb.EndsWith("@@")) {
        Euler el = e.subj;
        e.subj = e.obj;
        e.obj = el;
        e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
    }
}
/// <summary>
/// Parses the items of a "( ... )" list, the opening "(" having already been consumed,
/// and stores the resulting chain in e.obj.
/// </summary>
/// <param name="e">node that receives the list in its obj field</param>
/// <remarks>
/// Each item yields an RDFfirst node (hung on obj, with the item in its own obj) whose near
/// is an RDFrest node; that RDFrest node then receives the next cell. ")" ends the list with
/// an RDFnil node, and a token starting with "@" ends it with a "?"-variable node.
/// </remarks>
internal void list(Euler e) {
    Euler cell = e;
    for (;;) {
        String tok = tokenize();
        if (tok.StartsWith("@")) {
            cell.obj = parse(false, '?' + tok.Substring(1));
            tok = tokenize();
            if (!tok.Equals(")")) {
                Console.Error.WriteLine("** (p6) expecting ) at " + cell + " but got " + tok);
            }
            break;
        }
        if (tok.Equals(")")) {
            cell.obj = parse(false, RDFnil);
            break;
        }
        cell.obj = parse(false, RDFfirst);
        if (!tok.Equals("[")) {
            cell.obj.obj = parse(false, tok);
        }
        else {
            cell.obj.obj = parse(true, null);
            tokenize(); // consume the token after the bracket body
        }
        path(cell.obj.obj);
        cell.obj.near = parse(false, RDFrest);
        cell = cell.obj.near;
    }
    path(e.obj);
}
/// <summary>
/// Folds path steps that directly follow a node into that node: for "x.p" or "x^p" the
/// node is rewritten in place so that a copy of x becomes its subject, "." or "^" its verb,
/// and p its object. Repeats for chained steps.
/// </summary>
/// <param name="e">the node just parsed; only dereferenced when a path operator follows</param>
internal void path(Euler e) {
    if (atPathOperator('.')) {
        pos = pos + 1;
        e.subj = e.copy();
        e.verb = e.cverb = ".";
        e.bound = true;
        e.obj = parse(false, tokenize());
        // Keep the interned instance (the original discarded it) and skip a null verb,
        // which String.Intern would reject with ArgumentNullException.
        if (e.obj.verb != null) e.obj.verb = String.Intern(e.obj.verb);
        path(e);
    }
    if (atPathOperator('^')) {
        pos = pos + 1;
        e.subj = e.copy();
        e.verb = e.cverb = "^";
        e.bound = true;
        e.obj = parse(false, tokenize());
        if (e.obj.verb != null) e.obj.verb = String.Intern(e.obj.verb);
        path(e);
    }
}
/// <summary>
/// True when str[pos] is the given operator and the character after it can start a path step.
/// </summary>
/// <param name="op">'.' or '^'</param>
private bool atPathOperator(char op) {
    if (str == null || pos >= str.Length - 1 || str[pos] != op) return false;
    char following = str[pos + 1];
    // "^^" introduces a datatype, not a path step.
    if (op == '^' && following == '^') return false;
    // Whitespace, a closing bracket or a comment after the operator means it is not a path.
    // '\r' and '\t' are included so that a statement-ending '.' followed by CRLF or a tab
    // is not mistaken for a path operator.
    return following != ' ' && following != '\t' && following != '\r' && following != '\n' &&
           following != ']' && following != '}' && following != '#';
}
/// <summary>
/// Resolves a reference against the base URI u.
/// </summary>
/// <param name="s">the reference text taken from between angle brackets; "" and "#" stand for the base itself</param>
/// <returns>
/// The resolved URI text. When u starts with "file:" no resolution is attempted, and when
/// resolution throws, the failure is logged to stderr and the unresolved text is kept.
/// A trailing ".n3#" is shortened to "#".
/// </returns>
internal String toURI(String s) {
    String resolved = s;
    if (resolved.Equals("") || resolved.Equals("#")) {
        resolved = u;
    }
    bool isFileBase = u.StartsWith("file:");
    if (!isFileBase) {
        try {
            if (!resolved.EndsWith("#")) {
                resolved = new Uri(new Uri(u), resolved).ToString();
            }
            else {
                // Resolve the part before '#' against the directory of the base, then re-attach '#'.
                String parent = u.Substring(0, u.LastIndexOf('/') + 1);
                String withoutHash = resolved.Substring(0, resolved.Length - 1);
                resolved = new Uri(new Uri(parent), withoutHash).ToString() + '#';
            }
        }
        catch (Exception e) {
            Console.Error.WriteLine(u + " " + resolved + " " + e);
        }
    }
    if (resolved.EndsWith(".n3#")) {
        resolved = resolved.Substring(0, resolved.Length - 4) + "#";
    }
    return resolved;
}
}