// $Id: Parser.cs 83 2003-06-21 08:03:35Z mdupont $
// PxButton | build | csc /o /doc:Euler.xml *.cs |
using System;
using System.Collections;
using System.Text;
// for stack trace!
using System.Diagnostics;
using System.Reflection;
/// <summary>N3 parser</summary>
/// <remarks>Jos De Roo</remarks>
public class Parser {
// Parser state. str/vt/r/u are filled in by the four-argument constructor;
// pos is advanced by next() as characters are consumed.
internal String str; // N3 string
internal int pos = 0; // tokenizer position
internal ArrayList vt = null; // quantified variables table
internal Euler r = null; // root Euler object
internal String u = null; // base URI of the RDF resource
// Delimiter set: next() stops a token at any of these characters and returns
// such a character as a one-character token of its own.
internal const String ST = "{}[]()<>\"';,.^! \t\r\n\\";
// LOGa and LOGe are verbs that parse() leaves out of @prefix expansion;
// list() also uses LOGa as the verb of the type statement on each list cell.
internal const String LOGa = "a";
internal const String LOGe = "=";
internal const String LOGi = "=>";
// NOTE(review): these two are empty strings here; the values look like they
// were lost (angle-bracketed URIs stripped?) - confirm against the upstream file.
internal const String LOGimplies = "";
internal const String LOGnotImplies = "";
// NOTE(review): the DPO line below is an unterminated string literal and does
// not compile. Its value, and apparently the declarations that followed it,
// are missing: XSDstring, RDFnil, RDFfirst, RDFrest and RDFList are used by
// parse()/list() but are not declared anywhere in this class. Restore them
// from the upstream source rather than guessing.
internal const String DPO = "constructs an N3 parser
// Number of Parser objects constructed so far (post-incremented by each constructor).
static int instancecount = 0;
// Sequence number of this parser; parse() prints it in its debug output.
int instance = 0;
/// <summary>
/// Creates a parser with no input attached; only the per-instance sequence
/// number (taken from the shared instance counter) is initialised.
/// </summary>
public Parser() {
    int id = instancecount;
    instancecount = id + 1;
    instance = id;
}
/// <summary>constructs an N3 parser</summary>
/// <param name="s">the N3 string</param>
/// <param name="vartab">quantified variables table</param>
/// <param name="root">Euler object</param>
/// <param name="uri">URI of the RDF resource</param>
public Parser(String s, ArrayList vartab, Euler root, String uri)
    : this() {
    // The chained default constructor has already assigned the instance number.
    this.str = s;
    this.vt = vartab;
    this.r = root;
    this.u = uri;
}
/// <summary>N3 triple parse method</summary>
/// <returns>Euler object, or null when the tokenizer has no token left</returns>
public virtual Euler Parse() {
    String first = tokenize();
    return first == null ? null : parse(true, first);
}
/// <summary>N3 node parse method</summary>
/// <param name="nt">next token</param>
/// <returns>Euler object</returns>
public virtual Euler Parse(String nt) {
    Console.Error.WriteLine (" public virtual Euler Parse(String nt)");
    Euler node = parse(false, nt);
    return node;
}
/// <summary>
/// Recursive worker behind both public Parse overloads. Builds one new Euler
/// node from the current token and, when <paramref name="b"/> is true, also
/// reads a subject in front of the verb and the ";" / "," continuation chain
/// after it.
/// </summary>
/// <remarks>
/// Every step is traced to Console.Error, and the call stack is written through
/// Debug.WriteLine on entry and before the normal return.
/// A NullReferenceException raised anywhere in the body is caught: its stack
/// trace is printed and the node as built so far is returned.
/// </remarks>
/// <param name="b">true to parse subject + verb + continuations; false to turn
/// <paramref name="nt"/> into a single node</param>
/// <param name="nt">current token; null is accepted</param>
/// <returns>the node that was built; for a "(" token in verb position the list
/// head stored in the node's obj is returned instead</returns>
internal Euler parse(bool b, String nt) {
    Console.Error.WriteLine ("internal Euler parse(bool b, String nt)");
    Console.Error.WriteLine ("parser stack" + instance);
    StackTrace st = new System.Diagnostics.StackTrace(true);
    for(int i = 0; i < st.FrameCount; i ++)
    {
        StackFrame sf = st.GetFrame(i);
        Debug.WriteLine(" File: " + sf.GetFileName() +
            " Line: " + sf.GetFileLineNumber() +
            " Method: " + sf.GetMethod());
    }
    Console.Error.WriteLine ("parser end of stack");
    Euler e = new Euler();
    Console.Error.WriteLine ("new euler");
    try {
        // ---- subject (only when a full statement is requested) ----
        if (b) {
            Console.Error.WriteLine ("ifb");
            if (nt == null)
            {
                Console.Error.WriteLine ("if nt == null");
                e.subj = null;
            }
            else if (nt.Equals("{"))
            {
                // Nested formula: parse statements until "}" and chain them through near.
                Console.Error.WriteLine ("if nt == {");
                Console.Error.WriteLine ("before tokenize");
                nt = tokenize();
                if (nt.Equals(".")) {
                    Console.Error.WriteLine ("if nt == .");
                    Console.Error.WriteLine ("going to recurse");
                    e.subj = parse(false, "{}");
                    Console.Error.WriteLine ("after recurse");
                }
                else
                {
                    Console.Error.WriteLine ("else");
                    Console.Error.WriteLine ("going to recurse2");
                    e.subj = parse(true, nt);
                    Console.Error.WriteLine ("after recurse2");
                }
                Euler el = e.subj;
                Console.Error.WriteLine ("before tokenize2");
                nt = tokenize();
                Console.Error.WriteLine ("after tokenize2");
                while (el.near != null) {
                    el = el.near;
                }
                while (!nt.Equals("}")) {
                    Console.Error.WriteLine ("going to recurse 3");
                    el.near = parse(true, nt);
                    Console.Error.WriteLine ("after recurse 3");
                    // A parsed statement may carry its own near chain; append after its tail.
                    while (el.near != null)
                    {
                        el = el.near;
                    }
                    nt = tokenize();
                }
            }
            else if (nt.Equals("[")) {
                Console.Error.WriteLine ("recurse 4");
                e.subj = parse(true, null);
                Console.Error.WriteLine ("~recurse 4");
                // ";]" makes the next token() call hand back the closing "]".
                if (e.subj.verb == null) r.verb = ";]";
                nt = tokenize();
                if (!nt.Equals("]")) Console.Error.WriteLine("** (p1) expecting ] at " + e + " but got " + nt);
            }
            else if (nt.Equals("(")) {
                // list() stores the list head in obj; a list in subject position is moved over.
                list(e);
                e.subj = e.obj;
                e.obj = null;
            }
            else {
                Console.Error.WriteLine ("recurse 5");
                e.subj = parse(false, nt);
                Console.Error.WriteLine ("~recurse 5");
                if (nt.Equals("this")) e.subj.verb = '<' + u + "#frag" + e.GetHashCode() + '>';
            }
            nt = tokenize();
        }
        Console.Error.WriteLine ("after ifb");
        //// ------------------------
        // ---- verb ----
        if (nt == null || nt.Equals(";")) {
            Console.Error.WriteLine ("nt = ;");
            e.bound = true;
            Console.Error.WriteLine ("Parser:Return1");
            return e;
        }
        else if (e.subj != null && nt.StartsWith("_:")) {
            // verb handler!
            Console.Error.WriteLine ("nt =~ _:");
            e.cverb = nt;
            if (!vt.Contains(nt)) vt.Add(nt);
        }
        else if (nt.Equals("[")) {
            // verb handler!
            Console.Error.WriteLine ("nt = [");
            // Synthesise a "_:" name for the anonymous node from this node's hash code.
            String a = "_:" + e.GetHashCode();
            if (!vt.Contains(a)) vt.Add(a);
            Console.Error.WriteLine ("recurse 6");
            Euler ap = parse(true, a);
            Console.Error.WriteLine ("~recurse 6");
            e.cverb = a;
            if (ap.verb != null) r.verb = ";]";
            nt = tokenize();
            if (!nt.Equals("]")) Console.Error.WriteLine("** (p2) expecting ] at " + e + " but got " + nt);
            e.near = ap;
        }
        else if (nt.Equals("(")) {
            Console.Error.WriteLine ("nt = (");
            list(e);
            Console.Error.WriteLine ("Parser:Return2");
            return e.obj;
        }
        else {
            Console.Error.WriteLine ("else e.cverb = nt;");
            e.cverb = nt;
        }
        //------------------------------
        // ---- expand the verb: strip the "@@" marker, split off a literal part,
        //      then apply @prefix mappings or resolve a relative <...> reference ----
        Console.Error.WriteLine ("ok, copy verb");
        e.verb = e.cverb;
        if (e.verb.EndsWith("@@")) e.verb = e.verb.Substring(0, e.verb.Length - 2);
        String s1 = e.verb;
        String s2 = "";
        // Console.Error.WriteLine("e.verb index of " + e.verb);
        if (e.verb.IndexOf('"') != -1) {
            s1 = e.verb.Substring(0, e.verb.IndexOf('"'));
            s2 = e.verb.Substring(e.verb.IndexOf('"'));
        }
        if (r != null && s1.Length > 0 && !s1.Equals(LOGa) && !s1.Equals(LOGe) &&
            s1[0] != '(' && s1[0] != '<' && s1[0] != '[' &&
            s1[0] != '_' && s1[0] != '"' && s1[0] != '\'' && s1.IndexOf(':') != - 1) {
            String pf = s1.Substring(0, s1.IndexOf(':') + 1);
            String pg = (String) r.hshtNsp[pf]; // TODO check
            if (pg != null) pf = pg;
            String qf = (String) r.hshtNs[pf];
            if (qf == null) {
                Console.Error.WriteLine("** no @prefix " + pf + " found, taking <" + u + "#>");
                qf = "<" + u + "#>";
                r.hshtNs[pf] = qf;
            }
            StringBuilder sb = new StringBuilder(pf);
            // Console.Error.WriteLine("e.cverb.IndexOf " + e.cverb);
            sb.Append(e.cverb.Substring(e.cverb.IndexOf(':') + 1));
            e.cverb = sb.ToString();
            sb = new StringBuilder(qf);
            // Insert the local name just before the last character of the namespace string.
            sb.Insert(sb.Length - 1, s1.Substring(s1.IndexOf(':') + 1));
            e.verb = sb.ToString() + s2;
        }
        else if (u != null && s1.Length > 0 && s1[0] == '<' && s1.IndexOf(':') == - 1)
            e.verb = e.cverb = '<' + toURI(s1.Substring(1, s1.Length - 2)) + '>' + s2;
        //TODO see ParseSpecials();
        // vt is null-checked just below, so guard this use as well.
        if (vt != null && e.verb.StartsWith("?") && !vt.Contains(e.verb)) vt.Add(e.verb);
        if (vt != null)
        {
            // Console.Error.WriteLine("TODO: This broken");
            // Console.Error.WriteLine("count " + count);
            int count = vt.Count;
            if (count > 0)
            {
                // Console.Error.WriteLine("e.varid = vt.IndexOf(e.verb); " + vt +":"+ e.verb);
                // e.varid = vt.IndexOf(e.verb);
            }
        }
        if (e.varid == - 1) e.bound = true;
        if (nt.Equals(".")) {
            Console.Error.WriteLine ("going to recurse 7");
            e.obj = parse(false, "");
            Console.Error.WriteLine ("~ recurse 7");
            e.verb = "";
            e.cverb = "";
            e.bound = true;
            Console.Error.WriteLine ("Parser:Return3");
            return e;
        }
        // "^^" datatype suffix directly after the token just read.
        // pos + 1 must be in range before str[pos + 1] is read.
        if (str != null && pos + 1 < str.Length && str[pos] == '^' && str[pos + 1] == '^') {
            pos = pos + 2;
            Console.Error.WriteLine ("going to recurse 8");
            Euler et = parse(false, tokenize());
            Console.Error.WriteLine ("~going to recurse 8");
            if (!et.verb.Equals(XSDstring)) {
                e.subj = e.copy();
                e.verb = e.cverb = "^^";
                e.bound = true;
                e.obj = et;
                String.Intern(e.obj.verb);
                if (e.subj.bound)
                {
                    //TODO Datatype.Compare(e.obj.verb, r.getLit(e.subj), r.getLit(e.subj));
                }
            }
        }
        //---------------------------------
        Console.Error.WriteLine ("before if b");
        if (b) {
            // Console.Error.WriteLine ("if b");
            // nt = tokenize();
            // if (nt.Equals("{")) {
            // nt = tokenize();
            // Console.Error.WriteLine ("going to recurse 9");
            // if (nt.Equals(".")) e.obj = parse(false, "{}");
            // else e.obj = parse(true, nt);
            // Console.Error.WriteLine ("~recurse 9");
            // Euler el = e.obj;
            // nt = tokenize();
            // while (nt != null && !nt.Equals("}")) {
            // Console.Error.WriteLine ("going to recurse 10");
            // el.near = parse(true, nt);
            // Console.Error.WriteLine ("~recurse 10");
            // el = el.near;
            // nt = tokenize();
            // }
            // }
            // else if (nt.Equals("[")) {
            // Console.Error.WriteLine ("going to recurse 11");
            // e.obj = parse(true, null);
            // Console.Error.WriteLine ("~recurse 11");
            // if (e.obj.verb != null) r.verb = ";]";
            // nt = tokenize();
            // if (!nt.Equals("]")) Console.Error.WriteLine("** (p3) expecting ] at " + e + " but got " + nt);
            // }
            // else if (nt.Equals("(")) list(e);
            // else {
            // Console.Error.WriteLine ("recurse 12");
            // e.obj = parse(false, nt);
            // Console.Error.WriteLine ("~recurse 12");
            // if (e.obj.verb.Equals(OWLFunctionalProperty))
            // {
            // int x= e.subj.verb;
            // r.hshtMto[x] = r;
            // }
            // if (e.obj.verb.Equals(OWLInverseFunctionalProperty)) r.hshtOtm[e.subj.verb] = r;
            // }
            // nt = tokenize();
            // if (nt.EndsWith("\"\"\""))
            // NOTE(review): the object-parsing code above is commented out, including the
            // "if" that guarded the block below, so the block now runs unconditionally.
            // Unless the "^^" branch assigned e.obj, e.obj is still null here and the block
            // throws NullReferenceException, which the catch at the end turns into
            // "return e". Left as found; restoring the object parsing needs the upstream logic.
            {
                e.obj.cverb = e.obj.cverb + nt;
                e.obj.verb = e.obj.cverb;
                nt = tokenize();
            }
            // ---- ";" (new verb, same subject) and "," (same verb) continuations ----
            Euler el2 = e;
            while (nt.Equals(";") || nt.Equals(",")) {
                while (el2.near != null) el2 = el2.near;
                if (nt.Equals(";")) {
                    nt = tokenize();
                    if (nt.Equals("]")) {
                        r.verb = ";]";
                        nt = ";";
                        break;
                    }
                    else if (nt.Equals(".")) break;
                }
                else nt = el2.cverb;
                Console.Error.WriteLine ("going to recurse 13");
                el2.near = parse(false, nt);
                Console.Error.WriteLine ("~ recurse 13");
                el2.near.subj = e.subj;
                nt = tokenize();
                if (nt.Equals("{")) {
                    nt = tokenize();
                    Console.Error.WriteLine ("going to recurse 14");
                    if (nt.Equals(".")) el2.near.obj = parse(false, "{}");
                    else el2.near.obj = parse(true, nt);
                    Console.Error.WriteLine ("~recurse 14");
                    Euler ef = el2.near.obj;
                    nt = tokenize();
                    while (nt != null && !nt.Equals("}")) {
                        Console.Error.WriteLine ("going to recurse 15");
                        ef.near = parse(true, nt);
                        Console.Error.WriteLine ("~recurse 15");
                        ef = ef.near;
                        nt = tokenize();
                    }
                }
                else if (nt.Equals("[")) {
                    Console.Error.WriteLine ("going to recurse 16");
                    el2.near.obj = parse(true, null);
                    Console.Error.WriteLine ("~recurse 16");
                    if (el2.near.obj.verb != null) r.verb = ";]";
                    nt = tokenize();
                    if (!nt.Equals("]")) Console.Error.WriteLine("** (p4) expecting ] at " + e + " but got " + nt);
                }
                else if (nt.Equals("(")) list(el2.near);
                else {
                    Console.Error.WriteLine ("going to recurse 17");
                    Euler returnv = parse(false, nt);
                    Console.Error.WriteLine ("~recurse 17~");
                    Console.Error.WriteLine (" before set");
                    el2.near.obj = returnv;
                    Console.Error.WriteLine (" after set");
                    // if (el2.near.obj.verb.Equals(OWLFunctionalProperty)) r.hshtMto[el2.near.subj.verb] = r;
                    // if (el2.near.obj.verb.Equals(OWLInverseFunctionalProperty)) r.hshOtm[el2.near.subj.verb] = r;
                }
                nt = tokenize();
                if (nt.EndsWith("\"\"\"")) {
                    el2.near.obj.cverb = el2.near.obj.cverb + nt;
                    el2.near.obj.verb = el2.near.obj.cverb;
                    nt = tokenize();
                }
                swap(el2.near);
            }
            if (!nt.Equals(".") && !nt.Equals(";"))
                Console.Error.WriteLine("** (p5) expecting . or ; at " + e + " but got " + nt);
        }
        Console.Error.WriteLine ("after if b");
        swap(e);
        e.far = r;
        Console.Error.WriteLine ("parser before return stack" );
        Console.Error.WriteLine ("parser instance" + instance);
        // Reuse the outer local: declaring a second "st" in this nested scope is
        // compile error CS0136.
        st = new System.Diagnostics.StackTrace(true);
        for(int i = 0; i < st.FrameCount; i ++)
        {
            StackFrame sf = st.GetFrame(i);
            Debug.WriteLine(" File: " + sf.GetFileName() +
                " Line: " + sf.GetFileLineNumber() +
                " Method: " + sf.GetMethod());
        }
        Console.Error.WriteLine ("parser before return");
        return e;
    }
    catch (NullReferenceException exc) {
        // Best effort: report the failure and hand back whatever was built.
        Console.Error.WriteLine ("catch");
        Console.Error.WriteLine(exc.StackTrace);
        Console.Error.WriteLine ("before return exception");
        return e;
    }
}
/// <summary>
/// Returns the next non-null result of token(). token() yields null for input
/// it skips, so it is called again until it produces a token or the position
/// has reached the end of the N3 string.
/// </summary>
/// <returns>the next token, or null when the input is exhausted</returns>
internal String tokenize() {
    String result;
    do {
        result = token();
    } while (result == null && pos < str.Length);
    return result;
}
/// <summary>
/// Cuts the next raw lexeme out of the N3 string, starting at pos: either a run
/// of characters that are not in ST, or, when the current character is in ST,
/// that single character. pos is advanced past what is returned.
/// </summary>
/// <remarks>
/// Callers must make sure pos is inside the string: when pos is already at the
/// end, str[pos] is indexed out of range.
/// </remarks>
/// <returns>the lexeme between the old and the new position</returns>
internal String next() {
    int begin = pos;
    // Advance to the first delimiter or to the end of the string.
    for (; pos < str.Length; pos++) {
        if (ST.IndexOf(str[pos]) >= 0) break;
    }
    // Nothing consumed: the current character is itself a delimiter, so take it.
    if (pos == begin) {
        if (ST.IndexOf(str[pos]) >= 0) pos++;
    }
    Console.Error.WriteLine ("Parser::next going to return");
    int length = pos - begin;
    return str.Substring(begin, length);
}
/// <summary>
/// Reads one N3 token starting at pos and records it in r.verb.
/// </summary>
/// <remarks>
/// Handled here, in order: "#" comments (skipped to end of line), "@prefix" /
/// "bind" declarations (stored in r.hshtNs / r.hshtNsp, nothing returned),
/// double-quoted and triple-quoted strings, single-quoted strings, "&lt;...&gt;"
/// references, "=" / "=&gt;", the "is ... of" and "has ... of" forms, whitespace,
/// and "_:" names, which get "_" + Euler.doc appended.
/// A closing "}" or "]" is delivered in two steps: this call returns "." (or
/// ";") and sets r.verb to ".}" (or ";]"), and the following call returns the
/// bracket itself.
/// Requires r and str to be set; both are dereferenced without a null check.
/// </remarks>
/// <returns>the token, or null for skipped input and at end of input</returns>
internal String token() {
    Console.Error.WriteLine("Parser::Token()" );
    // Deliver a closing bracket that the previous call (or parse) left pending.
    if (r.verb != null && r.verb.Equals(".}")) return r.verb = "}";
    if (r.verb != null && r.verb.Equals(";]")) return r.verb = "]";
    if (pos >= str.Length) return null;
    String t = null;
    String nt = next();
    Console.Error.WriteLine("token nt.IndexOf #" + nt);
    if (nt.IndexOf('#') != - 1) {
        // Comment: keep what precedes '#', then discard the rest of the line.
        t = nt.Substring(0, nt.IndexOf('#'));
        String v = r.verb;
        while (nt != null && !nt.Equals("\n") && pos < str.Length) nt = next();
        r.verb = v;
        if (t.Equals("")) return null;
    }
    else if (nt.Equals("@prefix") || nt.Equals("bind")) {
        String nsc = tokenize();
        if (nsc.Equals("default")) nsc = ":";
        String nsd = nsc;
        String nsu = tokenize();
        // Console.Error.WriteLine("nsu.IndexOf #" + nsu);
        if (u != null && nsu.Length > 0 && nsu[0] == '<' && nsu.IndexOf(':') == - 1)
            nsu = '<' + toURI(nsu.Substring(1, nsu.Length - 2)) + '>';
        // If the prefix is already bound to a different namespace, derive a fresh
        // name by prepending "ns" until it is free or bound to the same namespace.
        String nsv = (String) r.hshtNs[nsd];
        while (nsv != null && !nsu.Equals(nsv)) {
            nsd = "ns" + nsd;
            nsv = (String) r.hshtNs[nsd];
        }
        r.hshtNsp[nsd] = nsd; // TODO check
        r.hshtNsp[nsc] = nsd; // TODO check
        r.hshtNs[nsd] = nsu;
        tokenize();
        return null;
    }
    else if (nt.Equals("\"")) {
        StringBuilder sb = new StringBuilder("\"");
        // Two more quotes directly after the opening one start a """ string.
        // Both lookahead positions must be inside the input.
        if (pos + 1 < str.Length && str[pos] == '"' && str[pos + 1] == '"') {
            sb.Append(next());
            sb.Append(next());
            while (true) {
                if (pos >= str.Length) {
                    // Unterminated long string: report it instead of reading past the end.
                    Console.Error.WriteLine("** (t1) expecting \"\"\" at " + sb);
                    break;
                }
                sb.Append(next());
                if (sb.ToString().EndsWith("\"\"\"")) break;
            }
            t = sb.ToString();
        }
        else {
            if (!scanTo(sb, "\"", true)) Console.Error.WriteLine("** (t1) expecting \" at " + sb);
            sb.Append("\"");
            // Console.Error.WriteLine("-@" + str[pos]);
            // A '-' or '@' directly after the closing quote is read as one more
            // token and appended in lower case.
            if (pos < str.Length && "-@".IndexOf(str[pos]) != -1) {
                String suffix = token();
                if (suffix != null) sb.Append(suffix.ToLower());
            }
            t = sb.ToString();
        }
    }
    else if (nt.Equals("'")) {
        StringBuilder sb = new StringBuilder("'");
        if (!scanTo(sb, "'", false)) Console.Error.WriteLine("** (t2) expecting ' at " + sb);
        t = sb.Append("'").ToString();
    }
    else if (nt.Equals("<")) {
        StringBuilder sb = new StringBuilder("<");
        if (!scanTo(sb, ">", false)) Console.Error.WriteLine("** (t2) expecting > at " + sb);
        t = sb.Append(">").ToString();
    }
    else if (nt.Equals("=")) {
        // "=" followed immediately by ">" forms "=>".
        if (pos < str.Length && str[pos] == '>') t = nt + next();
        else t = nt;
    }
    else if (nt.Equals("is")) {
        t = tokenize();
        nt = tokenize();
        if (!nt.Equals("of")) Console.Error.WriteLine("** (t4) expecting \"of\" but got " + nt);
        // "@@" marks the property for swap(), which exchanges subject and object.
        t = t + "@@";
    }
    else if (nt.Equals("has")) {
        t = tokenize();
        nt = tokenize();
        if (!nt.Equals("of")) Console.Error.WriteLine("** (t5) expecting \"of\" but got " + nt);
    }
    else if (nt.Equals(" ") || nt.Equals("\t") || nt.Equals("\r") || nt.Equals("\n") ||
             nt.Equals("-") || nt.Equals(">"))
        return null;
    else if (nt.StartsWith("_:") && !nt.EndsWith("_" + Euler.doc)) t = nt + "_" + Euler.doc;
    else t = nt;
    // Console.Error.WriteLine("ST . t " + ST + t);
    // A quote directly after a non-delimiter token: glue the quoted token on.
    if (pos < str.Length && ST.IndexOf(t) == -1 && t[0] != '"' && str[pos] == '"')
        t = t + token();
    if (pos < str.Length && str[pos] == '.') {
        // An integer followed by '.' absorbs the '.' and the lexeme after it.
        // Anything that is not an integer lands in the catch and is left alone.
        try {
            Console.Error.WriteLine ("token call to parse" + t);
            long ts = Int64.Parse(t);
            Console.Error.WriteLine ("token parse returned");
            t = ts + next() + next();
        }
        catch (Exception) {
            Console.Error.WriteLine ("Token Caught exeception in parsing integer");
        }
    }
    Console.Error.WriteLine ("near end of Token()");
    if (r.verb != null && t != null && !r.verb.Equals(".") && t.Equals("}")) {
        r.verb = ".}";
        Console.Error.WriteLine ("Token:return .");
        return ".";
    }
    if (r.verb != null && t != null && !r.verb.Equals(";") && t.Equals("]")) {
        r.verb = ";]";
        Console.Error.WriteLine ("Token:return ;");
        return ";";
    }
    Console.Error.WriteLine ("Token:return r.verb = t;");
    return r.verb = t;
}

/// <summary>
/// Appends lexemes from next() to <paramref name="sb"/> until the closing
/// delimiter has been read or the input runs out. The delimiter itself is
/// consumed but not appended.
/// </summary>
/// <param name="sb">buffer that receives the content</param>
/// <param name="closer">the one-character closing delimiter</param>
/// <param name="backslashEscapes">true to append a backslash together with the
/// lexeme after it, so that an escaped delimiter does not end the scan</param>
/// <returns>true when the delimiter was found, false at end of input</returns>
private bool scanTo(StringBuilder sb, String closer, bool backslashEscapes) {
    while (pos < str.Length) {
        String piece = next();
        if (piece.Equals(closer)) return true;
        if (backslashEscapes && piece.Equals("\\")) {
            sb.Append(piece);
            if (pos >= str.Length) return false;
            piece = next();
        }
        sb.Append(piece);
    }
    return false;
}
/// <summary>
/// Handles a node whose cverb carries the "@@" marker (appended by token() for
/// the "is ... of" form). Nodes without the marker are left untouched.
/// </summary>
/// <remarks>
/// With no subject: the object becomes the subject, the verb (marker removed) is
/// parsed into the new object, and verb/cverb are set to "^^".
/// With both subject and object: the two are exchanged and the marker is removed
/// from cverb.
/// A null node or a null cverb is ignored; previously the second branch
/// dereferenced a null node.
/// </remarks>
/// <param name="e">the node to adjust; may be null</param>
internal void swap(Euler e) {
    Console.Error.WriteLine ("Parser swap");
    if (e == null || e.cverb == null || !e.cverb.EndsWith("@@")) return;
    if (e.subj == null) {
        e.subj = e.obj;
        Console.Error.WriteLine ("swap going to call parse");
        e.obj = parse(false, e.cverb.Substring(0, e.cverb.Length - 2));
        Console.Error.WriteLine ("swap ~ call parse");
        e.verb = e.cverb = "^^";
    }
    else if (e.obj != null) {
        Euler previousSubject = e.subj;
        e.subj = e.obj;
        e.obj = previousSubject;
        e.cverb = e.cverb.Substring(0, e.cverb.Length - 2);
    }
}
/// <summary>
/// Parses the items of a "( ... )" list (the opening "(" has already been read)
/// and hangs the resulting cell chain off <paramref name="e"/>.obj.
/// </summary>
/// <remarks>
/// Each item produces a cell built from RDFfirst whose obj is the item, followed
/// through near by an RDFrest node and a LOGa / RDFList node; the next item is
/// attached to the RDFrest node. ")" ends the list with an RDFnil node. A token
/// starting with "@" ends it with a "?"-prefixed node instead and must be
/// followed by ")".
/// </remarks>
/// <param name="e">node that receives the list in its obj</param>
internal void list(Euler e) {
    Console.Error.WriteLine ("parser_list");
    Euler cursor = e;
    for (;;) {
        String item = tokenize();
        if (item.StartsWith("@")) {
            Console.Error.WriteLine ("list going to call parse");
            cursor.obj = parse(false, '?' + item.Substring(1));
            Console.Error.WriteLine ("list ~call parse");
            item = tokenize();
            if (!item.Equals(")")) {
                Console.Error.WriteLine("** (p6) expecting ) at " + cursor + " but got " + item);
            }
            return;
        }
        if (item.Equals(")")) {
            cursor.obj = parse(false, RDFnil);
            return;
        }
        cursor.obj = parse(false, RDFfirst);
        if (!item.Equals("[")) {
            cursor.obj.obj = parse(false, item);
        }
        else {
            // Anonymous node as list item; the token after it is read and dropped.
            cursor.obj.obj = parse(true, null);
            tokenize();
        }
        cursor.obj.near = parse(false, RDFrest);
        cursor.obj.near.near = parse(false, LOGa);
        cursor.obj.near.near.obj = parse(false, RDFList);
        cursor = cursor.obj.near;
    }
}
/// <summary>
/// Resolves <paramref name="s"/> against the base URI u. When resolution fails
/// the input is kept unchanged, and the failure is reported on Console.Error
/// unless the base URI starts with "file:". A result ending in ".n3#" has the
/// ".n3" part removed, leaving the trailing "#".
/// </summary>
/// <param name="s">the reference to resolve</param>
/// <returns>the resolved (or unchanged) reference</returns>
internal String toURI(String s) {
    String resolved = s;
    try {
        Uri baseUri = new Uri(u);
        Uri absolute = new Uri(baseUri, s);
        resolved = absolute.ToString();
    }
    catch (Exception e) {
        bool isFileBase = u.StartsWith("file:");
        if (!isFileBase) Console.Error.WriteLine(u + " " + s + " " + e);
    }
    if (!resolved.EndsWith(".n3#")) return resolved;
    return resolved.Substring(0, resolved.Length - 4) + "#";
}
}