package euler;

import java.net.URI;
import java.util.Enumeration;
import java.util.Vector;

import euler.output.ILogger;
import euler.output.Outputter;

/**
 * N3 parser.
 *
 * <p>Works directly on the N3 text held in {@link #str}: {@link #next()} cuts raw
 * lexemes, {@link #token()} turns them into logical tokens (handling directives,
 * literals, comments and a few SPARQL-style keywords), and the parse routine builds
 * {@code Euler} nodes that are linked through their {@code subj}, {@code obj},
 * {@code near} and {@code far} fields.
 *
 * <p>The tokenizer keeps its "previous token" state in {@code r.verb} of the root
 * object, so {@link #r} has to be set before any token is requested.
 *
 * @author Jos De Roo
 */
public class Parser {
    String str; // N3 string
    int pos = 0; // tokenizer position
    int line = 1; // line counter
    Vector vt = null; // quantified variables table
    Euler r = null; // root Euler object
    String u = null; // base URI of the RDF resource
    String ou = null; // original base URI of the RDF resource
    int lv = 0; // graph nesting level

    /**
     * Delimiter characters: each one is returned by {@link #next()} as a lexeme of
     * its own, and any run of other characters is returned as one lexeme.
     */
    static final String ST = "{}[]()<>\"';,.^! \t\r\n\\\ufeff";
    static final String LOGa = "a";
    static final String LOGe = "=";
    static final String LOGi = "=>";
    static final String LOGt = "true";
    static final String LOGf = "false";
    // NOTE(review): this copy of the file appears to be truncated right here. The value
    // of DPO is empty, the constants ONT, OWLequivalentTo, OWLsameIndividualAs,
    // OWLsameClassAs, OWLsamePropertyAs, OWLdifferentIndividualFrom,
    // OWLUnambiguousProperty and OWLUniqueProperty that are used further down are not
    // declared anywhere, and the statements that follow use e, b and nt and end in a
    // catch clause without a visible method header or try. They look like the remainder
    // of the body of parse(b, nt), which the other methods call with a boolean and a
    // token -- restore the missing text from the original source before building.
    static final String DPO = "";
            if (Euler.prologBChain) e.src = toQName(u + "#line_" + line);
            // share one String instance per distinct source value via the Euler.z table
            String es = (String) Euler.z.get(e.src);
            if (es != null) e.src = es;
            else Euler.z.put(e.src, e.src);
        }
        e.line = line;
        // ---- subject (only when b is set) ----
        if (b) {
            if (nt == null) e.subj = null;
            else if (nt.equals("{")) {
                // nested graph: parse statements up to the closing brace and chain them via near
                lv++;
                nt = tokenize();
                if (nt.equals(".")) e.subj = parse(false, "{}");
                else e.subj = parse(true, nt);
                Euler el = e.subj;
                while (el.near != null) el = el.near;
                nt = tokenize();
                while (!nt.equals("}")) {
                    el.near = parse(true, nt);
                    while (el.near != null) el = el.near;
                    nt = tokenize();
                }
                lv--;
            }
            else if (nt.equals("[")) {
                // bracketed subject
                e.subj = parse(true, null);
                // NOTE(review): this tests verb == null while the other "[" branches (p2, p3, p4)
                // test verb != null before queueing the pending "]" -- confirm which is intended
                if (e.subj.verb == null) r.verb = ";]";
                nt = tokenize();
                if (!nt.equals("]")) System.err.println("** Parser error ** " + u + " line " + line + ": (p1) expecting ] at " + e + " but got " + nt);
                // look ahead one token; rewind unless the statement ends right after the bracket
                int cpos = pos;
                nt = tokenize();
                if (nt.equals(".")) {
                    e.obj = parse(false, "");
                    e.verb = "";
                    e.cverb = "";
                    e.bound = true;
                    return e;
                }
                else pos = cpos;
            }
            else if (nt.equals("(")) {
                // list as subject: list(e) leaves the list in e.obj, move it to e.subj
                list(e);
                e.subj = e.obj;
                e.obj = null;
                int cpos = pos;
                nt = tokenize();
                if (nt.equals(".")) return new Euler();
                else pos = cpos;
            }
            else {
                e.subj = parse(false, nt);
                if (nt.equals("this")) e.subj.verb = '<' + u + "#frag" + e.uid + '>';
            }
            path(e.subj);
            // @forAll / @forSome are rewritten to "this log:forAll" / "this log:forSome"
            if (nt != null && nt.equals("@forAll")) {
                e.subj = parse(false, "this");
                nt = "log:forAll";
            }
            else if (nt != null && nt.equals("@forSome")) {
                e.subj = parse(false, "this");
                nt = "log:forSome";
            }
            else nt = tokenize();
        }
        // ---- verb / term ----
        if (nt == null || nt.equals(";")) {
            e.bound = true;
            return e;
        }
        else if (e.subj != null && nt.startsWith("_:")) {
            e.cverb = nt;
            if (!vt.contains(nt)) vt.addElement(nt);
        }
        else if (nt.equals("[")) {
            // bracketed node in verb/term position: give it a generated name
            String a = (Euler.prolog ? "?V" : "_:e") + e.uid;
            Euler ap = parse(true, a);
            e.cverb = a;
            if (ap.verb != null) r.verb = ";]";
            nt = tokenize();
            if (!nt.equals("]")) System.err.println("** Parser error ** " + u + " line " + line + ": (p2) expecting ] at " + e + " but got " + nt);
            if (ap.verb.equals(Euler.OWLsameAs)) {
                e.cverb = ap.obj.cverb;
                e.vv = true;
            }
            else e.near = ap;
        }
        else if (nt.equals("(")) {
            list(e);
            return e.obj;
        }
        else e.cverb = nt;
        // "_@" suffix is produced by token() for "has X"; "@@" for "is X of" and "<="
        if (e.cverb.endsWith("_@")) {
            e.vv = true;
            e.cverb = e.cverb.substring(0, e.cverb.length() - 2);
        }
        e.verb = e.cverb;
        if (e.verb.endsWith("@@")) e.verb = e.verb.substring(0, e.verb.length() - 2);
        // <...> reference: resolve against the base and convert to a prefixed name
        if (r != null && e.verb.length() > 0 && e.verb.charAt(0) == '<') {
            String nsq = toURI(e.verb.substring(1, e.verb.length() - 1));
            e.verb = e.cverb = toQName(nsq);
        }
        if (e.subj != null && (e.cverb.equals("log:semantics") || e.cverb.equals("log:uri"))) e.subj.cverb = e.subj.verb;
        // s1 = part of the term before the first double quote, s2 = the rest (from the quote on)
        String s1 = e.verb;
        String s2 = "";
        if (e.verb.indexOf('"') != -1) {
            s1 = e.verb.substring(0, e.verb.indexOf('"'));
            s2 = e.verb.substring(e.verb.indexOf('"'));
        }
        // a bare word that is neither a numeral nor a registered keyword gets the ":" prefix
        if (r != null && s1.length() > 0 && s1.indexOf(':') == -1 && s1.charAt(0) != '(' && s1.charAt(0) != '<'
                && s1.charAt(0) != '[' && s1.charAt(0) != '_' && s1.charAt(0) != '"' && s1.charAt(0) != '?'
                && !r.isNumeral(s1) && r.ext.kw.get(s1) == null) s1 = ":" + s1;
        // prefixed name: expand it to <namespace + local name> using the tables in r.ext
        if (r != null && s1.length() > 0 && s1.indexOf(':') != -1 && s1.charAt(0) != '(' && s1.charAt(0) != '<'
                && s1.charAt(0) != '[' && s1.charAt(0) != '_' && s1.charAt(0) != '"') {
            String pf = s1.substring(0, s1.indexOf(':') + 1);
            // nsp maps a prefix as written to the prefix actually used as key in ns
            String pg = (String) r.ext.nsp.get(pf);
            if (pg != null) pf = pg;
            String qf = (String) r.ext.ns.get(pf);
            if (qf == null) {
                // unknown prefix: warn and bind it to <original base URI + "#">
                System.err.println("** Parser warning ** " + ou + " line " + line + ": no @prefix " + pf + " found for " + s1 + ", taking <" + ou + "#>");
                qf = "<" + ou + "#>";
                r.ext.ns.put(pf, qf);
            }
            StringBuffer sb = new StringBuffer(pf);
            sb.append(e.cverb.substring(e.cverb.indexOf(':') + 1));
            e.cverb = sb.toString();
            sb = new StringBuffer(qf);
            // insert the local name just before the closing '>' of the namespace
            sb.insert(sb.length() - 1, s1.substring(s1.indexOf(':') + 1));
            e.verb = sb.toString() + s2;
        }
        else if (u != null && s1.length() > 0 && s1.charAt(0) == '<' && s1.indexOf(':') == -1) e.verb = e.cverb = '<' + toURI(s1.substring(1, s1.length() - 1)) + '>' + s2;
        // Precedence: (starts with '?' AND contains '@') OR (contains "\"@" AND the next two
        // input characters are both not '^'). The term is split at its last '@' into subj and obj.
        // NOTE(review): str.charAt(pos) / str.charAt(pos + 1) are not bounds-checked here.
        if (e.verb.length() > 0 && e.verb.charAt(0) == '?'
                && e.verb.indexOf('@') != -1 || e.verb.indexOf("\"@") != -1 && str.charAt(pos) != '^' && str.charAt(pos + 1) != '^') {
            r.ext.kw.put(e.verb.substring(e.verb.lastIndexOf('@') + 1), r);
            e.subj = parse(false, e.verb.substring(0, e.verb.lastIndexOf('@')));
            e.obj = parse(false, e.verb.substring(e.verb.lastIndexOf('@') + 1));
            e.verb = e.cverb = "@";
        }
        // shorthand tokens and older vocabulary names are mapped to constants defined on Euler
        if (e.verb.equals(LOGa)) e.verb = Euler.RDFtype;
        if (e.verb.equals(LOGe)) {
            e.verb = Euler.OWLsameAs;
            e.cverb = "owl:sameAs";
        }
        if (e.verb.equals(LOGi)) e.verb = Euler.LOGimplies;
        if (e.verb.equals(LOGt)) e = parse(false, "\"true\"^^xsd:boolean");
        if (e.verb.equals(LOGf)) e = parse(false, "\"false\"^^xsd:boolean");
        // NOTE(review): with DPO equal to "" (as in this copy) startsWith(DPO) is always true
        if (e.verb.startsWith(DPO)) e.verb = Euler.OWL + e.verb.substring(DPO.length());
        if (e.verb.startsWith(ONT)) e.verb = Euler.OWL + e.verb.substring(ONT.length());
        if (e.verb.equals(OWLequivalentTo)) e.verb = Euler.OWLsameAs;
        if (e.verb.equals(OWLsameIndividualAs)) e.verb = Euler.OWLsameAs;
        if (e.verb.equals(OWLsameClassAs)) e.verb = Euler.OWLequivalentClass;
        if (e.verb.equals(OWLsamePropertyAs)) e.verb = Euler.OWLequivalentProperty;
        if (e.verb.equals(OWLdifferentIndividualFrom)) e.verb = Euler.OWLdifferentFrom;
        if (e.verb.equals(OWLUnambiguousProperty)) e.verb = Euler.OWLInverseFunctionalProperty;
        if (e.verb.equals(OWLUniqueProperty)) e.verb = Euler.OWLFunctionalProperty;
        if (e.verb.equals(Euler.RDFfirst)) e.cverb = "rdf:first";
        if (e.verb.equals(Euler.RDFrest)) e.cverb = "rdf:rest";
        if (e.verb.equals(Euler.RDFnil)) e.cverb = "()";
        // a lone "?" becomes a variable named after the current input position
        if (e.verb.equals("?")) e.verb = e.cverb = "?anon_" + pos;
        if (e.verb.startsWith("?") && vt != null && !vt.contains(e.verb)) vt.addElement(e.verb);
        // varid is the index in the variables table; -1 (not a known variable) marks the node bound
        if (vt != null) e.varid = vt.indexOf(e.verb);
        if (e.varid == -1) e.bound = true;
        // a "." followed by whitespace (or a byte order mark) yields an empty node
        if (nt.equals(".") && (str.charAt(pos) == ' ' || str.charAt(pos) == '\t' || str.charAt(pos) == '\r' || str.charAt(pos) == '\n' || str.charAt(pos) == '\ufeff')) return new Euler();
        // "^^" directly after the term: datatype annotation
        if (!nt.equals("rdf:first") && str != null && pos < str.length() && str.charAt(pos)
                == '^' && str.charAt(pos + 1) == '^') {
            pos = pos + 2;
            Euler et = parse(false, tokenize());
            // every datatype except Euler.XSDstring wraps the literal in a "^^" node
            if (!et.verb.equals(Euler.XSDstring)) {
                e.subj = e.copy();
                e.verb = e.cverb = "^^";
                e.bound = true;
                e.obj = et;
            }
        }
        // share one String instance per distinct verb via the Euler.z table
        String ev = (String) Euler.z.get(e.verb);
        if (ev != null) e.verb = ev;
        else Euler.z.put(e.verb, e.verb);
        // ---- object(s) (only when b is set) ----
        if (b) {
            nt = tokenize();
            if (nt.equals("{")) {
                lv++;
                nt = tokenize();
                if (nt.equals(".")) e.obj = parse(false, "{}");
                else e.obj = parse(true, nt);
                Euler el = e.obj;
                while (el.near != null) el = el.near;
                nt = tokenize();
                while (nt != null && !nt.equals("}")) {
                    el.near = parse(true, nt);
                    while (el.near != null) el = el.near;
                    nt = tokenize();
                }
                lv--;
            }
            else if (nt.equals("[")) {
                e.obj = parse(true, null);
                if (e.obj.verb != null) r.verb = ";]";
                nt = tokenize();
                if (!nt.equals("]")) System.err.println("** Parser error ** " + u + " line " + line + ": (p3) expecting ] at " + e + " but got " + nt);
            }
            else if (nt.equals("(")) list(e);
            else e.obj = parse(false, nt);
            path(e.obj);
            nt = tokenize();
            if (nt == null) return e;
            // a token ending in three double quotes is appended to the object literal
            if (nt.endsWith("\"\"\"")) {
                e.obj.cverb = e.obj.cverb + nt;
                e.obj.verb = e.obj.cverb;
                nt = tokenize();
            }
            // ";" starts a new verb/object pair and "," a new object for the same verb;
            // each additional statement is appended to the near chain and shares e.subj
            Euler el = e;
            while (nt.equals(";") || nt.equals(",")) {
                while (el.near != null) el = el.near;
                if (nt.equals(";")) {
                    nt = tokenize();
                    if (nt.equals("]")) {
                        // ";" directly before "]": queue the "]" for the next token() call
                        r.verb = ";]";
                        nt = ";";
                        break;
                    }
                    else if (nt.equals(".")) break;
                    el.near = parse(false, nt);
                }
                else if (nt.equals(",")) {
                    el.near = parse(false, el.cverb);
                    el.near.cverb = el.cverb;
                }
                el.near.subj = e.subj;
                nt = tokenize();
                if (nt.equals("{")) {
                    lv++;
                    nt = tokenize();
                    if (nt.equals(".")) el.near.obj = parse(false, "{}");
                    else el.near.obj = parse(true, nt);
                    Euler ef = el.near.obj;
                    while (ef.near != null) ef = ef.near;
                    nt = tokenize();
                    while (nt != null && !nt.equals("}")) {
                        ef.near = parse(true, nt);
                        while (ef.near != null) ef = ef.near;
                        nt = tokenize();
                    }
                    lv--;
                }
                else if (nt.equals("[")) {
                    el.near.obj = parse(true, null);
                    if (el.near.obj.verb != null) r.verb = ";]";
                    nt = tokenize();
                    if
                    (!nt.equals("]")) System.err.println("** Parser error ** " + u + " line " + line + ": (p4) expecting ] at " + e + " but got " + nt);
                }
                else if (nt.equals("(")) list(el.near);
                else el.near.obj = parse(false, nt);
                nt = tokenize();
                if (nt == null) return e;
                if (nt.endsWith("\"\"\"")) {
                    el.near.obj.cverb = el.near.obj.cverb + nt;
                    el.near.obj.verb = el.near.obj.cverb;
                    nt = tokenize();
                }
                swap(el.near);
            }
            if (!nt.equals(".") && !nt.equals(";")) System.err.println("** Parser error ** " + u + " line " + line + ": (p5) expecting . or ; at " + e + " but got " + nt);
        }
        swap(e);
        // an implication whose subject is the empty graph "{}" is reduced to its object
        if (e.verb.equals(Euler.LOGimplies) && e.subj != null && e.subj.verb.equals("{}")) {
            e.obj.vv = true;
            e = e.obj;
        }
        e.far = r;
        return e;
        }
        // NOTE(review): a NullPointerException (for example from a null token at end of
        // input) is only printed; the partially built node is returned to the caller.
        catch (NullPointerException exc) {
            exc.printStackTrace();
            return e;
        }
    }

    /**
     * Returns the next logical token, skipping every {@code null} result of
     * {@link #token()} (whitespace, comments, directives) while input remains.
     *
     * @return the next token, or {@code null} when the input ends without one
     */
    String tokenize() {
        String nt = token();
        while (pos < str.length() && nt == null) nt = token();
        return nt;
    }

    /**
     * Cuts the next raw lexeme out of {@link #str} and advances {@link #pos} past it.
     * A lexeme is either a maximal run of characters that are not in {@link #ST}, or
     * exactly one character of {@link #ST}.
     *
     * <p>The line counter is incremented when the position reached after the lexeme
     * holds a newline character.
     *
     * @return the lexeme; the empty string when {@code pos} is already at the end
     */
    String next() {
        int start = pos;
        while (pos < str.length() && ST.indexOf(str.charAt(pos)) < 0) pos++;
        // nothing consumed, so the current character is a delimiter: take just that one
        if (pos < str.length() && start == pos && ST.indexOf(str.charAt(pos)) >= 0) pos++;
        if (pos < str.length() && str.charAt(pos) == '\n') line++;
        return str.substring(start, pos);
    }

    /**
     * Produces one logical token from the raw lexemes delivered by {@link #next()}.
     *
     * <p>{@code r.verb} is used as tokenizer state: it holds the token returned last,
     * or one of the markers {@code ".}"} / {@code ";]"}, which mean that a closing
     * brace or bracket is still owed to the caller after an inserted {@code "."} or
     * {@code ";"}.
     *
     * <p>Side effects visible in this method: the base URI {@link #u} is replaced on
     * {@code @base}; {@code r.ext.ns} and {@code r.ext.nsp} are updated on
     * {@code @prefix} / {@code PREFIX}; {@code r.ext.kw} is updated on
     * {@code @keywords}.
     *
     * @return the token, or {@code null} when nothing usable was produced (whitespace,
     *         a comment without preceding text, a directive, or end of input)
     */
    String token() {
        // deliver a closing brace / bracket that was held back by the previous call
        if (r.verb != null && r.verb.equals(".}")) return r.verb = "}";
        if (r.verb != null && r.verb.equals(";]")) return r.verb = "]";
        if (pos >= str.length()) return null;
        int start = pos; // NOTE(review): not read anywhere in this method
        String t = null;
        String nt = next();
        if (nt.indexOf('#') != -1) {
            // comment: keep the text in front of '#', then skip lexemes up to the newline
            t = nt.substring(0, nt.indexOf('#'));
            String v = r.verb;
            while (nt != null && !nt.equals("\n") && pos < str.length()) nt = next();
            r.verb = v;
            if (t.equals("")) return null;
        }
        else if (nt.equals("SELECT")) {
            // the rest of the SELECT line is collected into one "(...)" token
            StringBuffer sb = new StringBuffer("(");
            nt = tokenize();
            while (!nt.equals("\r") && !nt.equals("\n") && pos < str.length()) {
                sb.append(nt);
                nt = next();
            }
            t = sb.append(')').toString();
        }
        else if (nt.equals("CONSTRUCT")) {
            // the keyword itself is dropped; the following token is returned
            t = tokenize();
        }
        else if (nt.equals("FROM")) { // TODO
            // the token after FROM (and after NAMED, when present) is consumed and dropped
            nt = tokenize();
            if (nt.equals("NAMED")) tokenize();
            t = tokenize();
        }
        else if (nt.equals("UNION")) {
            t = ".";
        }
        else if (nt.equals("OPTIONAL")) { // TODO
            t = tokenize();
        }
        else if (nt.equals("FILTER")) { // TODO
            t = tokenize();
        }
        else if (nt.equals("@base")) {
            // strip the first and last character of the next token and resolve it as the new base
            String b = tokenize();
            u = toURI(b.substring(1, b.length() - 1));
            tokenize(); // consumes one more token -- presumably the terminating "."
            return null;
        }
        else if (nt.equals("@prefix") || nt.equals("PREFIX")) {
            String nsc = tokenize(); // prefix as declared
            if (nsc.equals("default")) nsc = ":";
            String nsd = null; // prefix under which the namespace ends up being stored
            String nsu = tokenize(); // namespace, including the angle brackets
            // a <...> value without ':' is resolved against the base URI
            if (u != null && nsu.length() > 0 && nsu.charAt(0) == '<' && nsu.indexOf(':') == -1) nsu = '<' + toURI(nsu.substring(1, nsu.length() - 1)) + '>';
            // reuse a prefix that is already bound to the same namespace
            for (Enumeration enr = r.ext.ns.keys(); enr.hasMoreElements();) {
                String nsx = (String) enr.nextElement();
                String nsy = (String) r.ext.ns.get(nsx);
                if (nsu.equals(nsy)) nsd = nsx;
            }
            if (nsd == null) {
                // otherwise take the declared prefix, or "p<i>" + prefix while the
                // candidate is already bound to a different namespace
                nsd = nsc;
                String nsv = (String) r.ext.ns.get(nsd);
                int i = 0;
                while (nsv != null && !nsu.equals(nsv)) {
                    nsd = "p" + i + nsc;
                    nsv = (String) r.ext.ns.get(nsd);
                    i++;
                }
            }
            r.ext.nsp.put(nsc, nsd);
            r.ext.ns.put(nsd, nsu);
            // only the "@prefix" spelling consumes one more token -- presumably the "."
            if (nt.equals("@prefix")) tokenize();
            return null;
        }
        else if (nt.equals("@keywords")) {
            // register every lexeme up to the "." (whitespace and commas excluded) in r.ext.kw
            while (nt != null && !nt.equals(".") && pos < str.length()) {
                nt = next();
                if (!nt.equals(" ") && !nt.equals("\t") && !nt.equals("\r") && !nt.equals("\n") && !nt.equals("\ufeff") && !nt.equals(",") && !nt.equals(".")) r.ext.kw.put(nt, r);
            }
            t = tokenize();
        }
        else if (nt.equals("@true")) {
            t = "true";
        }
        else if (nt.equals("@false")) {
            t = "false";
        }
        else if (nt.equals("\"")) {
            StringBuffer sb = new StringBuffer("\"");
            // NOTE(review): the charAt(pos + 1) / charAt(pos + 2) look-aheads in this
            // branch are not bounds-checked against the end of the input
            if (str.charAt(pos) == '"' && str.charAt(pos + 1) == '"') {
                // literal opened by three double quotes: copy lexemes until the next run of three
                sb.append(next()).append(next());
                while (pos < str.length() && (str.charAt(pos) != '"' || str.charAt(pos + 1) != '"' || str.charAt(pos + 2) != '"')) sb.append(next());
                sb.append(next()).append(next()).append(next());
            }
            else {
                nt = next();
                while (!nt.equals("\"")) {
                    // keep a backslash together with the lexeme that follows it, so that
                    // an escaped double quote does not end the literal
                    if (nt.equals("\\")) {
                        sb.append(nt);
                        nt = next();
                    }
                    sb.append(nt);
                    if (pos >= str.length()) {
                        System.err.println("** Parser error ** " + u + " line " +
                                line + ": (t1) expecting \" at " + sb);
                        break;
                    }
                    nt = next();
                }
                sb.append("\"");
            }
            // '@' directly after the literal: append the following token, upper-casing
            // everything from the first '-' onwards
            if (pos < str.length() && str.charAt(pos) == '@') {
                String lang = token();
                int i = lang.indexOf('-');
                if (i != -1) sb.append(lang.substring(0, i) + lang.substring(i).toUpperCase());
                else sb.append(lang);
            }
            // every doubled backslash is collapsed into a single one
            t = sb.toString().replace("\\\\", "\\");
        }
        else if (nt.equals("'")) {
            // single-quoted text is copied verbatim up to the closing quote
            StringBuffer sb = new StringBuffer("'");
            nt = next();
            while (!nt.equals("'")) {
                sb.append(nt);
                if (pos >= str.length()) {
                    System.err.println("** Parser error ** " + u + " line " + line + ": (t2) expecting ' at " + sb);
                    break;
                }
                nt = next();
            }
            t = sb.append("'").toString();
        }
        else if (nt.equals("<") && str.charAt(pos) == '=') {
            // "<=" is returned as "=>" carrying the "@@" suffix, which swap(...) later
            // uses to exchange subject and object
            next();
            t = "=>@@";
        }
        else if (nt.equals("<")) {
            // <...> reference: copy everything up to the closing '>'
            StringBuffer sb = new StringBuffer("<");
            nt = next();
            while (!nt.equals(">")) {
                sb.append(nt);
                if (pos >= str.length()) {
                    System.err.println("** Parser error ** " + u + " line " + line + ": (t2) expecting > at " + sb);
                    break;
                }
                nt = next();
            }
            t = sb.append(">").toString();
        }
        else if (nt.equals("=")) {
            // "=" or "=>"
            if (str.charAt(pos) == '>') t = nt + next();
            else t = nt;
        }
        else if (nt.equals("is") || nt.equals("@is")) {
            // "is X of" is returned as X with the "@@" suffix
            t = tokenize();
            nt = tokenize();
            if (!(nt.equals("of") || nt.equals("@of"))) System.err.println("** Parser error ** " + u + " line " + line + ": (t4) expecting \"of\" but got " + nt);
            t = t + "@@";
        }
        else if (nt.equals("has") || nt.equals("@has")) {
            // "has X" is returned as X with the "_@" suffix; a following "of" is optional
            t = tokenize() + "_@";
            int cpos = pos;
            nt = tokenize();
            if (!(nt.equals("of") || nt.equals("@of"))) pos = cpos;
        }
        else if (nt.equals(" ") || nt.equals("\t") || nt.equals("\r") || nt.equals("\n") || nt.equals("\ufeff") || nt.equals("-") || nt.equals(">")) return null;
        // "_:" labels that do not yet end in "_" get the suffix "_" + r.doc + "_"
        else if (nt.startsWith("_:") && !nt.endsWith("_")) t = nt + "_" + r.doc + "_";
        else t = nt;
        // a token that is not a substring of ST and does not start with a double quote,
        // directly followed by a double quote: the literal that follows is appended to it
        if (pos < str.length() && ST.indexOf(t) == -1 && t.charAt(0) != '"' && str.charAt(pos) == '"') t = t + token();
        // a '.' that is not followed by whitespace, a byte order mark, ']', '}' or '#'
        // does not end the statement: it and the next lexeme are glued onto the token
        while (pos < str.length() - 1 && str.charAt(pos) == '.'
                && str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\t' && str.charAt(pos + 1) != '\r' && str.charAt(pos + 1) != '\n' && str.charAt(pos + 1) != '\ufeff' && str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
            t = t + next() + next();
        }
        // "}" not preceded by ".": return "." now and deliver the "}" on the next call
        if (r.verb != null && t != null && !r.verb.equals(".") && t.equals("}")) {
            r.verb = ".}";
            return ".";
        }
        // "]" not preceded by ";": return ";" now and deliver the "]" on the next call
        if (r.verb != null && t != null && !r.verb.equals(";") && t.equals("]")) {
            r.verb = ";]";
            return ";";
        }
        return r.verb = t;
    }

    /**
     * Post-processes a node whose {@code cverb} carries the {@code "@@"} suffix.
     *
     * <ul>
     * <li>Without a subject: the object becomes the subject, the verb name (suffix
     * removed) is parsed as the new object, and verb and cverb become {@code "!"}.</li>
     * <li>With both subject and object: the two are exchanged and the suffix is
     * removed from {@code cverb}.</li>
     * </ul>
     * Nodes without the suffix are left untouched.
     *
     * @param e node to adjust; {@code null} is tolerated by the first test only
     */
    void swap(Euler e) {
        if (e != null && e.subj == null && e.cverb.endsWith("@@")) {
            e.subj = e.obj;
            e.obj = parse(false, e.cverb.substring(0, e.cverb.length() - 2));
            e.verb = e.cverb = "!";
        }
        else if (e.subj != null && e.obj != null && e.cverb.endsWith("@@")) {
            Euler el = e.subj;
            e.subj = e.obj;
            e.obj = el;
            e.cverb = e.cverb.substring(0, e.cverb.length() - 2);
        }
    }

    /**
     * Parses the remainder of a parenthesised list (the opening parenthesis has been
     * consumed by the caller) and stores it in {@code e.obj}.
     *
     * <p>Each item produces an {@code rdf:first} node whose {@code obj} is the item
     * and whose {@code near} is an {@code rdf:rest} node; the following cell hangs
     * off the {@code obj} of that {@code rdf:rest} node. The closing parenthesis
     * yields {@code rdf:nil}. A token starting with {@code '@'} ends the list with
     * the term that follows the {@code '@'}.
     *
     * @param e node that receives the list in its {@code obj} field
     */
    void list(Euler e) {
        Euler el = e;
        while (true) {
            String nt = tokenize();
            if (nt.startsWith("@")) {
                el.obj = parse(false, nt.substring(1));
                nt = tokenize();
                if (!nt.equals(")")) System.err.println("** Parser error ** " + u + " line " + line + ": (p6) expecting ) at " + el + " but got " + nt);
                break;
            }
            else if (nt.equals(")")) {
                el.obj = parse(false, "rdf:nil");
                break;
            }
            else el.obj = parse(false, "rdf:first");
            if (nt.equals("{")) {
                // graph as list item: statements up to the closing brace, chained via near
                lv++;
                nt = tokenize();
                if (nt.equals(".")) el.obj.obj = parse(false, "{}");
                else el.obj.obj = parse(true, nt);
                Euler er = el.obj.obj;
                while (er.near != null) er = er.near;
                nt = tokenize();
                while (!nt.equals("}")) {
                    er.near = parse(true, nt);
                    while (er.near != null) er = er.near;
                    nt = tokenize();
                }
                lv--;
            }
            else if (nt.equals("[")) {
                //el.obj.obj = parse(true, null);
                Euler f = parse(true, null);
                if (f.verb.equals(Euler.RDFfirst)) el.obj.obj = f;
                else {
                    // the bracketed node gets a generated "_:e..." subject, which becomes the
                    // list item; at nesting level 0 the text form of the statement itself is
                    // appended to the input, so it is read again later as ordinary input
                    f.subj = parse(false, "_:e" + f.uid + "_" + r.doc + "_");
                    el.obj.obj = f.subj;
                    if (lv == 0) str = str + f;
                }
                tokenize(); // consumes one token -- presumably the closing "]"
            }
            else el.obj.obj = parse(false, nt);
            path(el.obj.obj);
            el.obj.near = parse(false, "rdf:rest");
            el = el.obj.near;
        }
        path(e.obj);
    }

    /**
     * Applies path operators that directly follow a node in the input.
     *
     * <p>If the character at {@link #pos} is {@code '!'} (or a single {@code '^'}) and
     * the character after it is not whitespace, a byte order mark, {@code ']'},
     * {@code '}'} or {@code '#'}, the node is rewritten in place: a copy of it
     * becomes its subject, its verb becomes {@code "!"} (or {@code "^"}), the next
     * token is parsed as its object, and the method recurses to pick up further
     * operators.
     *
     * @param e node to rewrite; dereferenced without a null check once an operator is found
     */
    void path(Euler e) {
        if (str != null && pos < str.length() - 1 && str.charAt(pos) == '!' && str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\t' && str.charAt(pos + 1) != '\r' && str.charAt(pos + 1) != '\n' && str.charAt(pos + 1) != '\ufeff' && str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
            pos = pos + 1;
            e.subj = e.copy();
            e.verb = e.cverb = "!";
            e.varid = -1;
            e.bound = true;
            e.obj = parse(false, tokenize());
            e.near = null;
            path(e);
        }
        // a doubled '^' is excluded here; "^^" is treated as a datatype marker by the parse code
        if (str != null && pos < str.length() - 1 && str.charAt(pos) == '^' && str.charAt(pos + 1) != '^' && str.charAt(pos + 1) != ' ' && str.charAt(pos + 1) != '\t' && str.charAt(pos + 1) != '\r' && str.charAt(pos + 1) != '\n' && str.charAt(pos + 1) != '\ufeff' && str.charAt(pos + 1) != ']' && str.charAt(pos + 1) != '}' && str.charAt(pos + 1) != '#') {
            pos = pos + 1;
            e.subj = e.copy();
            e.verb = e.cverb = "^";
            e.varid = -1;
            e.bound = true;
            e.obj = parse(false, tokenize());
            // "^" with Euler.Esource on a subject without cverb: use the current line as subject
            if (e.obj.verb.equals(Euler.Esource) && e.subj.cverb == null) e.subj = parse(false, toQName(u + "#line_" + line));
            e.near = null;
            path(e);
        }
    }

    /**
     * Converts a URI into a prefixed name using the namespace table {@code r.ext.ns}.
     *
     * <p>The URI is split after its last {@code '#'}; if there is none, after its
     * last {@code '/'}; otherwise after its last {@code ':'} (at index 0 when none of
     * them occurs). If no prefix is bound to the namespace part yet, the first free
     * {@code "nsp<i>:"} is bound to it.
     *
     * @param nsq URI without angle brackets
     * @return prefix (including the colon) followed by the local part
     */
    final String toQName(String nsq) {
        int nsh = nsq.lastIndexOf('#');
        int nss = nsq.lastIndexOf('/');
        int nsc = nsq.lastIndexOf(':');
        int nsi = (nsh != -1 ? nsh + 1 : (nss != -1 ?
                nss + 1 : nsc + 1));
        String nsu = '<' + nsq.substring(0, nsi) + '>';
        String nsn = nsq.substring(nsi);
        String nsp = null;
        // when several prefixes map to this namespace, the last one enumerated is used
        for (Enumeration enr = r.ext.ns.keys(); enr.hasMoreElements();) {
            String nsx = (String) enr.nextElement();
            String nsy = (String) r.ext.ns.get(nsx);
            if (nsu.equals(nsy)) nsp = nsx;
        }
        if (nsp == null) {
            int i = 0;
            while (true) {
                nsp = "nsp" + i + ':';
                if (r.ext.ns.get(nsp) == null) break;
                i++;
            }
            r.ext.ns.put(nsp, nsu);
        }
        return nsp + nsn;
    }

    /**
     * Resolves a URI reference against the base URI {@link #u}.
     *
     * @param s URI reference without angle brackets
     * @return {@code u} itself when {@code s} is empty; {@code s} unchanged when
     *         {@code u} starts with {@code "_:"} or when resolution throws (the
     *         exception, including the one raised for a {@code null} base, is
     *         reported on {@code System.err}); the resolved URI otherwise
     */
    final String toURI(String s) {
        if (s.equals("")) return u;
        try {
            if (!u.startsWith("_:")) s = new URI(u).resolve(s).toString();
        }
        catch (Exception e) {
            System.err.println("** Parser error ** " + u + " line " + line + ": found " + s + "\n" + e);
        }
        return s;
    }
}