001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.commons.rdf.rdf4j.experimental; 019 020import java.io.IOException; 021import java.io.InputStream; 022import java.net.MalformedURLException; 023import java.net.URL; 024import java.nio.file.Files; 025import java.nio.file.Path; 026import java.util.Optional; 027import java.util.function.Consumer; 028import java.util.stream.Stream; 029 030import org.apache.commons.rdf.api.IRI; 031import org.apache.commons.rdf.api.Quad; 032import org.apache.commons.rdf.api.RDFSyntax; 033import org.apache.commons.rdf.rdf4j.RDF4J; 034import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI; 035import org.apache.commons.rdf.rdf4j.RDF4JDataset; 036import org.apache.commons.rdf.rdf4j.RDF4JGraph; 037import org.apache.commons.rdf.simple.experimental.AbstractRDFParser; 038import org.eclipse.rdf4j.model.Model; 039import org.eclipse.rdf4j.model.Resource; 040import org.eclipse.rdf4j.repository.util.RDFInserter; 041import org.eclipse.rdf4j.repository.util.RDFLoader; 042import org.eclipse.rdf4j.rio.ParserConfig; 043import org.eclipse.rdf4j.rio.RDFFormat; 044import org.eclipse.rdf4j.rio.RDFHandler; 045import org.eclipse.rdf4j.rio.RDFHandlerException; 046import org.eclipse.rdf4j.rio.Rio; 047import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler; 048 049/** 050 * RDF4J-based parser. 051 * <p> 052 * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD}, 053 * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES}, 054 * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE} 055 * - additional syntaxes can be supported by including the corresponding 056 * <em>rdf4j-rio-*</em> module on the classpath. 057 * 058 */ 059public class RDF4JParser extends AbstractRDFParser<RDF4JParser> { 060 061 private final class AddToQuadConsumer extends AbstractRDFHandler { 062 private final Consumer<Quad> quadTarget; 063 064 private AddToQuadConsumer(final Consumer<Quad> quadTarget) { 065 this.quadTarget = quadTarget; 066 } 067 068 @Override 069 public void handleStatement(final org.eclipse.rdf4j.model.Statement st) throws RDFHandlerException { 070 // TODO: if getRdfTermFactory() is a non-rdf4j factory, should 071 // we use factory.createQuad() instead? 072 // Unsure what is the promise of setting getRdfTermFactory() -- 073 // does it go all the way down to creating BlankNode, IRI and 074 // Literal? 075 quadTarget.accept(rdf4jTermFactory.asQuad(st)); 076 // Performance note: 077 // Graph/Quad.add should pick up again our 078 // RDF4JGraphLike.asStatement() 079 // and avoid double conversion. 080 // Additionally the RDF4JQuad and RDF4JTriple implementations 081 // are lazily converting subj/obj/pred/graph.s 082 } 083 } 084 085 private final static class AddToModel extends AbstractRDFHandler { 086 private final Model model; 087 088 public AddToModel(final Model model) { 089 this.model = model; 090 } 091 092 @Override 093 public void handleStatement(final org.eclipse.rdf4j.model.Statement st) throws RDFHandlerException { 094 model.add(st); 095 } 096 097 @Override 098 public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException { 099 model.setNamespace(prefix, uri); 100 } 101 } 102 103 private RDF4J rdf4jTermFactory; 104 private ParserConfig parserConfig = new ParserConfig(); 105 106 @Override 107 protected RDF4J createRDFTermFactory() { 108 return new RDF4J(); 109 } 110 111 @Override 112 protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException { 113 final RDF4JParser c = super.prepareForParsing(); 114 // Ensure we have an RDF4J for conversion. 115 // We'll make a new one if user has provided a non-RDF4J factory 116 c.rdf4jTermFactory = (RDF4J) getRdfTermFactory().filter(RDF4J.class::isInstance) 117 .orElseGet(c::createRDFTermFactory); 118 return c; 119 } 120 121 @Override 122 protected void parseSynchronusly() throws IOException { 123 final Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType); 124 final String base = getBase().map(IRI::getIRIString).orElse(null); 125 126 final ParserConfig parserConfig = getParserConfig(); 127 // TODO: Should we need to set anything? 128 final RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory()); 129 final RDFHandler rdfHandler = makeRDFHandler(); 130 if (getSourceFile().isPresent()) { 131 // NOTE: While we could have used 132 // loader.load(sourcePath.toFile() 133 // if the path fs provider == FileSystems.getDefault(), 134 // that RDFLoader method does not use absolute path 135 // as the base URI, so to be consistent 136 // we'll always do it with our own input stream 137 // 138 // That means we may have to guess format by extensions: 139 final Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString) 140 .flatMap(Rio::getParserFormatForFileName); 141 // TODO: for the excited.. what about the extension after following 142 // symlinks? 143 144 final RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null)); 145 try (InputStream in = Files.newInputStream(getSourceFile().get())) { 146 loader.load(in, base, format, rdfHandler); 147 } 148 } else if (getSourceIri().isPresent()) { 149 try { 150 // TODO: Handle international IRIs properly 151 // (Unicode support for for hostname, path and query) 152 final URL url = new URL(getSourceIri().get().getIRIString()); 153 // TODO: This probably does not support https:// -> http:// 154 // redirections 155 loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler()); 156 } catch (final MalformedURLException ex) { 157 throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex); 158 } 159 } 160 // must be getSourceInputStream then, this is guaranteed by 161 // super.checkSource(); 162 loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler); 163 } 164 165 /** 166 * Get the RDF4J {@link ParserConfig} to use. 167 * <p> 168 * If no parser config is set, the default configuration is provided. 169 * <p> 170 * <strong>Note:</strong> The parser config is mutable - changes in the 171 * returned config is reflected in this instance of the parser. To avoid 172 * mutation, create a new {@link ParserConfig} and set 173 * {@link #setParserConfig(ParserConfig)}. 174 * 175 * @return The RDF4J {@link ParserConfig} 176 */ 177 public ParserConfig getParserConfig() { 178 return parserConfig; 179 } 180 181 /** 182 * Set an RDF4J {@link ParserConfig} to use 183 * 184 * @param parserConfig 185 * Parser configuration 186 */ 187 public void setParserConfig(final ParserConfig parserConfig) { 188 this.parserConfig = parserConfig; 189 } 190 191 protected RDFHandler makeRDFHandler() { 192 193 // TODO: Can we join the below DF4JDataset and RDF4JGraph cases 194 // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>> 195 // or will that need tricky generics types? 196 197 if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) { 198 // One of us, we can add them as Statements directly 199 final RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get(); 200 if (dataset.asRepository().isPresent()) { 201 return new RDFInserter(dataset.asRepository().get().getConnection()); 202 } 203 if (dataset.asModel().isPresent()) { 204 final Model model = dataset.asModel().get(); 205 return new AddToModel(model); 206 } 207 // Not backed by Repository or Model? 208 // Third-party RDF4JDataset subclass, so we'll fall through to the 209 // getTarget() handling further down 210 } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) { 211 final RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get(); 212 213 if (graph.asRepository().isPresent()) { 214 final RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection()); 215 if (!graph.getContextMask().isEmpty()) { 216 final Stream<RDF4JBlankNodeOrIRI> b = graph.getContextMask().stream(); 217 final Stream<Resource> c = b.map(RDF4JBlankNodeOrIRI::asValue); 218 final Resource[] contexts = c.toArray(Resource[]::new); 219 inserter.enforceContext(contexts); 220 } 221 return inserter; 222 } 223 if (graph.asModel().isPresent() && graph.getContextMask().isEmpty()) { 224 // the model accepts any quad 225 final Model model = graph.asModel().get(); 226 return new AddToModel(model); 227 } 228 // else - fall through 229 } 230 231 // Fall thorough: let target() consume our converted quads. 232 return new AddToQuadConsumer(getTarget()); 233 } 234 235}