001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.commons.rdf.rdf4j.experimental;
019
020import java.io.IOException;
021import java.io.InputStream;
022import java.net.MalformedURLException;
023import java.net.URL;
024import java.nio.file.Files;
025import java.nio.file.Path;
026import java.util.Optional;
027import java.util.function.Consumer;
028import java.util.stream.Stream;
029
030import org.apache.commons.rdf.api.IRI;
031import org.apache.commons.rdf.api.Quad;
032import org.apache.commons.rdf.api.RDFSyntax;
033import org.apache.commons.rdf.rdf4j.RDF4J;
034import org.apache.commons.rdf.rdf4j.RDF4JBlankNodeOrIRI;
035import org.apache.commons.rdf.rdf4j.RDF4JDataset;
036import org.apache.commons.rdf.rdf4j.RDF4JGraph;
037import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
038import org.eclipse.rdf4j.model.Model;
039import org.eclipse.rdf4j.model.Resource;
040import org.eclipse.rdf4j.repository.util.RDFInserter;
041import org.eclipse.rdf4j.repository.util.RDFLoader;
042import org.eclipse.rdf4j.rio.ParserConfig;
043import org.eclipse.rdf4j.rio.RDFFormat;
044import org.eclipse.rdf4j.rio.RDFHandler;
045import org.eclipse.rdf4j.rio.RDFHandlerException;
046import org.eclipse.rdf4j.rio.Rio;
047import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
048
049/**
050 * RDF4J-based parser.
051 * <p>
052 * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
053 * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
054 * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE}
055 * - additional syntaxes can be supported by including the corresponding
056 * <em>rdf4j-rio-*</em> module on the classpath.
057 *
058 */
059public class RDF4JParser extends AbstractRDFParser<RDF4JParser> {
060
061    private final class AddToQuadConsumer extends AbstractRDFHandler {
062        private final Consumer<Quad> quadTarget;
063
064        private AddToQuadConsumer(final Consumer<Quad> quadTarget) {
065            this.quadTarget = quadTarget;
066        }
067
068        @Override
069        public void handleStatement(final org.eclipse.rdf4j.model.Statement st) throws RDFHandlerException {
070            // TODO: if getRdfTermFactory() is a non-rdf4j factory, should
071            // we use factory.createQuad() instead?
072            // Unsure what is the promise of setting getRdfTermFactory() --
073            // does it go all the way down to creating BlankNode, IRI and
074            // Literal?
075            quadTarget.accept(rdf4jTermFactory.asQuad(st));
076            // Performance note:
077            // Graph/Quad.add should pick up again our
078            // RDF4JGraphLike.asStatement()
079            // and avoid double conversion.
080            // Additionally the RDF4JQuad and RDF4JTriple implementations
081            // are lazily converting subj/obj/pred/graph.s
082        }
083    }
084
085    private final static class AddToModel extends AbstractRDFHandler {
086        private final Model model;
087
088        public AddToModel(final Model model) {
089            this.model = model;
090        }
091
092        @Override
093        public void handleStatement(final org.eclipse.rdf4j.model.Statement st) throws RDFHandlerException {
094            model.add(st);
095        }
096
097        @Override
098        public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException {
099            model.setNamespace(prefix, uri);
100        }
101    }
102
103    private RDF4J rdf4jTermFactory;
104    private ParserConfig parserConfig = new ParserConfig();
105
106    @Override
107    protected RDF4J createRDFTermFactory() {
108        return new RDF4J();
109    }
110
111    @Override
112    protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException {
113        final RDF4JParser c = super.prepareForParsing();
114        // Ensure we have an RDF4J for conversion.
115        // We'll make a new one if user has provided a non-RDF4J factory
116        c.rdf4jTermFactory = (RDF4J) getRdfTermFactory().filter(RDF4J.class::isInstance)
117                .orElseGet(c::createRDFTermFactory);
118        return c;
119    }
120
121    @Override
122    protected void parseSynchronusly() throws IOException {
123        final Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
124        final String base = getBase().map(IRI::getIRIString).orElse(null);
125
126        final ParserConfig parserConfig = getParserConfig();
127        // TODO: Should we need to set anything?
128        final RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
129        final RDFHandler rdfHandler = makeRDFHandler();
130        if (getSourceFile().isPresent()) {
131            // NOTE: While we could have used
132            // loader.load(sourcePath.toFile()
133            // if the path fs provider == FileSystems.getDefault(),
134            // that RDFLoader method does not use absolute path
135            // as the base URI, so to be consistent
136            // we'll always do it with our own input stream
137            //
138            // That means we may have to guess format by extensions:
139            final Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
140                    .flatMap(Rio::getParserFormatForFileName);
141            // TODO: for the excited.. what about the extension after following
142            // symlinks?
143
144            final RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
145            try (InputStream in = Files.newInputStream(getSourceFile().get())) {
146                loader.load(in, base, format, rdfHandler);
147            }
148        } else if (getSourceIri().isPresent()) {
149            try {
150                // TODO: Handle international IRIs properly
151                // (Unicode support for for hostname, path and query)
152                final URL url = new URL(getSourceIri().get().getIRIString());
153                // TODO: This probably does not support https:// -> http://
154                // redirections
155                loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler());
156            } catch (final MalformedURLException ex) {
157                throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
158            }
159        }
160        // must be getSourceInputStream then, this is guaranteed by
161        // super.checkSource();
162        loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
163    }
164
165    /**
166     * Get the RDF4J {@link ParserConfig} to use.
167     * <p>
168     * If no parser config is set, the default configuration is provided.
169     * <p>
170     * <strong>Note:</strong> The parser config is mutable - changes in the
171     * returned config is reflected in this instance of the parser. To avoid
172     * mutation, create a new {@link ParserConfig} and set
173     * {@link #setParserConfig(ParserConfig)}.
174     *
175     * @return The RDF4J {@link ParserConfig}
176     */
177    public ParserConfig getParserConfig() {
178        return parserConfig;
179    }
180
181    /**
182     * Set an RDF4J {@link ParserConfig} to use
183     *
184     * @param parserConfig
185     *            Parser configuration
186     */
187    public void setParserConfig(final ParserConfig parserConfig) {
188        this.parserConfig = parserConfig;
189    }
190
191    protected RDFHandler makeRDFHandler() {
192
193        // TODO: Can we join the below DF4JDataset and RDF4JGraph cases
194        // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>>
195        // or will that need tricky generics types?
196
197        if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) {
198            // One of us, we can add them as Statements directly
199            final RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get();
200            if (dataset.asRepository().isPresent()) {
201                return new RDFInserter(dataset.asRepository().get().getConnection());
202            }
203            if (dataset.asModel().isPresent()) {
204                final Model model = dataset.asModel().get();
205                return new AddToModel(model);
206            }
207            // Not backed by Repository or Model?
208            // Third-party RDF4JDataset subclass, so we'll fall through to the
209            // getTarget() handling further down
210        } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) {
211            final RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get();
212
213            if (graph.asRepository().isPresent()) {
214                final RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection());
215                if (!graph.getContextMask().isEmpty()) {
216                    final Stream<RDF4JBlankNodeOrIRI> b = graph.getContextMask().stream();
217                    final Stream<Resource> c = b.map(RDF4JBlankNodeOrIRI::asValue);
218                    final Resource[] contexts = c.toArray(Resource[]::new);
219                    inserter.enforceContext(contexts);
220                }
221                return inserter;
222            }
223            if (graph.asModel().isPresent() && graph.getContextMask().isEmpty()) {
224                // the model accepts any quad
225                final Model model = graph.asModel().get();
226                return new AddToModel(model);
227            }
228            // else - fall through
229        }
230
231        // Fall thorough: let target() consume our converted quads.
232        return new AddToQuadConsumer(getTarget());
233    }
234
235}