1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.any23.extractor.rdfa; 19 20 import org.apache.any23.extractor.ExtractorFactory; 21 import org.junit.Assert; 22 import org.junit.Test; 23 import org.eclipse.rdf4j.model.Statement; 24 import org.eclipse.rdf4j.repository.RepositoryException; 25 import org.eclipse.rdf4j.rio.RDFHandlerException; 26 import org.eclipse.rdf4j.rio.RDFParseException; 27 28 import java.io.IOException; 29 import java.util.List; 30 31 /** 32 * Reference Test Class for {@link RDFaExtractor}. 33 */ 34 public class RDFaExtractorTest extends AbstractRDFaExtractorTestCase { 35 36 /** 37 * Taken from the <a href="http://www.heppnetz.de/rdfa4google/testcases.html">GoodRelations test cases</a>. It 38 * checks if the extraction is the same when the namespaces are defined in <i>RDFa1.0</i> or <i>RDFa1.1</i> 39 * respectively. 40 * 41 * @throws org.eclipse.rdf4j.repository.RepositoryException 42 * if an error is encountered whilst loading content from a storage connection 43 * @throws java.io.IOException 44 * if there is an error interpreting the input data 45 * @throws org.eclipse.rdf4j.rio.RDFHandlerException 46 * if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler} 47 * @throws org.eclipse.rdf4j.rio.RDFParseException 48 * if there is an exception parsing an RDF Stream 49 */ 50 @Test 51 public void testRDFa11PrefixBackwardCompatibility() 52 throws RepositoryException, RDFHandlerException, IOException, RDFParseException { 53 final int EXPECTED_STATEMENTS = 31; 54 55 assertExtract("/html/rdfa/goodrelations-rdfa10.html"); 56 logger.debug("Model 1 " + dumpHumanReadableTriples()); 57 Assert.assertEquals(EXPECTED_STATEMENTS, dumpAsListOfStatements().size()); 58 List<Statement> rdfa10Stmts = dumpAsListOfStatements(); 59 60 // assertContainsModel("/html/rdfa/goodrelations-rdfa10-expected.nq"); 61 62 assertExtract("/html/rdfa/goodrelations-rdfa11.html"); 63 logger.debug("Model 2 " + dumpHumanReadableTriples()); 64 Assert.assertTrue(dumpAsListOfStatements().size() >= EXPECTED_STATEMENTS); 65 66 for (Statement stmt : rdfa10Stmts) { 67 assertContains(stmt); 68 } 69 } 70 71 @Test 72 public void testRDFa11CURIEs() throws Exception { 73 } 74 75 /** 76 * Tests that the default parser settings enable tolerance in data type parsing. 77 */ 78 @Test 79 public void testTolerantParsing() { 80 assertExtract("/html/rdfa/oreilly-invalid-datatype.html"); 81 } 82 83 @Override 84 protected ExtractorFactory<?> getExtractorFactory() { 85 return new RDFaExtractorFactory(); 86 } 87 88 }