1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.rdfa;
19
20 import org.apache.any23.extractor.html.AbstractExtractorTestCase;
21 import org.apache.any23.rdf.RDFUtils;
22 import org.apache.any23.vocab.DCTerms;
23 import org.apache.any23.vocab.FOAF;
24 import org.junit.Test;
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28
29
30
31
32
33 public abstract class AbstractRDFaExtractorTestCase extends AbstractExtractorTestCase {
34
35 protected static final DCTerms vDCTERMS = DCTerms.getInstance();
36 protected static final FOAF vFOAF = FOAF.getInstance();
37
38 Logger logger = LoggerFactory.getLogger(RDFaExtractorTest.class);
39
40
41
42
43
44
45
46 @Test
47 public void testBasic() throws Exception {
48 assertExtract("/html/rdfa/basic.html");
49 logger.info(dumpModelToNQuads());
50 assertContains(null, vDCTERMS.creator, RDFUtils.literal("Alice", "en"));
51 assertContains(null, vDCTERMS.title, RDFUtils.literal("The trouble with Bob", "en"));
52 assertContains(null, RDFUtils.iri("http://fake.org/prop"), RDFUtils.literal("Mary", "en"));
53 }
54
55
56
57
58
59
60
61
62
63 @Test
64 public void testRDFa11CURIEs() throws Exception {
65 assertExtract("/html/rdfa/rdfa-11-curies.html");
66 assertModelNotEmpty();
67 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
68 RDFUtils.iri("http://dbpedia.org/name"), RDFUtils.literal("Albert Einstein"));
69 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
70 RDFUtils.iri("http://dbpedia.org/knows"),
71 RDFUtils.iri("http://dbpedia.org/resource/Franklin_Roosevlet"));
72 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
73 RDFUtils.iri("http://database.org/description"), RDFUtils.literal("Tables listing departments"));
74 assertContains(RDFUtils.iri("http://database.org/table/Departments"), RDFUtils.iri("http://database.org/owner"),
75 RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
76 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
77 RDFUtils.iri("http://xmlns.com/foaf/0.1/author"),
78 RDFUtils.iri("http://database.org/people/Davide_Palmisano"));
79 assertContains(RDFUtils.iri("http://database.org/table/Departments"),
80 RDFUtils.iri("http://purl.org/dc/01/name"), RDFUtils.literal("Departments"));
81 assertStatementsSize(null, null, null, 6);
82 logger.debug(dumpHumanReadableTriples());
83 }
84
85
86
87
88
89
90
91
92
93 @Test
94 public void testEmptyDatatypeDeclarationWithInnerXMLTags() throws Exception {
95 assertExtract("/html/rdfa/null-datatype-test.html");
96 logger.debug(dumpModelToRDFXML());
97
98 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"), vFOAF.name,
99 RDFUtils.literal("Albert Einstein", "en"));
100
101 }
102
103
104
105
106
107
108
109
110
111 @Test
112 public void testDrupalTestPage() throws Exception {
113 assertExtract("/html/rdfa/drupal-test-frontpage.html");
114 logger.debug(dumpModelToTurtle());
115 assertContains(RDFUtils.iri("http://bob.example.com/node/3"), vDCTERMS.title,
116 RDFUtils.literal("A blog post...", "en"));
117 }
118
119
120
121
122
123
124
125 @Test
126 public void testIncompleteTripleManagement() throws Exception {
127 assertExtract("/html/rdfa/incomplete-triples.html");
128 logger.debug(dumpModelToTurtle());
129
130 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
131 RDFUtils.iri("http://dbpedia.org/property/birthPlace"),
132 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
133 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Germany"),
134 RDFUtils.iri("http://dbpedia.org/property/conventionalLongName"),
135 RDFUtils.literal("Federal Republic of Germany"));
136 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
137 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
138 RDFUtils.iri("http://dbpedia.org/resource/Germany"));
139 assertContains(RDFUtils.iri("http://dbpedia.org/resource/Albert_Einstein"),
140 RDFUtils.iri("http://dbpedia.org/property/citizenship"),
141 RDFUtils.iri("http://dbpedia.org/resource/United_States"));
142 }
143
144 }