1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.extractor.ExtractorFactory;
21 import org.apache.any23.vocab.SINDICE;
22 import org.junit.Test;
23 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
24
25
26
27
28
29
30 public class HTMLMetaExtractorTest extends AbstractExtractorTestCase {
31
32 private static final SINDICE vSINDICE = SINDICE.getInstance();
33
34 protected ExtractorFactory<?> getExtractorFactory() {
35 return new HTMLMetaExtractorFactory();
36 }
37
38 @Test
39 public void testExtractPageMeta() throws Exception {
40 assertExtract("/html/html-head-meta-extractor.html");
41 assertModelNotEmpty();
42 assertStatementsSize(null, null, null, 10);
43 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
44 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/title"),
45 "XHTML+RDFa example", "en");
46 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
47 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/language"), "en", "en");
48 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
49 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/subject"),
50 "XHTML+RDFa, semantic web", "en");
51 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
52 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/format"),
53 "application/xhtml+xml", "en");
54 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
55 SimpleValueFactory.getInstance().createIRI("http://purl.org/dc/elements/1.1/description"),
56 "Example for Extensible Hypertext Markup Language + Resource Description Framework – in – attributes.",
57 "en");
58 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
59 SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "robots"), "index, follow",
60 "en");
61 assertContains(SimpleValueFactory.getInstance().createIRI("http://bob.example.com/"),
62 SimpleValueFactory.getInstance().createIRI(vSINDICE.NAMESPACE.toString() + "content-language"), "en",
63 "en");
64 }
65
66 @Test
67 public void testNoMeta() throws Exception {
68 assertExtract("/html/html-head-link-extractor.html");
69 assertModelEmpty();
70 }
71
72 @Test
73 public void testExtractPageMetaWithExtensionsPerMozillaSpecification() throws Exception {
74 assertExtract("/html/html-head-meta-extractor-with-mozilla-extensions.html");
75 assertModelNotEmpty();
76 assertStatementsSize(null, null, null, 2);
77 }
78
79 }