1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.source.DocumentSource;
21 import org.apache.any23.source.FileDocumentSource;
22 import org.junit.Assert;
23 import org.w3c.dom.Node;
24
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.IOException;
28
29
30
31
32 public class HTMLFixture {
33
34 private final File file;
35
36 public HTMLFixture(File file) {
37 Assert.assertNotNull("Test resource file was null", file);
38 Assert.assertTrue("Test resource file does not exist", file.exists());
39 this.file = file;
40 }
41
42 private File getFile() {
43 return file;
44 }
45
46 public DocumentSource getOpener(String baseIRI) {
47 return new FileDocumentSource(getFile(), baseIRI);
48 }
49
50
51
52
53 public Node getDOM() {
54 try {
55 return new TagSoupParser(new FileInputStream(getFile()), "http://example.org/").getDOM();
56 } catch (IOException ex) {
57 throw new RuntimeException(ex);
58 }
59 }
60
61
62
63
64 public HTMLDocument getHTMLDocument() {
65 return new HTMLDocument(getDOM());
66 }
67 }