1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.any23.extractor.html;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22
23 import org.apache.any23.AbstractAny23TestBase;
24 import org.junit.Assert;
25 import org.junit.Test;
26
27
28
29
30 public class EncodingTest extends AbstractAny23TestBase {
31
32 private final static String HELLO_WORLD = "Hell\u00F6 W\u00F6rld!";
33
34 @Test
35 public void testEncodingHTML_ISO_8859_1() throws Exception {
36 HTMLDocument document = parseHTML("/microformats/xfn/encoding-iso-8859-1.html");
37 Assert.assertEquals(HELLO_WORLD, document.find("//TITLE"));
38 }
39
40 @Test
41 public void testEncodingHTML_UTF_8() throws Exception {
42 HTMLDocument document = parseHTML("/microformats/xfn/encoding-utf-8.html");
43 Assert.assertEquals(HELLO_WORLD, document.find("//TITLE"));
44 }
45
46
47
48
49
50
51
52
53
54
55
56 @Test
57 public void testEncodingHTML_UTF_8_DeclarationAfterTitle() throws Exception {
58 HTMLDocument document = parseHTML("/microformats/xfn/encoding-utf-8-after-title.html");
59 Assert.assertNotSame(HELLO_WORLD, document.find("//TITLE"));
60 }
61
62 @Test
63 public void testEncodingXHTML_ISO_8859_1() throws Exception {
64 HTMLDocument document = parseHTML("/microformats/xfn/encoding-iso-8859-1.xhtml");
65 Assert.assertEquals(HELLO_WORLD, document.find("//TITLE"));
66 }
67
68 @Test
69 public void testEncodingXHTML_UTF_8() throws Exception {
70 HTMLDocument document = parseHTML("/microformats/xfn/encoding-utf-8.xhtml");
71 Assert.assertEquals(HELLO_WORLD, document.find("//TITLE"));
72 }
73
74 private HTMLDocument parseHTML(String filename) throws FileNotFoundException, IOException {
75 return new HTMLFixture(copyResourceToTempFile(filename)).getHTMLDocument();
76 }
77 }