|
|
|
|
|
|
|
|
|
package xml |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"io" |
|
"reflect" |
|
"strings" |
|
"testing" |
|
"unicode/utf8" |
|
) |
|
|
|
// testInput is the XML document shared by TestRawToken and TestToken. It
// contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined and custom entity references, a self-closing
// element, nested elements, a CDATA section and a trailing comment.
//
// Whitespace between elements is significant: every child line is indented
// by exactly one space so that the document yields the CharData("\n ")
// tokens listed in rawTokens and cookedTokens. The markup characters in the
// <hello> text are written as predefined entity references, because a
// literal "<>" inside character data is a syntax error (xmlInput lists "<>"
// as one); they decode to the CharData the token tables expect.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + ` >
 <hello lang="en">World &lt;&gt;&apos;&quot; 白鵬翔</hello>
 <query>&何; &is-it;</query>
 <goodbye />
 <outer foo:attr="value" xmlns:tag="ns4">
 <inner/>
 </outer>
 <tag:name>
 <![CDATA[Some text here.]]>
 </tag:name>
</body><!-- missing final newline -->`
|
|
|
// testEntity is the custom entity table installed on the decoder by
// TestRawToken and TestToken. It maps the two non-standard entity names
// used in testInput's <query> element to their replacement text.
var testEntity = map[string]string{
	"何":     "What",
	"is-it": "is it?",
}
|
|
|
var rawTokens = []Token{ |
|
CharData("\n"), |
|
ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, |
|
CharData("\n"), |
|
Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), |
|
CharData("\n"), |
|
StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, |
|
CharData("\n "), |
|
StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, |
|
CharData("World <>'\" 白鵬翔"), |
|
EndElement{Name{"", "hello"}}, |
|
CharData("\n "), |
|
StartElement{Name{"", "query"}, []Attr{}}, |
|
CharData("What is it?"), |
|
EndElement{Name{"", "query"}}, |
|
CharData("\n "), |
|
StartElement{Name{"", "goodbye"}, []Attr{}}, |
|
EndElement{Name{"", "goodbye"}}, |
|
CharData("\n "), |
|
StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, |
|
CharData("\n "), |
|
StartElement{Name{"", "inner"}, []Attr{}}, |
|
EndElement{Name{"", "inner"}}, |
|
CharData("\n "), |
|
EndElement{Name{"", "outer"}}, |
|
CharData("\n "), |
|
StartElement{Name{"tag", "name"}, []Attr{}}, |
|
CharData("\n "), |
|
CharData("Some text here."), |
|
CharData("\n "), |
|
EndElement{Name{"tag", "name"}}, |
|
CharData("\n"), |
|
EndElement{Name{"", "body"}}, |
|
Comment(" missing final newline "), |
|
} |
|
|
|
var cookedTokens = []Token{ |
|
CharData("\n"), |
|
ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, |
|
CharData("\n"), |
|
Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" |
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), |
|
CharData("\n"), |
|
StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, |
|
CharData("World <>'\" 白鵬翔"), |
|
EndElement{Name{"ns2", "hello"}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns2", "query"}, []Attr{}}, |
|
CharData("What is it?"), |
|
EndElement{Name{"ns2", "query"}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns2", "goodbye"}, []Attr{}}, |
|
EndElement{Name{"ns2", "goodbye"}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns2", "inner"}, []Attr{}}, |
|
EndElement{Name{"ns2", "inner"}}, |
|
CharData("\n "), |
|
EndElement{Name{"ns2", "outer"}}, |
|
CharData("\n "), |
|
StartElement{Name{"ns3", "name"}, []Attr{}}, |
|
CharData("\n "), |
|
CharData("Some text here."), |
|
CharData("\n "), |
|
EndElement{Name{"ns3", "name"}}, |
|
CharData("\n"), |
|
EndElement{Name{"ns2", "body"}}, |
|
Comment(" missing final newline "), |
|
} |
|
|
|
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase". The line breaks are spelled out
// explicitly so the exact bytes of the fixture are unambiguous.
const testInputAltEncoding = "\n" +
	`<?xml version="1.0" encoding="x-testing-uppercase"?>` + "\n" +
	`<TAG>VALUE</TAG>`
|
|
|
var rawTokensAltEncoding = []Token{ |
|
CharData("\n"), |
|
ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("value"), |
|
EndElement{Name{"", "tag"}}, |
|
} |
|
|
|
// xmlInput lists malformed documents. TestSyntax feeds each one to a strict
// decoder and requires that tokenizing ends with a *SyntaxError, so every
// entry must be ill-formed.
var xmlInput = []string{
	// Inputs that stop in the middle of a construct.
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// Complete inputs containing illegal names, characters or entities.
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",

	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	// An entity reference with no terminating semicolon. This must stay an
	// "&..." reference: plain text here would be well-formed and the decoder
	// would reach io.EOF instead of reporting a syntax error.
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",

	// Mismatched or malformed end tags, and "]]>" in character data.
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
|
|
|
func TestRawToken(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(testInput)) |
|
d.Entity = testEntity |
|
testRawToken(t, d, testInput, rawTokens) |
|
} |
|
|
|
const nonStrictInput = ` |
|
<tag>non&entity</tag> |
|
<tag>&unknown;entity</tag> |
|
<tag>{</tag> |
|
<tag>&#zzz;</tag> |
|
<tag>&なまえ3;</tag> |
|
<tag><-gt;</tag> |
|
<tag>&;</tag> |
|
<tag>&0a;</tag> |
|
` |
|
|
|
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"} |
|
|
|
var nonStrictTokens = []Token{ |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("non&entity"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("&unknown;entity"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("{"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("&#zzz;"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("&なまえ3;"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("<-gt;"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("&;"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
StartElement{Name{"", "tag"}, []Attr{}}, |
|
CharData("&0a;"), |
|
EndElement{Name{"", "tag"}}, |
|
CharData("\n"), |
|
} |
|
|
|
func TestNonStrictRawToken(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(nonStrictInput)) |
|
d.Strict = false |
|
testRawToken(t, d, nonStrictInput, nonStrictTokens) |
|
} |
|
|
|
// downCaser is a test reader that lower-cases ASCII letters one byte at a
// time. It stands in for a real charset converter in
// TestRawTokenAltEncoding.
type downCaser struct {
	t *testing.T    // used to fail the test if Read is ever called
	r io.ByteReader // underlying byte source
}

// ReadByte returns the next byte from the underlying reader, mapping 'A'-'Z'
// to 'a'-'z'. The underlying reader's error is passed through unchanged.
func (d *downCaser) ReadByte() (byte, error) {
	b, err := d.r.ReadByte()
	if 'A' <= b && b <= 'Z' {
		b += 'a' - 'A'
	}
	return b, err
}

// Read exists only so that downCaser satisfies io.Reader. It fails the test
// when called; the panic is there because the compiler still needs a
// terminating statement after Fatalf.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
|
|
|
func TestRawTokenAltEncoding(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(testInputAltEncoding)) |
|
d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { |
|
if charset != "x-testing-uppercase" { |
|
t.Fatalf("unexpected charset %q", charset) |
|
} |
|
return &downCaser{t, input.(io.ByteReader)}, nil |
|
} |
|
testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) |
|
} |
|
|
|
func TestRawTokenAltEncodingNoConverter(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(testInputAltEncoding)) |
|
token, err := d.RawToken() |
|
if token == nil { |
|
t.Fatalf("expected a token on first RawToken call") |
|
} |
|
if err != nil { |
|
t.Fatal(err) |
|
} |
|
token, err = d.RawToken() |
|
if token != nil { |
|
t.Errorf("expected a nil token; got %#v", token) |
|
} |
|
if err == nil { |
|
t.Fatalf("expected an error on second RawToken call") |
|
} |
|
const encoding = "x-testing-uppercase" |
|
if !strings.Contains(err.Error(), encoding) { |
|
t.Errorf("expected error to contain %q; got error: %v", |
|
encoding, err) |
|
} |
|
} |
|
|
|
func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { |
|
lastEnd := int64(0) |
|
for i, want := range rawTokens { |
|
start := d.InputOffset() |
|
have, err := d.RawToken() |
|
end := d.InputOffset() |
|
if err != nil { |
|
t.Fatalf("token %d: unexpected error: %s", i, err) |
|
} |
|
if !reflect.DeepEqual(have, want) { |
|
var shave, swant string |
|
if _, ok := have.(CharData); ok { |
|
shave = fmt.Sprintf("CharData(%q)", have) |
|
} else { |
|
shave = fmt.Sprintf("%#v", have) |
|
} |
|
if _, ok := want.(CharData); ok { |
|
swant = fmt.Sprintf("CharData(%q)", want) |
|
} else { |
|
swant = fmt.Sprintf("%#v", want) |
|
} |
|
t.Errorf("token %d = %s, want %s", i, shave, swant) |
|
} |
|
|
|
|
|
switch { |
|
case start < lastEnd: |
|
t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) |
|
case start >= end: |
|
|
|
if start == end && end == lastEnd { |
|
break |
|
} |
|
t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) |
|
case end > int64(len(raw)): |
|
t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) |
|
default: |
|
text := raw[start:end] |
|
if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { |
|
t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) |
|
} |
|
} |
|
lastEnd = end |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
// nestedDirectivesInput holds DOCTYPE directives that each contain a nested
// <!ENTITY ...> declaration. The quoted entity values include '<', '>' and
// the opposite kind of quote. Line breaks are spelled out explicitly so the
// exact bytes of the fixture are unambiguous.
var nestedDirectivesInput = "\n" +
	`<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt ">">]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt "<">]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt '>'>]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt '<'>]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt '">'>]>` + "\n" +
	`<!DOCTYPE [<!ENTITY xlt "'<">]>` + "\n"
|
|
|
var nestedDirectivesTokens = []Token{ |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt ">">]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt "<">]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), |
|
CharData("\n"), |
|
} |
|
|
|
func TestNestedDirectives(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(nestedDirectivesInput)) |
|
|
|
for i, want := range nestedDirectivesTokens { |
|
have, err := d.Token() |
|
if err != nil { |
|
t.Fatalf("token %d: unexpected error: %s", i, err) |
|
} |
|
if !reflect.DeepEqual(have, want) { |
|
t.Errorf("token %d = %#v want %#v", i, have, want) |
|
} |
|
} |
|
} |
|
|
|
func TestToken(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(testInput)) |
|
d.Entity = testEntity |
|
|
|
for i, want := range cookedTokens { |
|
have, err := d.Token() |
|
if err != nil { |
|
t.Fatalf("token %d: unexpected error: %s", i, err) |
|
} |
|
if !reflect.DeepEqual(have, want) { |
|
t.Errorf("token %d = %#v want %#v", i, have, want) |
|
} |
|
} |
|
} |
|
|
|
func TestSyntax(t *testing.T) { |
|
for i := range xmlInput { |
|
d := NewDecoder(strings.NewReader(xmlInput[i])) |
|
var err error |
|
for _, err = d.Token(); err == nil; _, err = d.Token() { |
|
} |
|
if _, ok := err.(*SyntaxError); !ok { |
|
t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) |
|
} |
|
} |
|
} |
|
|
|
// allScalars has one field for each scalar kind that TestAllScalars decodes
// from testScalarsInput. Field names match the element names in that
// document.
type allScalars struct {
	// Booleans: the input spells them as true/1 and false/0.
	True1  bool
	True2  bool
	False1 bool
	False2 bool

	// Signed integers.
	Int   int
	Int8  int8
	Int16 int16
	Int32 int32
	Int64 int64

	// Unsigned integers. Uint is declared as uint so that decoding into a
	// plain unsigned int is exercised alongside the sized variants.
	Uint    uint
	Uint8   uint8
	Uint16  uint16
	Uint32  uint32
	Uint64  uint64
	Uintptr uintptr

	// Floating point.
	Float32 float32
	Float64 float64

	// Strings, by value and through a pointer.
	String    string
	PtrString *string
}
|
|
|
var all = allScalars{ |
|
True1: true, |
|
True2: true, |
|
False1: false, |
|
False2: false, |
|
Int: 1, |
|
Int8: -2, |
|
Int16: 3, |
|
Int32: -4, |
|
Int64: 5, |
|
Uint: 6, |
|
Uint8: 7, |
|
Uint16: 8, |
|
Uint32: 9, |
|
Uint64: 10, |
|
Uintptr: 11, |
|
Float32: 13.0, |
|
Float64: 14.0, |
|
String: "15", |
|
PtrString: &sixteen, |
|
} |
|
|
|
var sixteen = "16" |
|
|
|
// testScalarsInput is the document TestAllScalars decodes into an
// allScalars value. It has one child element per allScalars field, plus a
// <Float> element that has no matching field.
const testScalarsInput = `<allscalars>
<True1>true</True1>
<True2>1</True2>
<False1>false</False1>
<False2>0</False2>
<Int>1</Int>
<Int8>-2</Int8>
<Int16>3</Int16>
<Int32>-4</Int32>
<Int64>5</Int64>
<Uint>6</Uint>
<Uint8>7</Uint8>
<Uint16>8</Uint16>
<Uint32>9</Uint32>
<Uint64>10</Uint64>
<Uintptr>11</Uintptr>
<Float>12.0</Float>
<Float32>13.0</Float32>
<Float64>14.0</Float64>
<String>15</String>
<PtrString>16</PtrString>
</allscalars>`
|
|
|
func TestAllScalars(t *testing.T) { |
|
var a allScalars |
|
err := Unmarshal([]byte(testScalarsInput), &a) |
|
|
|
if err != nil { |
|
t.Fatal(err) |
|
} |
|
if !reflect.DeepEqual(a, all) { |
|
t.Errorf("have %+v want %+v", a, all) |
|
} |
|
} |
|
|
|
// item is the decoding target used by TestIssue569. Its only field has an
// underscore in its name, matching the <Field_a> element in that test.
type item struct {
	Field_a string
}
|
|
|
func TestIssue569(t *testing.T) { |
|
data := `<item><Field_a>abcd</Field_a></item>` |
|
var i item |
|
err := Unmarshal([]byte(data), &i) |
|
|
|
if err != nil || i.Field_a != "abcd" { |
|
t.Fatal("Expecting abcd") |
|
} |
|
} |
|
|
|
func TestUnquotedAttrs(t *testing.T) { |
|
data := "<tag attr=azAZ09:-_\t>" |
|
d := NewDecoder(strings.NewReader(data)) |
|
d.Strict = false |
|
token, err := d.Token() |
|
if _, ok := err.(*SyntaxError); ok { |
|
t.Errorf("Unexpected error: %v", err) |
|
} |
|
if token.(StartElement).Name.Local != "tag" { |
|
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) |
|
} |
|
attr := token.(StartElement).Attr[0] |
|
if attr.Value != "azAZ09:-_" { |
|
t.Errorf("Unexpected attribute value: %v", attr.Value) |
|
} |
|
if attr.Name.Local != "attr" { |
|
t.Errorf("Unexpected attribute name: %v", attr.Name.Local) |
|
} |
|
} |
|
|
|
func TestValuelessAttrs(t *testing.T) { |
|
tests := [][3]string{ |
|
{"<p nowrap>", "p", "nowrap"}, |
|
{"<p nowrap >", "p", "nowrap"}, |
|
{"<input checked/>", "input", "checked"}, |
|
{"<input checked />", "input", "checked"}, |
|
} |
|
for _, test := range tests { |
|
d := NewDecoder(strings.NewReader(test[0])) |
|
d.Strict = false |
|
token, err := d.Token() |
|
if _, ok := err.(*SyntaxError); ok { |
|
t.Errorf("Unexpected error: %v", err) |
|
} |
|
if token.(StartElement).Name.Local != test[1] { |
|
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) |
|
} |
|
attr := token.(StartElement).Attr[0] |
|
if attr.Value != test[2] { |
|
t.Errorf("Unexpected attribute value: %v", attr.Value) |
|
} |
|
if attr.Name.Local != test[2] { |
|
t.Errorf("Unexpected attribute name: %v", attr.Name.Local) |
|
} |
|
} |
|
} |
|
|
|
func TestCopyTokenCharData(t *testing.T) { |
|
data := []byte("same data") |
|
var tok1 Token = CharData(data) |
|
tok2 := CopyToken(tok1) |
|
if !reflect.DeepEqual(tok1, tok2) { |
|
t.Error("CopyToken(CharData) != CharData") |
|
} |
|
data[1] = 'o' |
|
if reflect.DeepEqual(tok1, tok2) { |
|
t.Error("CopyToken(CharData) uses same buffer.") |
|
} |
|
} |
|
|
|
func TestCopyTokenStartElement(t *testing.T) { |
|
elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} |
|
var tok1 Token = elt |
|
tok2 := CopyToken(tok1) |
|
if tok1.(StartElement).Attr[0].Value != "en" { |
|
t.Error("CopyToken overwrote Attr[0]") |
|
} |
|
if !reflect.DeepEqual(tok1, tok2) { |
|
t.Error("CopyToken(StartElement) != StartElement") |
|
} |
|
tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} |
|
if reflect.DeepEqual(tok1, tok2) { |
|
t.Error("CopyToken(CharData) uses same buffer.") |
|
} |
|
} |
|
|
|
func TestSyntaxErrorLineNum(t *testing.T) { |
|
testInput := "<P>Foo<P>\n\n<P>Bar</>\n" |
|
d := NewDecoder(strings.NewReader(testInput)) |
|
var err error |
|
for _, err = d.Token(); err == nil; _, err = d.Token() { |
|
} |
|
synerr, ok := err.(*SyntaxError) |
|
if !ok { |
|
t.Error("Expected SyntaxError.") |
|
} |
|
if synerr.Line != 3 { |
|
t.Error("SyntaxError didn't have correct line number.") |
|
} |
|
} |
|
|
|
func TestTrailingRawToken(t *testing.T) { |
|
input := `<FOO></FOO> ` |
|
d := NewDecoder(strings.NewReader(input)) |
|
var err error |
|
for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { |
|
} |
|
if err != io.EOF { |
|
t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) |
|
} |
|
} |
|
|
|
func TestTrailingToken(t *testing.T) { |
|
input := `<FOO></FOO> ` |
|
d := NewDecoder(strings.NewReader(input)) |
|
var err error |
|
for _, err = d.Token(); err == nil; _, err = d.Token() { |
|
} |
|
if err != io.EOF { |
|
t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) |
|
} |
|
} |
|
|
|
func TestEntityInsideCDATA(t *testing.T) { |
|
input := `<test><![CDATA[ &val=foo ]]></test>` |
|
d := NewDecoder(strings.NewReader(input)) |
|
var err error |
|
for _, err = d.Token(); err == nil; _, err = d.Token() { |
|
} |
|
if err != io.EOF { |
|
t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) |
|
} |
|
} |
|
|
|
// characterTests pairs documents containing disallowed characters or
// invalid entity references with the SyntaxError message that
// TestDisallowedCharacters expects for each.
var characterTests = []struct {
	in  string // document to decode
	err string // expected SyntaxError.Msg
}{
	// Illegal character codes in various positions.
	{in: "\x12<doc/>", err: "illegal character code U+0012"},
	{in: "<?xml version=\"1.0\"?>\x0b<doc/>", err: "illegal character code U+000B"},
	{in: "\xef\xbf\xbe<doc/>", err: "illegal character code U+FFFE"},
	{in: "<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", err: "illegal character code U+0007"},

	// Control character where an attribute name is expected.
	{in: "<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", err: "expected attribute name in element"},

	// Invalid entity references.
	{in: "<doc>&abc\x01;</doc>", err: "invalid character entity &abc (no semicolon)"},
	{in: "<doc>&\x01;</doc>", err: "invalid character entity & (no semicolon)"},
	{in: "<doc>&\xef\xbf\xbe;</doc>", err: "invalid character entity &\uFFFE;"},
	{in: "<doc>&hello;</doc>", err: "invalid character entity &hello;"},
}
|
|
|
func TestDisallowedCharacters(t *testing.T) { |
|
|
|
for i, tt := range characterTests { |
|
d := NewDecoder(strings.NewReader(tt.in)) |
|
var err error |
|
|
|
for err == nil { |
|
_, err = d.Token() |
|
} |
|
synerr, ok := err.(*SyntaxError) |
|
if !ok { |
|
t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) |
|
} |
|
if synerr.Msg != tt.err { |
|
t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) |
|
} |
|
} |
|
} |
|
|
|
// procInstEncodingTest pairs an expected string with an obtained one.
// Nothing in this part of the file uses it.
type procInstEncodingTest struct {
	expect string
	got    string
}
|
|
|
// procInstTests lists processing-instruction bodies together with the values
// TestProcInstEncoding expects procInst to extract from them.
var procInstTests = []struct {
	input  string    // text of the processing instruction
	expect [2]string // expected {version, encoding}
}{
	// Double-quoted and single-quoted values, with and without trailing space.
	{input: `version="1.0" encoding="utf-8"`, expect: [2]string{"1.0", "utf-8"}},
	{input: `version="1.0" encoding='utf-8'`, expect: [2]string{"1.0", "utf-8"}},
	{input: `version="1.0" encoding='utf-8' `, expect: [2]string{"1.0", "utf-8"}},

	// An unquoted encoding value is expected to yield "".
	{input: `version="1.0" encoding=utf-8`, expect: [2]string{"1.0", ""}},

	// Encoding only; the missing version is expected to yield "".
	{input: `encoding="FOO" `, expect: [2]string{"", "FOO"}},
}
|
|
|
func TestProcInstEncoding(t *testing.T) { |
|
for _, test := range procInstTests { |
|
if got := procInst("version", test.input); got != test.expect[0] { |
|
t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) |
|
} |
|
if got := procInst("encoding", test.input); got != test.expect[1] { |
|
t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) |
|
} |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
// directivesWithCommentsInput holds DOCTYPE directives with comments
// embedded in them. The third line mixes real comments ("<!---->",
// "<!-->-->", "<!--->-->") with the look-alikes "<!->" and "<!>". Line
// breaks are spelled out explicitly so the exact bytes of the fixture are
// unambiguous.
var directivesWithCommentsInput = "\n" +
	`<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>` + "\n" +
	`<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>` + "\n" +
	`<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>` + "\n"
|
|
|
var directivesWithCommentsTokens = []Token{ |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE [<!ENTITY go "Golang">]`), |
|
CharData("\n"), |
|
Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), |
|
CharData("\n"), |
|
} |
|
|
|
func TestDirectivesWithComments(t *testing.T) { |
|
d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) |
|
|
|
for i, want := range directivesWithCommentsTokens { |
|
have, err := d.Token() |
|
if err != nil { |
|
t.Fatalf("token %d: unexpected error: %s", i, err) |
|
} |
|
if !reflect.DeepEqual(have, want) { |
|
t.Errorf("token %d = %#v want %#v", i, have, want) |
|
} |
|
} |
|
} |
|
|
|
|
|
// errWriter is a writer whose Write always fails. TestEscapeTextIOErrors
// uses it to check that write errors are propagated.
type errWriter struct{}

// Write ignores p, writes nothing, and returns the error "unwritable".
func (errWriter) Write(p []byte) (int, error) {
	return 0, fmt.Errorf("unwritable")
}
|
|
|
func TestEscapeTextIOErrors(t *testing.T) { |
|
expectErr := "unwritable" |
|
err := EscapeText(errWriter{}, []byte{'A'}) |
|
|
|
if err == nil || err.Error() != expectErr { |
|
t.Errorf("have %v, want %v", err, expectErr) |
|
} |
|
} |
|
|
|
func TestEscapeTextInvalidChar(t *testing.T) { |
|
input := []byte("A \x00 terminated string.") |
|
expected := "A \uFFFD terminated string." |
|
|
|
buff := new(bytes.Buffer) |
|
if err := EscapeText(buff, input); err != nil { |
|
t.Fatalf("have %v, want nil", err) |
|
} |
|
text := buff.String() |
|
|
|
if text != expected { |
|
t.Errorf("have %v, want %v", text, expected) |
|
} |
|
} |
|
|
|
func TestIssue5880(t *testing.T) { |
|
type T []byte |
|
data, err := Marshal(T{192, 168, 0, 1}) |
|
if err != nil { |
|
t.Errorf("Marshal error: %v", err) |
|
} |
|
if !utf8.Valid(data) { |
|
t.Errorf("Marshal generated invalid UTF-8: %x", data) |
|
} |
|
} |
|
|