Skip to content

Commit 6398b29

Browse files
Add fuzzing for charset and for detector functions (#299)
* Add fuzzing for charset detector functions
* Add fuzzing for whole library
* Upgrade golang CI version to 1.18
1 parent 42b3f45 commit 6398b29

File tree

3 files changed

+103
-10
lines changed

3 files changed

+103
-10
lines changed

.github/workflows/go.yml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,39 +10,38 @@ permissions:
1010

1111
jobs:
1212
lint:
13-
strategy:
14-
matrix:
15-
go-version: ["1.18.x"]
1613
runs-on: ubuntu-latest
1714
steps:
1815
- name: Install Go
1916
uses: actions/setup-go@vX.Y.Z  # pinned version lost to e-mail obfuscation in this page capture
2017
with:
21-
go-version: 1.16
18+
go-version: "1.18.x"
2219
- name: Checkout code
2320
uses: actions/checkout@vX.Y.Z  # pinned version lost to e-mail obfuscation in this page capture
2421
- name: Run linters
2522
uses: golangci/golangci-lint-action@vX.Y.Z  # pinned version lost to e-mail obfuscation in this page capture
2623
with:
27-
version: "v1.37.1"
28-
go-version: ${{ matrix.go-version }}
24+
version: "v1.45.2"
25+
go-version: "1.18.x"
2926

3027
test:
3128
strategy:
3229
matrix:
33-
go-version: ["1.12.0", "1.18.x"]
3430
platform: [ubuntu-latest, windows-latest]
3531
runs-on: ${{ matrix.platform }}
3632
steps:
3733
- name: Install Go
3834
if: success()
3935
uses: actions/setup-go@vX.Y.Z  # pinned version lost to e-mail obfuscation in this page capture
4036
with:
41-
go-version: ${{ matrix.go-version }}
37+
go-version: "1.18.x"
4238
- name: Checkout code
4339
uses: actions/checkout@vX.Y.Z  # pinned version lost to e-mail obfuscation in this page capture
44-
- name: Run tests
45-
run: go test -race ./...
40+
- run: go test -race ./...
41+
- run: go test -fuzz=. -fuzztime=30s
42+
- run: go test -fuzz=Plain -fuzztime=30s ./internal/charset
43+
- run: go test -fuzz=XML -fuzztime=30s ./internal/charset
44+
- run: go test -fuzz=HTML -fuzztime=30s ./internal/charset
4645

4746
coverage:
4847
runs-on: ubuntu-latest

internal/charset/charset_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,58 @@ func TestFromPlain(t *testing.T) {
8585
}
8686
}
8787
}
88+
89+
func FuzzFromPlain(f *testing.F) {
90+
samples := [][]byte{
91+
[]byte{0xe6, 0xf8, 0xe5, 0x85, 0x85},
92+
[]byte{0xe6, 0xf8, 0xe5},
93+
[]byte("æøå"),
94+
}
95+
96+
for _, s := range samples {
97+
f.Add(s)
98+
}
99+
100+
f.Fuzz(func(t *testing.T, d []byte) {
101+
if charset := FromPlain(d); charset == "" {
102+
t.Skip()
103+
}
104+
})
105+
}
106+
func FuzzFromHTML(f *testing.F) {
107+
samples := []string{
108+
`<meta charset="c">`,
109+
`<meta charset="щ">`,
110+
`<meta http-equiv="content-type" content="a/b; charset=c">`,
111+
`<meta http-equiv="content-type" content="a/b; charset=щ">`,
112+
`<f 1=2 /><meta charset="c">`,
113+
`<f a=2><meta http-equiv="content-type" content="a/b; charset=c">`,
114+
`<f 1=2 /><meta b="b" charset="c">`,
115+
`<f a=2><meta b="b" http-equiv="content-type" content="a/b; charset=c">`,
116+
}
117+
118+
for _, s := range samples {
119+
f.Add([]byte(s))
120+
}
121+
122+
f.Fuzz(func(t *testing.T, d []byte) {
123+
if charset := FromHTML(d); charset == "" {
124+
t.Skip()
125+
}
126+
})
127+
}
128+
func FuzzFromXML(f *testing.F) {
129+
samples := []string{
130+
`<?xml version="1.0" encoding="c"?>`,
131+
}
132+
133+
for _, s := range samples {
134+
f.Add([]byte(s))
135+
}
136+
137+
f.Fuzz(func(t *testing.T, d []byte) {
138+
if charset := FromXML(d); charset == "" {
139+
t.Skip()
140+
}
141+
})
142+
}

mimetype_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"io"
77
"io/ioutil"
8+
"math"
89
"math/rand"
910
"mime"
1011
"os"
@@ -602,3 +603,41 @@ func TestExtend(t *testing.T) {
602603
})
603604
}
604605
}
606+
607+
// Because of the random nature of fuzzing I don't think there is a way to test
608+
// the correctness of the Detect results. Still there is value in fuzzing in
609+
// search for panics.
610+
func FuzzMimetype(f *testing.F) {
611+
// Some of the more interesting file formats. Most formats are detected by
612+
// checking some magic numbers in headers, but these have more complicated
613+
// detection algorithms.
614+
corpus := []string{
615+
"testdata/mkv.mkv",
616+
"testdata/webm.webm",
617+
"testdata/docx.docx",
618+
"testdata/pptx.pptx",
619+
"testdata/xlsx.xlsx",
620+
"testdata/3gp.3gp",
621+
"testdata/class.class",
622+
}
623+
for _, c := range corpus {
624+
data, err := ioutil.ReadFile(c)
625+
if err != nil {
626+
f.Fatal(err)
627+
}
628+
f.Add(data[:100])
629+
}
630+
// First node is root. Remove it because it matches any input.
631+
detectors := root.flatten()[1:]
632+
f.Fuzz(func(t *testing.T, data []byte) {
633+
matched := false
634+
for _, d := range detectors {
635+
if d.detector(data, math.MaxUint32) {
636+
matched = true
637+
}
638+
}
639+
if !matched {
640+
t.Skip()
641+
}
642+
})
643+
}

0 commit comments

Comments
 (0)