blob: d77cd655b7e208a6532be74f48f72bc5d5a80ad9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
package main
import (
	"bytes"
	"fmt"
	"io"

	"github.com/gogs/chardet"
	"golang.org/x/text/encoding/ianaindex"
)
// getEncodedReader tries to determine the encoding of the content of `r`.
// It returns a new reader that returns UTF-8 content.
//
// Up to the first 128 bytes of `r` are consumed and handed to the charset
// detector; those bytes are then placed back in front of the remaining
// content, so the returned reader yields the complete input.
//
// Errors are returned unchanged from the failing step:
//   - the read error from `r` (io.EOF when `r` yields no bytes at all),
//   - the detector's error when no charset could be determined,
//   - the lookup error when the detected charset name is not known to the
//     IANA index.
//
// Additionally, an error is returned when the detected charset is known to
// the index but no decoder is available for it.
func getEncodedReader(r io.Reader) (io.Reader, error) {
	// Number of leading bytes handed to the charset detector.
	const sniffLen = 128

	buf := make([]byte, sniffLen)
	n, err := io.ReadFull(r, buf)
	switch {
	case err == io.ErrUnexpectedEOF:
		// Fewer than sniffLen bytes were available: `buf[:n]` holds the whole
		// content, so it can be used as the underlying reader.
		buf = buf[:n]
		r = bytes.NewReader(buf)
	case err != nil:
		return nil, err
	default:
		// Re-attach the sniffed prefix in front of the unread remainder.
		r = io.MultiReader(bytes.NewReader(buf), r)
	}

	res, err := chardet.NewTextDetector().DetectBest(buf)
	if err != nil {
		return nil, err
	}

	enc, err := ianaindex.IANA.Encoding(res.Charset)
	if err != nil {
		return nil, err
	}
	// The index may return a nil Encoding together with a nil error for
	// charsets that are registered but not implemented; calling NewDecoder on
	// it would panic, so report it as an error instead.
	if enc == nil {
		return nil, fmt.Errorf("unsupported charset %q", res.Charset)
	}

	return enc.NewDecoder().Reader(r), nil
}
|