summaryrefslogtreecommitdiff
path: root/encoding.go
blob: d77cd655b7e208a6532be74f48f72bc5d5a80ad9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/gogs/chardet"
	"golang.org/x/text/encoding/ianaindex"
)

// getEncodedReader tries to determine the encoding of the content of `r`.
// It returns a new reader that returns UTF-8 content.
//
// Up to the first 128 bytes of `r` are consumed as a sample for charset
// detection. They are stitched back in front of the remaining stream, so the
// returned reader still yields the complete content.
//
// An error is returned if reading the sample fails (this includes io.EOF when
// `r` yields no bytes at all), if charset detection fails, or if the detected
// charset has no decoder available.
func getEncodedReader(r io.Reader) (io.Reader, error) {
	buf := make([]byte, 128)

	n, err := io.ReadFull(r, buf)
	switch {
	case err == io.ErrUnexpectedEOF:
		// `r` ended before the sample was full: keep only the bytes read
		buf = buf[:n]
		// as `buf` holds the whole content, we can use it as the underlying reader
		r = bytes.NewReader(buf)
	case err != nil:
		return nil, err
	default:
		// re-append `buf`
		r = io.MultiReader(bytes.NewReader(buf), r)
	}

	res, err := chardet.NewTextDetector().DetectBest(buf)
	if err != nil {
		return nil, err
	}

	enc, err := ianaindex.IANA.Encoding(res.Charset)
	if err != nil {
		return nil, err
	}
	// The index returns a nil encoding together with a nil error for charsets
	// that are registered with IANA but not implemented; calling NewDecoder
	// on that nil value would panic, so report it as an error instead.
	if enc == nil {
		return nil, fmt.Errorf("unsupported charset %q", res.Charset)
	}

	return enc.NewDecoder().Reader(r), nil
}