summaryrefslogtreecommitdiff
path: root/encoding.go
blob: b7511c5a4958fe9e3c500741bfdfb2fbe5a72d3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
package main

import (
	"bytes"
	"io"

	"github.com/gogs/chardet"
	"golang.org/x/text/encoding/charmap"
	"golang.org/x/text/encoding/ianaindex"
)

// encodedReader tries to determine the encoding of the content of `r`.
// It returns a new reader that returns UTF-8 content.
func encodedReader(r io.Reader) (io.Reader, error) {
	buf := make([]byte, 128)

	n, err := io.ReadFull(r, buf)
	switch {
	case err == io.ErrUnexpectedEOF:
		buf = buf[:n]
		// as `buf` holds the whole content, we can use it as the underlying reader
		r = bytes.NewReader(buf)
	case err != nil:
		return nil, err
	default:
		// re-append `buf`
		r = io.MultiReader(bytes.NewReader(buf), r)
	}

	res, err := chardet.NewTextDetector().DetectBest(buf)
	if err != nil {
		return nil, err
	}

	enc, err := ianaindex.IANA.Encoding(res.Charset)
	if err != nil {
		return nil, err
	}

	return enc.NewDecoder().Reader(r), nil
}

// encodedWriter returns a wrapper around the passed in writer that encodes into the expected charset.
func encodedWriter(w io.Writer) io.Writer {
	return charmap.ISO8859_1.NewEncoder().Writer(w)
}