From 3c5c4758fb2540eb3db667b376d2dbce9e8187a2 Mon Sep 17 00:00:00 2001 From: René 'Necoro' Neumann Date: Sat, 22 Jan 2022 22:32:10 +0100 Subject: First setup: Parse CSV content --- encoding.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 encoding.go (limited to 'encoding.go') diff --git a/encoding.go b/encoding.go new file mode 100644 index 0000000..d77cd65 --- /dev/null +++ b/encoding.go @@ -0,0 +1,40 @@ +package main + +import ( + "bytes" + "io" + + "github.com/gogs/chardet" + "golang.org/x/text/encoding/ianaindex" +) + +// getEncodedReader tries to determine the encoding of the content of `r`. +// It returns a new reader that returns UTF-8 content. +func getEncodedReader(r io.Reader) (io.Reader, error) { + buf := make([]byte, 128) + + n, err := io.ReadFull(r, buf) + switch { + case err == io.ErrUnexpectedEOF: + buf = buf[:n] + // as `buf` holds the whole content, we can use it as the underlying reader + r = bytes.NewReader(buf) + case err != nil: + return nil, err + default: + // re-append `buf` + r = io.MultiReader(bytes.NewReader(buf), r) + } + + res, err := chardet.NewTextDetector().DetectBest(buf) + if err != nil { + return nil, err + } + + enc, err := ianaindex.IANA.Encoding(res.Charset) + if err != nil { + return nil, err + } + + return enc.NewDecoder().Reader(r), nil +} -- cgit v1.2.3-54-g00ecf