diff options
author | René 'Necoro' Neumann <necoro@necoro.eu> | 2022-01-22 22:32:10 +0100 |
---|---|---|
committer | René 'Necoro' Neumann <necoro@necoro.eu> | 2022-01-22 22:32:10 +0100 |
commit | 3c5c4758fb2540eb3db667b376d2dbce9e8187a2 (patch) | |
tree | e5c5fc070df918737e13a6da659815ec8a2c27a1 /encoding.go | |
parent | f6c165150e4bdfbd76414faac1c8be54eeae564c (diff) | |
download | engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.tar.gz engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.tar.bz2 engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.zip |
First setup: Parse CSV content
Diffstat (limited to '')
-rw-r--r-- | encoding.go | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/encoding.go b/encoding.go new file mode 100644 index 0000000..d77cd65 --- /dev/null +++ b/encoding.go @@ -0,0 +1,40 @@ +package main + +import ( + "bytes" + "io" + + "github.com/gogs/chardet" + "golang.org/x/text/encoding/ianaindex" +) + +// getEncodedReader tries to determine the encoding of the content of `r`. +// It returns a new reader that returns UTF-8 content. +func getEncodedReader(r io.Reader) (io.Reader, error) { + buf := make([]byte, 128) + + n, err := io.ReadFull(r, buf) + switch { + case err == io.ErrUnexpectedEOF: + buf = buf[:n] + // as `buf` holds the whole content, we can use it as the underlying reader + r = bytes.NewReader(buf) + case err != nil: + return nil, err + default: + // re-append `buf` + r = io.MultiReader(bytes.NewReader(buf), r) + } + + res, err := chardet.NewTextDetector().DetectBest(buf) + if err != nil { + return nil, err + } + + enc, err := ianaindex.IANA.Encoding(res.Charset) + if err != nil { + return nil, err + } + + return enc.NewDecoder().Reader(r), nil +} |