summary | refs | log | tree | commit | diff
path: root/encoding.go
diff options
context:
space:
mode:
author René 'Necoro' Neumann <necoro@necoro.eu> 2022-01-22 22:32:10 +0100
committer René 'Necoro' Neumann <necoro@necoro.eu> 2022-01-22 22:32:10 +0100
commit 3c5c4758fb2540eb3db667b376d2dbce9e8187a2 (patch)
tree e5c5fc070df918737e13a6da659815ec8a2c27a1 /encoding.go
parent f6c165150e4bdfbd76414faac1c8be54eeae564c (diff)
download engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.tar.gz
engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.tar.bz2
engarde-importer-3c5c4758fb2540eb3db667b376d2dbce9e8187a2.zip
First setup: Parse CSV content
Diffstat (limited to 'encoding.go')
-rw-r--r-- encoding.go 40
1 files changed, 40 insertions, 0 deletions
diff --git a/encoding.go b/encoding.go
new file mode 100644
index 0000000..d77cd65
--- /dev/null
+++ b/encoding.go
@@ -0,0 +1,40 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+
+	"github.com/gogs/chardet"
+	"golang.org/x/text/encoding/ianaindex"
+)
+
+// getEncodedReader tries to determine the encoding of the content of `r`.
+// It returns a new reader that returns UTF-8 content.
+func getEncodedReader(r io.Reader) (io.Reader, error) {
+ buf := make([]byte, 128)
+
+ n, err := io.ReadFull(r, buf)
+ switch {
+ case err == io.ErrUnexpectedEOF:
+ buf = buf[:n]
+ // as `buf` holds the whole content, we can use it as the underlying reader
+ r = bytes.NewReader(buf)
+ case err != nil:
+ return nil, err
+ default:
+ // re-append `buf`
+ r = io.MultiReader(bytes.NewReader(buf), r)
+ }
+
+ res, err := chardet.NewTextDetector().DetectBest(buf)
+ if err != nil {
+ return nil, err
+ }
+
+ enc, err := ianaindex.IANA.Encoding(res.Charset)
+ if err != nil {
+ return nil, err
+ }
+
+ return enc.NewDecoder().Reader(r), nil
+}