Blame - metrics-server/vendor/golang.org/x/text/encoding/encoding.go - kubeia

blob: 221f175c01e1475dab48161f514475de2d449ca2 [file] [log] [blame]

Matthias Andreas Benkard	832a54e	2019-01-29 09:27:38 +0100	[diff] [blame]	1	// Copyright 2013 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	// Package encoding defines an interface for character encodings, such as Shift
				6	// JIS and Windows 1252, that can convert to and from UTF-8.
				7	//
				8	// Encoding implementations are provided in other packages, such as
				9	// golang.org/x/text/encoding/charmap and
				10	// golang.org/x/text/encoding/japanese.
				11	package encoding // import "golang.org/x/text/encoding"
				12
				13	import (
				14	"errors"
				15	"io"
				16	"strconv"
				17	"unicode/utf8"
				18
				19	"golang.org/x/text/encoding/internal/identifier"
				20	"golang.org/x/text/transform"
				21	)
				22
				23	// TODO:
				24	// - There seems to be some inconsistency in when decoders return errors
				25	// and when not. Also documentation seems to suggest they shouldn't return
				26	// errors at all (except for UTF-16).
				27	// - Encoders seem to rely on or at least benefit from the input being in NFC
				28	// normal form. Perhaps add an example how users could prepare their output.
				29
				30	// Encoding is a character set encoding that can be transformed to and from
				31	// UTF-8.
				32	type Encoding interface {
				33	// NewDecoder returns a Decoder.
				34	NewDecoder() *Decoder
				35
				36	// NewEncoder returns an Encoder.
				37	NewEncoder() *Encoder
				38	}
				39
				40	// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
				41	//
				42	// Transforming source bytes that are not of that encoding will not result in an
				43	// error per se. Each byte that cannot be transcoded will be represented in the
				44	// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
				45	type Decoder struct {
				46	transform.Transformer
				47
				48	// This forces external creators of Decoders to use names in struct
				49	// initializers, allowing for future extendibility without having to break
				50	// code.
				51	_ struct{}
				52	}
				53
				54	// Bytes converts the given encoded bytes to UTF-8. It returns the converted
				55	// bytes or nil, err if any error occurred.
				56	func (d *Decoder) Bytes(b []byte) ([]byte, error) {
				57	b, _, err := transform.Bytes(d, b)
				58	if err != nil {
				59	return nil, err
				60	}
				61	return b, nil
				62	}
				63
				64	// String converts the given encoded string to UTF-8. It returns the converted
				65	// string or "", err if any error occurred.
				66	func (d *Decoder) String(s string) (string, error) {
				67	s, _, err := transform.String(d, s)
				68	if err != nil {
				69	return "", err
				70	}
				71	return s, nil
				72	}
				73
				74	// Reader wraps another Reader to decode its bytes.
				75	//
				76	// The Decoder may not be used for any other operation as long as the returned
				77	// Reader is in use.
				78	func (d *Decoder) Reader(r io.Reader) io.Reader {
				79	return transform.NewReader(r, d)
				80	}
				81
				82	// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
				83	//
				84	// Each rune that cannot be transcoded will result in an error. In this case,
				85	// the transform will consume all source byte up to, not including the offending
				86	// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
				87	// `\uFFFD`. To return early with an error instead, use transform.Chain to
				88	// preprocess the data with a UTF8Validator.
				89	type Encoder struct {
				90	transform.Transformer
				91
				92	// This forces external creators of Encoders to use names in struct
				93	// initializers, allowing for future extendibility without having to break
				94	// code.
				95	_ struct{}
				96	}
				97
				98	// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
				99	// any error occurred.
				100	func (e *Encoder) Bytes(b []byte) ([]byte, error) {
				101	b, _, err := transform.Bytes(e, b)
				102	if err != nil {
				103	return nil, err
				104	}
				105	return b, nil
				106	}
				107
				108	// String converts a string from UTF-8. It returns the converted string or
				109	// "", err if any error occurred.
				110	func (e *Encoder) String(s string) (string, error) {
				111	s, _, err := transform.String(e, s)
				112	if err != nil {
				113	return "", err
				114	}
				115	return s, nil
				116	}
				117
				118	// Writer wraps another Writer to encode its UTF-8 output.
				119	//
				120	// The Encoder may not be used for any other operation as long as the returned
				121	// Writer is in use.
				122	func (e *Encoder) Writer(w io.Writer) io.Writer {
				123	return transform.NewWriter(w, e)
				124	}
				125
				126	// ASCIISub is the ASCII substitute character, as recommended by
				127	// http://unicode.org/reports/tr36/#Text_Comparison
				128	const ASCIISub = '\x1a'
				129
				130	// Nop is the nop encoding. Its transformed bytes are the same as the source
				131	// bytes; it does not replace invalid UTF-8 sequences.
				132	var Nop Encoding = nop{}
				133
				134	type nop struct{}
				135
				136	func (nop) NewDecoder() *Decoder {
				137	return &Decoder{Transformer: transform.Nop}
				138	}
				139	func (nop) NewEncoder() *Encoder {
				140	return &Encoder{Transformer: transform.Nop}
				141	}
				142
				143	// Replacement is the replacement encoding. Decoding from the replacement
				144	// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
				145	// the replacement encoding yields the same as the source bytes except that
				146	// invalid UTF-8 is converted to '\uFFFD'.
				147	//
				148	// It is defined at http://encoding.spec.whatwg.org/#replacement
				149	var Replacement Encoding = replacement{}
				150
				151	type replacement struct{}
				152
				153	func (replacement) NewDecoder() *Decoder {
				154	return &Decoder{Transformer: replacementDecoder{}}
				155	}
				156
				157	func (replacement) NewEncoder() *Encoder {
				158	return &Encoder{Transformer: replacementEncoder{}}
				159	}
				160
				161	func (replacement) ID() (mib identifier.MIB, other string) {
				162	return identifier.Replacement, ""
				163	}
				164
				165	type replacementDecoder struct{ transform.NopResetter }
				166
				167	func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
				168	if len(dst) < 3 {
				169	return 0, 0, transform.ErrShortDst
				170	}
				171	if atEOF {
				172	const fffd = "\ufffd"
				173	dst[0] = fffd[0]
				174	dst[1] = fffd[1]
				175	dst[2] = fffd[2]
				176	nDst = 3
				177	}
				178	return nDst, len(src), nil
				179	}
				180
				181	type replacementEncoder struct{ transform.NopResetter }
				182
				183	func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
				184	r, size := rune(0), 0
				185
				186	for ; nSrc < len(src); nSrc += size {
				187	r = rune(src[nSrc])
				188
				189	// Decode a 1-byte rune.
				190	if r < utf8.RuneSelf {
				191	size = 1
				192
				193	} else {
				194	// Decode a multi-byte rune.
				195	r, size = utf8.DecodeRune(src[nSrc:])
				196	if size == 1 {
				197	// All valid runes of size 1 (those below utf8.RuneSelf) were
				198	// handled above. We have invalid UTF-8 or we haven't seen the
				199	// full character yet.
				200	if !atEOF && !utf8.FullRune(src[nSrc:]) {
				201	err = transform.ErrShortSrc
				202	break
				203	}
				204	r = '\ufffd'
				205	}
				206	}
				207
				208	if nDst+utf8.RuneLen(r) > len(dst) {
				209	err = transform.ErrShortDst
				210	break
				211	}
				212	nDst += utf8.EncodeRune(dst[nDst:], r)
				213	}
				214	return nDst, nSrc, err
				215	}
				216
				217	// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
				218	// repertoire of the destination encoding with HTML escape sequences.
				219	//
				220	// This wrapper exists to comply to URL and HTML forms requiring a
				221	// non-terminating legacy encoder. The produced sequences may lead to data
				222	// loss as they are indistinguishable from legitimate input. To avoid this
				223	// issue, use UTF-8 encodings whenever possible.
				224	func HTMLEscapeUnsupported(e Encoder) Encoder {
				225	return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
				226	}
				227
				228	// ReplaceUnsupported wraps encoders to replace source runes outside the
				229	// repertoire of the destination encoding with an encoding-specific
				230	// replacement.
				231	//
				232	// This wrapper is only provided for backwards compatibility and legacy
				233	// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
				234	func ReplaceUnsupported(e Encoder) Encoder {
				235	return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
				236	}
				237
				238	type errorHandler struct {
				239	*Encoder
				240	handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
				241	}
				242
				243	// TODO: consider making this error public in some form.
				244	type repertoireError interface {
				245	Replacement() byte
				246	}
				247
				248	func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
				249	nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
				250	for err != nil {
				251	rerr, ok := err.(repertoireError)
				252	if !ok {
				253	return nDst, nSrc, err
				254	}
				255	r, sz := utf8.DecodeRune(src[nSrc:])
				256	n, ok := h.handler(dst[nDst:], r, rerr)
				257	if !ok {
				258	return nDst, nSrc, transform.ErrShortDst
				259	}
				260	err = nil
				261	nDst += n
				262	if nSrc += sz; nSrc < len(src) {
				263	var dn, sn int
				264	dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
				265	nDst += dn
				266	nSrc += sn
				267	}
				268	}
				269	return nDst, nSrc, err
				270	}
				271
				272	func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
				273	buf := [8]byte{}
				274	b := strconv.AppendUint(buf[:0], uint64(r), 10)
				275	if n = len(b) + len("&#;"); n >= len(dst) {
				276	return 0, false
				277	}
				278	dst[0] = '&'
				279	dst[1] = '#'
				280	dst[copy(dst[2:], b)+2] = ';'
				281	return n, true
				282	}
				283
				284	func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
				285	if len(dst) == 0 {
				286	return 0, false
				287	}
				288	dst[0] = err.Replacement()
				289	return 1, true
				290	}
				291
				292	// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
				293	var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
				294
				295	// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
				296	// input byte that is not valid UTF-8.
				297	var UTF8Validator transform.Transformer = utf8Validator{}
				298
				299	type utf8Validator struct{ transform.NopResetter }
				300
				301	func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
				302	n := len(src)
				303	if n > len(dst) {
				304	n = len(dst)
				305	}
				306	for i := 0; i < n; {
				307	if c := src[i]; c < utf8.RuneSelf {
				308	dst[i] = c
				309	i++
				310	continue
				311	}
				312	_, size := utf8.DecodeRune(src[i:])
				313	if size == 1 {
				314	// All valid runes of size 1 (those below utf8.RuneSelf) were
				315	// handled above. We have invalid UTF-8 or we haven't seen the
				316	// full character yet.
				317	err = ErrInvalidUTF8
				318	if !atEOF && !utf8.FullRune(src[i:]) {
				319	err = transform.ErrShortSrc
				320	}
				321	return i, i, err
				322	}
				323	if i+size > len(dst) {
				324	return i, i, transform.ErrShortDst
				325	}
				326	for ; size > 0; size-- {
				327	dst[i] = src[i]
				328	i++
				329	}
				330	}
				331	if len(src) > len(dst) {
				332	err = transform.ErrShortDst
				333	}
				334	return n, n, err
				335	}