Blame - metrics-server/vendor/github.com/PuerkitoBio/purell/purell.go - kubeia

blob: 645e1b76f7945b390bd453f49b03efc297e9e5a8 [file] [log] [blame]

Matthias Andreas Benkard	832a54e	2019-01-29 09:27:38 +0100	[diff] [blame]	1	/*
				2	Package purell offers URL normalization as described on the wikipedia page:
				3	http://en.wikipedia.org/wiki/URL_normalization
				4	*/
				5	package purell
				6
				7	import (
				8	"bytes"
				9	"fmt"
				10	"net/url"
				11	"regexp"
				12	"sort"
				13	"strconv"
				14	"strings"
				15
				16	"github.com/PuerkitoBio/urlesc"
				17	"golang.org/x/net/idna"
				18	"golang.org/x/text/unicode/norm"
				19	"golang.org/x/text/width"
				20	)
				21
				22	// A set of normalization flags determines how a URL will
				23	// be normalized.
				24	type NormalizationFlags uint
				25
				26	const (
				27	// Safe normalizations
				28	FlagLowercaseScheme NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
				29	FlagLowercaseHost // http://HOST -> http://host
				30	FlagUppercaseEscapes // http://host/t%ef -> http://host/t%EF
				31	FlagDecodeUnnecessaryEscapes // http://host/t%41 -> http://host/tA
				32	FlagEncodeNecessaryEscapes // http://host/!"#$ -> http://host/%21%22#$
				33	FlagRemoveDefaultPort // http://host:80 -> http://host
				34	FlagRemoveEmptyQuerySeparator // http://host/path? -> http://host/path
				35
				36	// Usually safe normalizations
				37	FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
				38	FlagAddTrailingSlash // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
				39	FlagRemoveDotSegments // http://host/path/./a/b/../c -> http://host/path/a/c
				40
				41	// Unsafe normalizations
				42	FlagRemoveDirectoryIndex // http://host/path/index.html -> http://host/path/
				43	FlagRemoveFragment // http://host/path#fragment -> http://host/path
				44	FlagForceHTTP // https://host -> http://host
				45	FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
				46	FlagRemoveWWW // http://www.host/ -> http://host/
				47	FlagAddWWW // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
				48	FlagSortQuery // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3
				49
				50	// Normalizations not in the wikipedia article, required to cover tests cases
				51	// submitted by jehiah
				52	FlagDecodeDWORDHost // http://1113982867 -> http://66.102.7.147
				53	FlagDecodeOctalHost // http://0102.0146.07.0223 -> http://66.102.7.147
				54	FlagDecodeHexHost // http://0x42660793 -> http://66.102.7.147
				55	FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
				56	FlagRemoveEmptyPortSeparator // http://host:/path -> http://host/path
				57
				58	// Convenience set of safe normalizations
				59	FlagsSafe NormalizationFlags = FlagLowercaseHost \| FlagLowercaseScheme \| FlagUppercaseEscapes \| FlagDecodeUnnecessaryEscapes \| FlagEncodeNecessaryEscapes \| FlagRemoveDefaultPort \| FlagRemoveEmptyQuerySeparator
				60
				61	// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
				62	// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".
				63
				64	// Convenience set of usually safe normalizations (includes FlagsSafe)
				65	FlagsUsuallySafeGreedy NormalizationFlags = FlagsSafe \| FlagRemoveTrailingSlash \| FlagRemoveDotSegments
				66	FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe \| FlagAddTrailingSlash \| FlagRemoveDotSegments
				67
				68	// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
				69	FlagsUnsafeGreedy NormalizationFlags = FlagsUsuallySafeGreedy \| FlagRemoveDirectoryIndex \| FlagRemoveFragment \| FlagForceHTTP \| FlagRemoveDuplicateSlashes \| FlagRemoveWWW \| FlagSortQuery
				70	FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy \| FlagRemoveDirectoryIndex \| FlagRemoveFragment \| FlagForceHTTP \| FlagRemoveDuplicateSlashes \| FlagAddWWW \| FlagSortQuery
				71
				72	// Convenience set of all available flags
				73	FlagsAllGreedy = FlagsUnsafeGreedy \| FlagDecodeDWORDHost \| FlagDecodeOctalHost \| FlagDecodeHexHost \| FlagRemoveUnnecessaryHostDots \| FlagRemoveEmptyPortSeparator
				74	FlagsAllNonGreedy = FlagsUnsafeNonGreedy \| FlagDecodeDWORDHost \| FlagDecodeOctalHost \| FlagDecodeHexHost \| FlagRemoveUnnecessaryHostDots \| FlagRemoveEmptyPortSeparator
				75	)
				76
				77	const (
				78	defaultHttpPort = ":80"
				79	defaultHttpsPort = ":443"
				80	)
				81
				82	// Regular expressions used by the normalizations
				83	var rxPort = regexp.MustCompile(`(:\d+)/?$`)
				84	var rxDirIndex = regexp.MustCompile(`(^\|/)((?:default\|index)\.\w{1,4})$`)
				85	var rxDupSlashes = regexp.MustCompile(`/{2,}`)
				86	var rxDWORDHost = regexp.MustCompile(`^(\d+)((?:\.+)?(?:\:\d*)?)$`)
				87	var rxOctalHost = regexp.MustCompile(`^(0\d)\.(0\d)\.(0\d)\.(0\d)((?:\.+)?(?:\:\d*)?)$`)
				88	var rxHexHost = regexp.MustCompile(`^0x([0-9A-Fa-f]+)((?:\.+)?(?:\:\d*)?)$`)
				89	var rxHostDots = regexp.MustCompile(`^(.+?)(:\d+)?$`)
				90	var rxEmptyPort = regexp.MustCompile(`:+$`)
				91
// Map of flags to implementation function.
// FlagDecodeUnnecessaryEscapes has no action, since it is done automatically
// by parsing the string as an URL. Same for FlagUppercaseEscapes and FlagRemoveEmptyQuerySeparator.
// FlagEncodeNecessaryEscapes has no entry either.

// Since maps have undefined traversing order, make a slice of ordered keys.
// NormalizeURL walks this slice front to back and applies the function
// registered for each flag that is set, so the position of an entry is
// significant.
var flagsOrder = []NormalizationFlags{
	FlagLowercaseScheme,
	FlagLowercaseHost,
	FlagRemoveDefaultPort,
	FlagRemoveDirectoryIndex,
	FlagRemoveDotSegments,
	FlagRemoveFragment,
	FlagForceHTTP, // Must be after remove default port (because https=443/http=80)
	FlagRemoveDuplicateSlashes,
	FlagRemoveWWW,
	FlagAddWWW,
	FlagSortQuery,
	FlagDecodeDWORDHost,
	FlagDecodeOctalHost,
	FlagDecodeHexHost,
	FlagRemoveUnnecessaryHostDots,
	FlagRemoveEmptyPortSeparator,
	FlagRemoveTrailingSlash, // These two (add/remove trailing slash) must be last
	FlagAddTrailingSlash,
}
				117
// ... and then the map, where order is unimportant.
// Each entry maps a single flag to the function that performs that
// normalization by modifying the given URL in place. The map must hold an
// entry for every flag listed in flagsOrder, because NormalizeURL calls the
// looked-up function without checking that it exists.
var flags = map[NormalizationFlags]func(*url.URL){
	FlagLowercaseScheme:           lowercaseScheme,
	FlagLowercaseHost:             lowercaseHost,
	FlagRemoveDefaultPort:         removeDefaultPort,
	FlagRemoveDirectoryIndex:      removeDirectoryIndex,
	FlagRemoveDotSegments:         removeDotSegments,
	FlagRemoveFragment:            removeFragment,
	FlagForceHTTP:                 forceHTTP,
	FlagRemoveDuplicateSlashes:    removeDuplicateSlashes,
	FlagRemoveWWW:                 removeWWW,
	FlagAddWWW:                    addWWW,
	FlagSortQuery:                 sortQuery,
	FlagDecodeDWORDHost:           decodeDWORDHost,
	FlagDecodeOctalHost:           decodeOctalHost,
	FlagDecodeHexHost:             decodeHexHost,
	FlagRemoveUnnecessaryHostDots: removeUnncessaryHostDots,
	FlagRemoveEmptyPortSeparator:  removeEmptyPortSeparator,
	FlagRemoveTrailingSlash:       removeTrailingSlash,
	FlagAddTrailingSlash:          addTrailingSlash,
}
				139
				140	// MustNormalizeURLString returns the normalized string, and panics if an error occurs.
				141	// It takes an URL string as input, as well as the normalization flags.
				142	func MustNormalizeURLString(u string, f NormalizationFlags) string {
				143	result, e := NormalizeURLString(u, f)
				144	if e != nil {
				145	panic(e)
				146	}
				147	return result
				148	}
				149
				150	// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
				151	// It takes an URL string as input, as well as the normalization flags.
				152	func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
				153	parsed, err := url.Parse(u)
				154	if err != nil {
				155	return "", err
				156	}
				157
				158	if f&FlagLowercaseHost == FlagLowercaseHost {
				159	parsed.Host = strings.ToLower(parsed.Host)
				160	}
				161
				162	// The idna package doesn't fully conform to RFC 5895
				163	// (https://tools.ietf.org/html/rfc5895), so we do it here.
				164	// Taken from Go 1.8 cycle source, courtesy of bradfitz.
				165	// TODO: Remove when (if?) idna package conforms to RFC 5895.
				166	parsed.Host = width.Fold.String(parsed.Host)
				167	parsed.Host = norm.NFC.String(parsed.Host)
				168	if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
				169	return "", err
				170	}
				171
				172	return NormalizeURL(parsed, f), nil
				173	}
				174
				175	// NormalizeURL returns the normalized string.
				176	// It takes a parsed URL object as input, as well as the normalization flags.
				177	func NormalizeURL(u *url.URL, f NormalizationFlags) string {
				178	for _, k := range flagsOrder {
				179	if f&k == k {
				180	flags[k](u)
				181	}
				182	}
				183	return urlesc.Escape(u)
				184	}
				185
				186	func lowercaseScheme(u *url.URL) {
				187	if len(u.Scheme) > 0 {
				188	u.Scheme = strings.ToLower(u.Scheme)
				189	}
				190	}
				191
				192	func lowercaseHost(u *url.URL) {
				193	if len(u.Host) > 0 {
				194	u.Host = strings.ToLower(u.Host)
				195	}
				196	}
				197
				198	func removeDefaultPort(u *url.URL) {
				199	if len(u.Host) > 0 {
				200	scheme := strings.ToLower(u.Scheme)
				201	u.Host = rxPort.ReplaceAllStringFunc(u.Host, func(val string) string {
				202	if (scheme == "http" && val == defaultHttpPort) \|\| (scheme == "https" && val == defaultHttpsPort) {
				203	return ""
				204	}
				205	return val
				206	})
				207	}
				208	}
				209
				210	func removeTrailingSlash(u *url.URL) {
				211	if l := len(u.Path); l > 0 {
				212	if strings.HasSuffix(u.Path, "/") {
				213	u.Path = u.Path[:l-1]
				214	}
				215	} else if l = len(u.Host); l > 0 {
				216	if strings.HasSuffix(u.Host, "/") {
				217	u.Host = u.Host[:l-1]
				218	}
				219	}
				220	}
				221
				222	func addTrailingSlash(u *url.URL) {
				223	if l := len(u.Path); l > 0 {
				224	if !strings.HasSuffix(u.Path, "/") {
				225	u.Path += "/"
				226	}
				227	} else if l = len(u.Host); l > 0 {
				228	if !strings.HasSuffix(u.Host, "/") {
				229	u.Host += "/"
				230	}
				231	}
				232	}
				233
				234	func removeDotSegments(u *url.URL) {
				235	if len(u.Path) > 0 {
				236	var dotFree []string
				237	var lastIsDot bool
				238
				239	sections := strings.Split(u.Path, "/")
				240	for _, s := range sections {
				241	if s == ".." {
				242	if len(dotFree) > 0 {
				243	dotFree = dotFree[:len(dotFree)-1]
				244	}
				245	} else if s != "." {
				246	dotFree = append(dotFree, s)
				247	}
				248	lastIsDot = (s == "." \|\| s == "..")
				249	}
				250	// Special case if host does not end with / and new path does not begin with /
				251	u.Path = strings.Join(dotFree, "/")
				252	if u.Host != "" && !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
				253	u.Path = "/" + u.Path
				254	}
				255	// Special case if the last segment was a dot, make sure the path ends with a slash
				256	if lastIsDot && !strings.HasSuffix(u.Path, "/") {
				257	u.Path += "/"
				258	}
				259	}
				260	}
				261
				262	func removeDirectoryIndex(u *url.URL) {
				263	if len(u.Path) > 0 {
				264	u.Path = rxDirIndex.ReplaceAllString(u.Path, "$1")
				265	}
				266	}
				267
				268	func removeFragment(u *url.URL) {
				269	u.Fragment = ""
				270	}
				271
				272	func forceHTTP(u *url.URL) {
				273	if strings.ToLower(u.Scheme) == "https" {
				274	u.Scheme = "http"
				275	}
				276	}
				277
				278	func removeDuplicateSlashes(u *url.URL) {
				279	if len(u.Path) > 0 {
				280	u.Path = rxDupSlashes.ReplaceAllString(u.Path, "/")
				281	}
				282	}
				283
				284	func removeWWW(u *url.URL) {
				285	if len(u.Host) > 0 && strings.HasPrefix(strings.ToLower(u.Host), "www.") {
				286	u.Host = u.Host[4:]
				287	}
				288	}
				289
				290	func addWWW(u *url.URL) {
				291	if len(u.Host) > 0 && !strings.HasPrefix(strings.ToLower(u.Host), "www.") {
				292	u.Host = "www." + u.Host
				293	}
				294	}
				295
				296	func sortQuery(u *url.URL) {
				297	q := u.Query()
				298
				299	if len(q) > 0 {
				300	arKeys := make([]string, len(q))
				301	i := 0
				302	for k, _ := range q {
				303	arKeys[i] = k
				304	i++
				305	}
				306	sort.Strings(arKeys)
				307	buf := new(bytes.Buffer)
				308	for _, k := range arKeys {
				309	sort.Strings(q[k])
				310	for _, v := range q[k] {
				311	if buf.Len() > 0 {
				312	buf.WriteRune('&')
				313	}
				314	buf.WriteString(fmt.Sprintf("%s=%s", k, urlesc.QueryEscape(v)))
				315	}
				316	}
				317
				318	// Rebuild the raw query string
				319	u.RawQuery = buf.String()
				320	}
				321	}
				322
				323	func decodeDWORDHost(u *url.URL) {
				324	if len(u.Host) > 0 {
				325	if matches := rxDWORDHost.FindStringSubmatch(u.Host); len(matches) > 2 {
				326	var parts [4]int64
				327
				328	dword, _ := strconv.ParseInt(matches[1], 10, 0)
				329	for i, shift := range []uint{24, 16, 8, 0} {
				330	parts[i] = dword >> shift & 0xFF
				331	}
				332	u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[2])
				333	}
				334	}
				335	}
				336
				337	func decodeOctalHost(u *url.URL) {
				338	if len(u.Host) > 0 {
				339	if matches := rxOctalHost.FindStringSubmatch(u.Host); len(matches) > 5 {
				340	var parts [4]int64
				341
				342	for i := 1; i <= 4; i++ {
				343	parts[i-1], _ = strconv.ParseInt(matches[i], 8, 0)
				344	}
				345	u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[5])
				346	}
				347	}
				348	}
				349
				350	func decodeHexHost(u *url.URL) {
				351	if len(u.Host) > 0 {
				352	if matches := rxHexHost.FindStringSubmatch(u.Host); len(matches) > 2 {
				353	// Conversion is safe because of regex validation
				354	parsed, _ := strconv.ParseInt(matches[1], 16, 0)
				355	// Set host as DWORD (base 10) encoded host
				356	u.Host = fmt.Sprintf("%d%s", parsed, matches[2])
				357	// The rest is the same as decoding a DWORD host
				358	decodeDWORDHost(u)
				359	}
				360	}
				361	}
				362
				363	func removeUnncessaryHostDots(u *url.URL) {
				364	if len(u.Host) > 0 {
				365	if matches := rxHostDots.FindStringSubmatch(u.Host); len(matches) > 1 {
				366	// Trim the leading and trailing dots
				367	u.Host = strings.Trim(matches[1], ".")
				368	if len(matches) > 2 {
				369	u.Host += matches[2]
				370	}
				371	}
				372	}
				373	}
				374
				375	func removeEmptyPortSeparator(u *url.URL) {
				376	if len(u.Host) > 0 {
				377	u.Host = rxEmptyPort.ReplaceAllString(u.Host, "")
				378	}
				379	}