blob: 1b84624594d0dc6658b40fe9301c352ef60a6b83 [file] [log] [blame]
Matthias Andreas Benkard832a54e2019-01-29 09:27:38 +01001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
// Package urlesc implements query escaping as per RFC 3986.
// It contains some parts of the net/url package, modified to leave
// unescaped some reserved characters that net/url incorrectly escapes.
// See https://github.com/golang/go/issues/5684
9package urlesc
10
11import (
12 "bytes"
13 "net/url"
14 "strings"
15)
16
// encoding identifies the part of a URL a string is being escaped for.
type encoding int

// Escaping modes, numbered starting at 1.
const (
	// encodePath selects the rules for a URL path.
	encodePath encoding = iota + 1
	// encodeUserPassword selects the rules for the userinfo part.
	encodeUserPassword
	// encodeQueryComponent selects the rules for a query component.
	encodeQueryComponent
	// encodeFragment selects the rules for a fragment.
	encodeFragment
)
25
26// Return true if the specified character should be escaped when
27// appearing in a URL string, according to RFC 3986.
28func shouldEscape(c byte, mode encoding) bool {
29 // §2.3 Unreserved characters (alphanum)
30 if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
31 return false
32 }
33
34 switch c {
35 case '-', '.', '_', '~': // §2.3 Unreserved characters (mark)
36 return false
37
38 // §2.2 Reserved characters (reserved)
39 case ':', '/', '?', '#', '[', ']', '@', // gen-delims
40 '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
41 // Different sections of the URL allow a few of
42 // the reserved characters to appear unescaped.
43 switch mode {
44 case encodePath: // §3.3
45 // The RFC allows sub-delims and : @.
46 // '/', '[' and ']' can be used to assign meaning to individual path
47 // segments. This package only manipulates the path as a whole,
48 // so we allow those as well. That leaves only ? and # to escape.
49 return c == '?' || c == '#'
50
51 case encodeUserPassword: // §3.2.1
52 // The RFC allows : and sub-delims in
53 // userinfo. The parsing of userinfo treats ':' as special so we must escape
54 // all the gen-delims.
55 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || c == ']' || c == '@'
56
57 case encodeQueryComponent: // §3.4
58 // The RFC allows / and ?.
59 return c != '/' && c != '?'
60
61 case encodeFragment: // §4.1
62 // The RFC text is silent but the grammar allows
63 // everything, so escape nothing but #
64 return c == '#'
65 }
66 }
67
68 // Everything else must be escaped.
69 return true
70}
71
72// QueryEscape escapes the string so it can be safely placed
73// inside a URL query.
74func QueryEscape(s string) string {
75 return escape(s, encodeQueryComponent)
76}
77
78func escape(s string, mode encoding) string {
79 spaceCount, hexCount := 0, 0
80 for i := 0; i < len(s); i++ {
81 c := s[i]
82 if shouldEscape(c, mode) {
83 if c == ' ' && mode == encodeQueryComponent {
84 spaceCount++
85 } else {
86 hexCount++
87 }
88 }
89 }
90
91 if spaceCount == 0 && hexCount == 0 {
92 return s
93 }
94
95 t := make([]byte, len(s)+2*hexCount)
96 j := 0
97 for i := 0; i < len(s); i++ {
98 switch c := s[i]; {
99 case c == ' ' && mode == encodeQueryComponent:
100 t[j] = '+'
101 j++
102 case shouldEscape(c, mode):
103 t[j] = '%'
104 t[j+1] = "0123456789ABCDEF"[c>>4]
105 t[j+2] = "0123456789ABCDEF"[c&15]
106 j += 3
107 default:
108 t[j] = s[i]
109 j++
110 }
111 }
112 return string(t)
113}
114
// uiReplacer maps the percent-encoded forms of ! ' ( ) * back to the
// literal characters. Only the exact spellings listed here are matched.
var uiReplacer = strings.NewReplacer(
	[]string{
		"%21", "!",
		"%27", "'",
		"%28", "(",
		"%29", ")",
		"%2A", "*",
	}...,
)

// unescapeUserinfo returns s with the escapes known to uiReplacer turned
// back into literal characters, since RFC 3986 does not require those
// characters to be escaped. All other text is left as is.
func unescapeUserinfo(s string) string {
	unescaped := uiReplacer.Replace(s)
	return unescaped
}
127
128// Escape reassembles the URL into a valid URL string.
129// The general form of the result is one of:
130//
131// scheme:opaque
132// scheme://userinfo@host/path?query#fragment
133//
134// If u.Opaque is non-empty, String uses the first form;
135// otherwise it uses the second form.
136//
137// In the second form, the following rules apply:
138// - if u.Scheme is empty, scheme: is omitted.
139// - if u.User is nil, userinfo@ is omitted.
140// - if u.Host is empty, host/ is omitted.
141// - if u.Scheme and u.Host are empty and u.User is nil,
142// the entire scheme://userinfo@host/ is omitted.
143// - if u.Host is non-empty and u.Path begins with a /,
144// the form host/path does not add its own /.
145// - if u.RawQuery is empty, ?query is omitted.
146// - if u.Fragment is empty, #fragment is omitted.
147func Escape(u *url.URL) string {
148 var buf bytes.Buffer
149 if u.Scheme != "" {
150 buf.WriteString(u.Scheme)
151 buf.WriteByte(':')
152 }
153 if u.Opaque != "" {
154 buf.WriteString(u.Opaque)
155 } else {
156 if u.Scheme != "" || u.Host != "" || u.User != nil {
157 buf.WriteString("//")
158 if ui := u.User; ui != nil {
159 buf.WriteString(unescapeUserinfo(ui.String()))
160 buf.WriteByte('@')
161 }
162 if h := u.Host; h != "" {
163 buf.WriteString(h)
164 }
165 }
166 if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
167 buf.WriteByte('/')
168 }
169 buf.WriteString(escape(u.Path, encodePath))
170 }
171 if u.RawQuery != "" {
172 buf.WriteByte('?')
173 buf.WriteString(u.RawQuery)
174 }
175 if u.Fragment != "" {
176 buf.WriteByte('#')
177 buf.WriteString(escape(u.Fragment, encodeFragment))
178 }
179 return buf.String()
180}