diff options
Diffstat (limited to 'vendor/golang.org/x/text/secure/bidirule/bidirule.go')
-rw-r--r-- | vendor/golang.org/x/text/secure/bidirule/bidirule.go | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/secure/bidirule/bidirule.go b/vendor/golang.org/x/text/secure/bidirule/bidirule.go new file mode 100644 index 0000000..e2b70f7 --- /dev/null +++ b/vendor/golang.org/x/text/secure/bidirule/bidirule.go | |||
@@ -0,0 +1,336 @@ | |||
1 | // Copyright 2016 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | // Package bidirule implements the Bidi Rule defined by RFC 5893. | ||
6 | // | ||
7 | // This package is under development. The API may change without notice and | ||
8 | // without preserving backward compatibility. | ||
9 | package bidirule | ||
10 | |||
11 | import ( | ||
12 | "errors" | ||
13 | "unicode/utf8" | ||
14 | |||
15 | "golang.org/x/text/transform" | ||
16 | "golang.org/x/text/unicode/bidi" | ||
17 | ) | ||
18 | |||
19 | // This file contains an implementation of RFC 5893: Right-to-Left Scripts for | ||
20 | // Internationalized Domain Names for Applications (IDNA) | ||
21 | // | ||
22 | // A label is an individual component of a domain name. Labels are usually | ||
23 | // shown separated by dots; for example, the domain name "www.example.com" is | ||
24 | // composed of three labels: "www", "example", and "com". | ||
25 | // | ||
26 | // An RTL label is a label that contains at least one character of class R, AL, | ||
27 | // or AN. An LTR label is any label that is not an RTL label. | ||
28 | // | ||
29 | // A "Bidi domain name" is a domain name that contains at least one RTL label. | ||
30 | // | ||
31 | // The following guarantees can be made based on the above: | ||
32 | // | ||
33 | // o In a domain name consisting of only labels that satisfy the rule, | ||
34 | // the requirements of Section 3 are satisfied. Note that even LTR | ||
35 | // labels and pure ASCII labels have to be tested. | ||
36 | // | ||
37 | // o In a domain name consisting of only LDH labels (as defined in the | ||
38 | // Definitions document [RFC5890]) and labels that satisfy the rule, | ||
39 | // the requirements of Section 3 are satisfied as long as a label | ||
40 | // that starts with an ASCII digit does not come after a | ||
41 | // right-to-left label. | ||
42 | // | ||
43 | // No guarantee is given for other combinations. | ||
44 | |||
45 | // ErrInvalid indicates a label is invalid according to the Bidi Rule. | ||
46 | var ErrInvalid = errors.New("bidirule: failed Bidi Rule") | ||
47 | |||
48 | type ruleState uint8 | ||
49 | |||
50 | const ( | ||
51 | ruleInitial ruleState = iota | ||
52 | ruleLTR | ||
53 | ruleLTRFinal | ||
54 | ruleRTL | ||
55 | ruleRTLFinal | ||
56 | ruleInvalid | ||
57 | ) | ||
58 | |||
59 | type ruleTransition struct { | ||
60 | next ruleState | ||
61 | mask uint16 | ||
62 | } | ||
63 | |||
64 | var transitions = [...][2]ruleTransition{ | ||
65 | // [2.1] The first character must be a character with Bidi property L, R, or | ||
66 | // AL. If it has the R or AL property, it is an RTL label; if it has the L | ||
67 | // property, it is an LTR label. | ||
68 | ruleInitial: { | ||
69 | {ruleLTRFinal, 1 << bidi.L}, | ||
70 | {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL}, | ||
71 | }, | ||
72 | ruleRTL: { | ||
73 | // [2.3] In an RTL label, the end of the label must be a character with | ||
74 | // Bidi property R, AL, EN, or AN, followed by zero or more characters | ||
75 | // with Bidi property NSM. | ||
76 | {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN}, | ||
77 | |||
78 | // [2.2] In an RTL label, only characters with the Bidi properties R, | ||
79 | // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||
80 | // We exclude the entries from [2.3] | ||
81 | {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, | ||
82 | }, | ||
83 | ruleRTLFinal: { | ||
84 | // [2.3] In an RTL label, the end of the label must be a character with | ||
85 | // Bidi property R, AL, EN, or AN, followed by zero or more characters | ||
86 | // with Bidi property NSM. | ||
87 | {ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM}, | ||
88 | |||
89 | // [2.2] In an RTL label, only characters with the Bidi properties R, | ||
90 | // AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||
91 | // We exclude the entries from [2.3] and NSM. | ||
92 | {ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, | ||
93 | }, | ||
94 | ruleLTR: { | ||
95 | // [2.6] In an LTR label, the end of the label must be a character with | ||
96 | // Bidi property L or EN, followed by zero or more characters with Bidi | ||
97 | // property NSM. | ||
98 | {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN}, | ||
99 | |||
100 | // [2.5] In an LTR label, only characters with the Bidi properties L, | ||
101 | // EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||
102 | // We exclude the entries from [2.6]. | ||
103 | {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM}, | ||
104 | }, | ||
105 | ruleLTRFinal: { | ||
106 | // [2.6] In an LTR label, the end of the label must be a character with | ||
107 | // Bidi property L or EN, followed by zero or more characters with Bidi | ||
108 | // property NSM. | ||
109 | {ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM}, | ||
110 | |||
111 | // [2.5] In an LTR label, only characters with the Bidi properties L, | ||
112 | // EN, ES, CS, ET, ON, BN, or NSM are allowed. | ||
113 | // We exclude the entries from [2.6]. | ||
114 | {ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN}, | ||
115 | }, | ||
116 | ruleInvalid: { | ||
117 | {ruleInvalid, 0}, | ||
118 | {ruleInvalid, 0}, | ||
119 | }, | ||
120 | } | ||
121 | |||
122 | // [2.4] In an RTL label, if an EN is present, no AN may be present, and | ||
123 | // vice versa. | ||
124 | const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN) | ||
125 | |||
126 | // From RFC 5893 | ||
127 | // An RTL label is a label that contains at least one character of type | ||
128 | // R, AL, or AN. | ||
129 | // | ||
130 | // An LTR label is any label that is not an RTL label. | ||
131 | |||
132 | // Direction reports the direction of the given label as defined by RFC 5893. | ||
133 | // The Bidi Rule does not have to be applied to labels of the category | ||
134 | // LeftToRight. | ||
135 | func Direction(b []byte) bidi.Direction { | ||
136 | for i := 0; i < len(b); { | ||
137 | e, sz := bidi.Lookup(b[i:]) | ||
138 | if sz == 0 { | ||
139 | i++ | ||
140 | } | ||
141 | c := e.Class() | ||
142 | if c == bidi.R || c == bidi.AL || c == bidi.AN { | ||
143 | return bidi.RightToLeft | ||
144 | } | ||
145 | i += sz | ||
146 | } | ||
147 | return bidi.LeftToRight | ||
148 | } | ||
149 | |||
150 | // DirectionString reports the direction of the given label as defined by RFC | ||
151 | // 5893. The Bidi Rule does not have to be applied to labels of the category | ||
152 | // LeftToRight. | ||
153 | func DirectionString(s string) bidi.Direction { | ||
154 | for i := 0; i < len(s); { | ||
155 | e, sz := bidi.LookupString(s[i:]) | ||
156 | if sz == 0 { | ||
157 | i++ | ||
158 | continue | ||
159 | } | ||
160 | c := e.Class() | ||
161 | if c == bidi.R || c == bidi.AL || c == bidi.AN { | ||
162 | return bidi.RightToLeft | ||
163 | } | ||
164 | i += sz | ||
165 | } | ||
166 | return bidi.LeftToRight | ||
167 | } | ||
168 | |||
169 | // Valid reports whether b conforms to the BiDi rule. | ||
170 | func Valid(b []byte) bool { | ||
171 | var t Transformer | ||
172 | if n, ok := t.advance(b); !ok || n < len(b) { | ||
173 | return false | ||
174 | } | ||
175 | return t.isFinal() | ||
176 | } | ||
177 | |||
178 | // ValidString reports whether s conforms to the BiDi rule. | ||
179 | func ValidString(s string) bool { | ||
180 | var t Transformer | ||
181 | if n, ok := t.advanceString(s); !ok || n < len(s) { | ||
182 | return false | ||
183 | } | ||
184 | return t.isFinal() | ||
185 | } | ||
186 | |||
187 | // New returns a Transformer that verifies that input adheres to the Bidi Rule. | ||
188 | func New() *Transformer { | ||
189 | return &Transformer{} | ||
190 | } | ||
191 | |||
192 | // Transformer implements transform.Transform. | ||
193 | type Transformer struct { | ||
194 | state ruleState | ||
195 | hasRTL bool | ||
196 | seen uint16 | ||
197 | } | ||
198 | |||
199 | // A rule can only be violated for "Bidi Domain names", meaning if one of the | ||
200 | // following categories has been observed. | ||
201 | func (t *Transformer) isRTL() bool { | ||
202 | const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN | ||
203 | return t.seen&isRTL != 0 | ||
204 | } | ||
205 | |||
206 | // Reset implements transform.Transformer. | ||
207 | func (t *Transformer) Reset() { *t = Transformer{} } | ||
208 | |||
209 | // Transform implements transform.Transformer. This Transformer has state and | ||
210 | // needs to be reset between uses. | ||
211 | func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { | ||
212 | if len(dst) < len(src) { | ||
213 | src = src[:len(dst)] | ||
214 | atEOF = false | ||
215 | err = transform.ErrShortDst | ||
216 | } | ||
217 | n, err1 := t.Span(src, atEOF) | ||
218 | copy(dst, src[:n]) | ||
219 | if err == nil || err1 != nil && err1 != transform.ErrShortSrc { | ||
220 | err = err1 | ||
221 | } | ||
222 | return n, n, err | ||
223 | } | ||
224 | |||
225 | // Span returns the first n bytes of src that conform to the Bidi rule. | ||
226 | func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) { | ||
227 | if t.state == ruleInvalid && t.isRTL() { | ||
228 | return 0, ErrInvalid | ||
229 | } | ||
230 | n, ok := t.advance(src) | ||
231 | switch { | ||
232 | case !ok: | ||
233 | err = ErrInvalid | ||
234 | case n < len(src): | ||
235 | if !atEOF { | ||
236 | err = transform.ErrShortSrc | ||
237 | break | ||
238 | } | ||
239 | err = ErrInvalid | ||
240 | case !t.isFinal(): | ||
241 | err = ErrInvalid | ||
242 | } | ||
243 | return n, err | ||
244 | } | ||
245 | |||
246 | // Precomputing the ASCII values decreases running time for the ASCII fast path | ||
247 | // by about 30%. | ||
248 | var asciiTable [128]bidi.Properties | ||
249 | |||
250 | func init() { | ||
251 | for i := range asciiTable { | ||
252 | p, _ := bidi.LookupRune(rune(i)) | ||
253 | asciiTable[i] = p | ||
254 | } | ||
255 | } | ||
256 | |||
257 | func (t *Transformer) advance(s []byte) (n int, ok bool) { | ||
258 | var e bidi.Properties | ||
259 | var sz int | ||
260 | for n < len(s) { | ||
261 | if s[n] < utf8.RuneSelf { | ||
262 | e, sz = asciiTable[s[n]], 1 | ||
263 | } else { | ||
264 | e, sz = bidi.Lookup(s[n:]) | ||
265 | if sz <= 1 { | ||
266 | if sz == 1 { | ||
267 | // We always consider invalid UTF-8 to be invalid, even if | ||
268 | // the string has not yet been determined to be RTL. | ||
269 | // TODO: is this correct? | ||
270 | return n, false | ||
271 | } | ||
272 | return n, true // incomplete UTF-8 encoding | ||
273 | } | ||
274 | } | ||
275 | // TODO: using CompactClass would result in noticeable speedup. | ||
276 | // See unicode/bidi/prop.go:Properties.CompactClass. | ||
277 | c := uint16(1 << e.Class()) | ||
278 | t.seen |= c | ||
279 | if t.seen&exclusiveRTL == exclusiveRTL { | ||
280 | t.state = ruleInvalid | ||
281 | return n, false | ||
282 | } | ||
283 | switch tr := transitions[t.state]; { | ||
284 | case tr[0].mask&c != 0: | ||
285 | t.state = tr[0].next | ||
286 | case tr[1].mask&c != 0: | ||
287 | t.state = tr[1].next | ||
288 | default: | ||
289 | t.state = ruleInvalid | ||
290 | if t.isRTL() { | ||
291 | return n, false | ||
292 | } | ||
293 | } | ||
294 | n += sz | ||
295 | } | ||
296 | return n, true | ||
297 | } | ||
298 | |||
299 | func (t *Transformer) advanceString(s string) (n int, ok bool) { | ||
300 | var e bidi.Properties | ||
301 | var sz int | ||
302 | for n < len(s) { | ||
303 | if s[n] < utf8.RuneSelf { | ||
304 | e, sz = asciiTable[s[n]], 1 | ||
305 | } else { | ||
306 | e, sz = bidi.LookupString(s[n:]) | ||
307 | if sz <= 1 { | ||
308 | if sz == 1 { | ||
309 | return n, false // invalid UTF-8 | ||
310 | } | ||
311 | return n, true // incomplete UTF-8 encoding | ||
312 | } | ||
313 | } | ||
314 | // TODO: using CompactClass results in noticeable speedup. | ||
315 | // See unicode/bidi/prop.go:Properties.CompactClass. | ||
316 | c := uint16(1 << e.Class()) | ||
317 | t.seen |= c | ||
318 | if t.seen&exclusiveRTL == exclusiveRTL { | ||
319 | t.state = ruleInvalid | ||
320 | return n, false | ||
321 | } | ||
322 | switch tr := transitions[t.state]; { | ||
323 | case tr[0].mask&c != 0: | ||
324 | t.state = tr[0].next | ||
325 | case tr[1].mask&c != 0: | ||
326 | t.state = tr[1].next | ||
327 | default: | ||
328 | t.state = ruleInvalid | ||
329 | if t.isRTL() { | ||
330 | return n, false | ||
331 | } | ||
332 | } | ||
333 | n += sz | ||
334 | } | ||
335 | return n, true | ||
336 | } | ||