aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/net/idna/idna9.0.0.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/idna/idna9.0.0.go')
-rw-r--r--vendor/golang.org/x/net/idna/idna9.0.0.go717
1 files changed, 717 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/idna/idna9.0.0.go b/vendor/golang.org/x/net/idna/idna9.0.0.go
new file mode 100644
index 0000000..cc6a892
--- /dev/null
+++ b/vendor/golang.org/x/net/idna/idna9.0.0.go
@@ -0,0 +1,717 @@
1// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
2
3// Copyright 2016 The Go Authors. All rights reserved.
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file.
6
7//go:build !go1.10
8
9// Package idna implements IDNA2008 using the compatibility processing
10// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
11// deal with the transition from IDNA2003.
12//
13// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
14// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
15// UTS #46 is defined in https://www.unicode.org/reports/tr46.
16// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
17// differences between these two standards.
18package idna // import "golang.org/x/net/idna"
19
20import (
21 "fmt"
22 "strings"
23 "unicode/utf8"
24
25 "golang.org/x/text/secure/bidirule"
26 "golang.org/x/text/unicode/norm"
27)
28
29// NOTE: Unlike common practice in Go APIs, the functions will return a
30// sanitized domain name in case of errors. Browsers sometimes use a partially
31// evaluated string as lookup.
32// TODO: the current error handling is, in my opinion, the least opinionated.
33// Other strategies are also viable, though:
34// Option 1) Return an empty string in case of error, but allow the user to
35// specify explicitly which errors to ignore.
36// Option 2) Return the partially evaluated string if it is itself a valid
37// string, otherwise return the empty string in case of error.
38// Option 3) Option 1 and 2.
39// Option 4) Always return an empty string for now and implement Option 1 as
40// needed, and document that the return string may not be empty in case of
41// error in the future.
42// I think Option 1 is best, but it is quite opinionated.
43
44// ToASCII is a wrapper for Punycode.ToASCII.
45func ToASCII(s string) (string, error) {
46 return Punycode.process(s, true)
47}
48
49// ToUnicode is a wrapper for Punycode.ToUnicode.
50func ToUnicode(s string) (string, error) {
51 return Punycode.process(s, false)
52}
53
54// An Option configures a Profile at creation time.
55type Option func(*options)
56
57// Transitional sets a Profile to use the Transitional mapping as defined in UTS
58// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
59// transitional mapping provides a compromise between IDNA2003 and IDNA2008
60// compatibility. It is used by some browsers when resolving domain names. This
61// option is only meaningful if combined with MapForLookup.
62func Transitional(transitional bool) Option {
63 return func(o *options) { o.transitional = transitional }
64}
65
66// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
67// are longer than allowed by the RFC.
68//
69// This option corresponds to the VerifyDnsLength flag in UTS #46.
70func VerifyDNSLength(verify bool) Option {
71 return func(o *options) { o.verifyDNSLength = verify }
72}
73
74// RemoveLeadingDots removes leading label separators. Leading runes that map to
75// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
76func RemoveLeadingDots(remove bool) Option {
77 return func(o *options) { o.removeLeadingDots = remove }
78}
79
80// ValidateLabels sets whether to check the mandatory label validation criteria
81// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
82// of hyphens ('-'), normalization, validity of runes, and the context rules.
83// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
84// in UTS #46.
85func ValidateLabels(enable bool) Option {
86 return func(o *options) {
87 // Don't override existing mappings, but set one that at least checks
88 // normalization if it is not set.
89 if o.mapping == nil && enable {
90 o.mapping = normalize
91 }
92 o.trie = trie
93 o.checkJoiners = enable
94 o.checkHyphens = enable
95 if enable {
96 o.fromPuny = validateFromPunycode
97 } else {
98 o.fromPuny = nil
99 }
100 }
101}
102
103// CheckHyphens sets whether to check for correct use of hyphens ('-') in
104// labels. Most web browsers do not have this option set, since labels such as
105// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
106//
107// This option corresponds to the CheckHyphens flag in UTS #46.
108func CheckHyphens(enable bool) Option {
109 return func(o *options) { o.checkHyphens = enable }
110}
111
112// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
113// A of RFC 5892, concerning the use of joiner runes.
114//
115// This option corresponds to the CheckJoiners flag in UTS #46.
116func CheckJoiners(enable bool) Option {
117 return func(o *options) {
118 o.trie = trie
119 o.checkJoiners = enable
120 }
121}
122
123// StrictDomainName limits the set of permissible ASCII characters to those
124// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
125// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
126// but is only useful if ValidateLabels is set.
127//
128// This option is useful, for instance, for browsers that allow characters
129// outside this range, for example a '_' (U+005F LOW LINE). See
130// http://www.rfc-editor.org/std/std3.txt for more details.
131//
132// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
133func StrictDomainName(use bool) Option {
134 return func(o *options) { o.useSTD3Rules = use }
135}
136
137// NOTE: the following options pull in tables. The tables should not be linked
138// in as long as the options are not used.
139
140// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
141// that relies on proper validation of labels should include this rule.
142//
143// This option corresponds to the CheckBidi flag in UTS #46.
144func BidiRule() Option {
145 return func(o *options) { o.bidirule = bidirule.ValidString }
146}
147
148// ValidateForRegistration sets validation options to verify that a given IDN is
149// properly formatted for registration as defined by Section 4 of RFC 5891.
150func ValidateForRegistration() Option {
151 return func(o *options) {
152 o.mapping = validateRegistration
153 StrictDomainName(true)(o)
154 ValidateLabels(true)(o)
155 VerifyDNSLength(true)(o)
156 BidiRule()(o)
157 }
158}
159
160// MapForLookup sets validation and mapping options such that a given IDN is
161// transformed for domain name lookup according to the requirements set out in
162// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
163// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
164// to add this check.
165//
166// The mappings include normalization and mapping case, width and other
167// compatibility mappings.
168func MapForLookup() Option {
169 return func(o *options) {
170 o.mapping = validateAndMap
171 StrictDomainName(true)(o)
172 ValidateLabels(true)(o)
173 RemoveLeadingDots(true)(o)
174 }
175}
176
177type options struct {
178 transitional bool
179 useSTD3Rules bool
180 checkHyphens bool
181 checkJoiners bool
182 verifyDNSLength bool
183 removeLeadingDots bool
184
185 trie *idnaTrie
186
187 // fromPuny calls validation rules when converting A-labels to U-labels.
188 fromPuny func(p *Profile, s string) error
189
190 // mapping implements a validation and mapping step as defined in RFC 5895
191 // or UTS 46, tailored to, for example, domain registration or lookup.
192 mapping func(p *Profile, s string) (string, error)
193
194 // bidirule, if specified, checks whether s conforms to the Bidi Rule
195 // defined in RFC 5893.
196 bidirule func(s string) bool
197}
198
199// A Profile defines the configuration of a IDNA mapper.
200type Profile struct {
201 options
202}
203
204func apply(o *options, opts []Option) {
205 for _, f := range opts {
206 f(o)
207 }
208}
209
210// New creates a new Profile.
211//
212// With no options, the returned Profile is the most permissive and equals the
213// Punycode Profile. Options can be passed to further restrict the Profile. The
214// MapForLookup and ValidateForRegistration options set a collection of options,
215// for lookup and registration purposes respectively, which can be tailored by
216// adding more fine-grained options, where later options override earlier
217// options.
218func New(o ...Option) *Profile {
219 p := &Profile{}
220 apply(&p.options, o)
221 return p
222}
223
224// ToASCII converts a domain or domain label to its ASCII form. For example,
225// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
226// ToASCII("golang") is "golang". If an error is encountered it will return
227// an error and a (partially) processed result.
228func (p *Profile) ToASCII(s string) (string, error) {
229 return p.process(s, true)
230}
231
232// ToUnicode converts a domain or domain label to its Unicode form. For example,
233// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
234// ToUnicode("golang") is "golang". If an error is encountered it will return
235// an error and a (partially) processed result.
236func (p *Profile) ToUnicode(s string) (string, error) {
237 pp := *p
238 pp.transitional = false
239 return pp.process(s, false)
240}
241
242// String reports a string with a description of the profile for debugging
243// purposes. The string format may change with different versions.
244func (p *Profile) String() string {
245 s := ""
246 if p.transitional {
247 s = "Transitional"
248 } else {
249 s = "NonTransitional"
250 }
251 if p.useSTD3Rules {
252 s += ":UseSTD3Rules"
253 }
254 if p.checkHyphens {
255 s += ":CheckHyphens"
256 }
257 if p.checkJoiners {
258 s += ":CheckJoiners"
259 }
260 if p.verifyDNSLength {
261 s += ":VerifyDNSLength"
262 }
263 return s
264}
265
266var (
267 // Punycode is a Profile that does raw punycode processing with a minimum
268 // of validation.
269 Punycode *Profile = punycode
270
271 // Lookup is the recommended profile for looking up domain names, according
272 // to Section 5 of RFC 5891. The exact configuration of this profile may
273 // change over time.
274 Lookup *Profile = lookup
275
276 // Display is the recommended profile for displaying domain names.
277 // The configuration of this profile may change over time.
278 Display *Profile = display
279
280 // Registration is the recommended profile for checking whether a given
281 // IDN is valid for registration, according to Section 4 of RFC 5891.
282 Registration *Profile = registration
283
284 punycode = &Profile{}
285 lookup = &Profile{options{
286 transitional: true,
287 removeLeadingDots: true,
288 useSTD3Rules: true,
289 checkHyphens: true,
290 checkJoiners: true,
291 trie: trie,
292 fromPuny: validateFromPunycode,
293 mapping: validateAndMap,
294 bidirule: bidirule.ValidString,
295 }}
296 display = &Profile{options{
297 useSTD3Rules: true,
298 removeLeadingDots: true,
299 checkHyphens: true,
300 checkJoiners: true,
301 trie: trie,
302 fromPuny: validateFromPunycode,
303 mapping: validateAndMap,
304 bidirule: bidirule.ValidString,
305 }}
306 registration = &Profile{options{
307 useSTD3Rules: true,
308 verifyDNSLength: true,
309 checkHyphens: true,
310 checkJoiners: true,
311 trie: trie,
312 fromPuny: validateFromPunycode,
313 mapping: validateRegistration,
314 bidirule: bidirule.ValidString,
315 }}
316
317 // TODO: profiles
318 // Register: recommended for approving domain names: don't do any mappings
319 // but rather reject on invalid input. Bundle or block deviation characters.
320)
321
// labelError reports a label that failed validation. code_ holds the short
// identifier of the rule that was violated (for example "A4" or "V1").
type labelError struct {
	label string
	code_ string
}

// code returns the identifier of the violated rule.
func (e labelError) code() string {
	return e.code_
}

// Error implements the error interface. The message quotes the offending
// label and does not include the rule identifier.
func (e labelError) Error() string {
	msg := fmt.Sprintf("idna: invalid label %q", e.label)
	return msg
}
328
// runeError reports a rune that is not allowed in the input. Its value is the
// offending rune.
type runeError rune

// code returns the identifier reported for every disallowed rune.
func (e runeError) code() string {
	return "P1"
}

// Error implements the error interface, printing the rune in U+XXXX form.
func (e runeError) Error() string {
	msg := fmt.Sprintf("idna: disallowed rune %U", e)
	return msg
}
335
336// process implements the algorithm described in section 4 of UTS #46,
337// see https://www.unicode.org/reports/tr46.
338func (p *Profile) process(s string, toASCII bool) (string, error) {
339 var err error
340 if p.mapping != nil {
341 s, err = p.mapping(p, s)
342 }
343 // Remove leading empty labels.
344 if p.removeLeadingDots {
345 for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
346 }
347 }
348 // It seems like we should only create this error on ToASCII, but the
349 // UTS 46 conformance tests suggests we should always check this.
350 if err == nil && p.verifyDNSLength && s == "" {
351 err = &labelError{s, "A4"}
352 }
353 labels := labelIter{orig: s}
354 for ; !labels.done(); labels.next() {
355 label := labels.label()
356 if label == "" {
357 // Empty labels are not okay. The label iterator skips the last
358 // label if it is empty.
359 if err == nil && p.verifyDNSLength {
360 err = &labelError{s, "A4"}
361 }
362 continue
363 }
364 if strings.HasPrefix(label, acePrefix) {
365 u, err2 := decode(label[len(acePrefix):])
366 if err2 != nil {
367 if err == nil {
368 err = err2
369 }
370 // Spec says keep the old label.
371 continue
372 }
373 labels.set(u)
374 if err == nil && p.fromPuny != nil {
375 err = p.fromPuny(p, u)
376 }
377 if err == nil {
378 // This should be called on NonTransitional, according to the
379 // spec, but that currently does not have any effect. Use the
380 // original profile to preserve options.
381 err = p.validateLabel(u)
382 }
383 } else if err == nil {
384 err = p.validateLabel(label)
385 }
386 }
387 if toASCII {
388 for labels.reset(); !labels.done(); labels.next() {
389 label := labels.label()
390 if !ascii(label) {
391 a, err2 := encode(acePrefix, label)
392 if err == nil {
393 err = err2
394 }
395 label = a
396 labels.set(a)
397 }
398 n := len(label)
399 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
400 err = &labelError{label, "A4"}
401 }
402 }
403 }
404 s = labels.result()
405 if toASCII && p.verifyDNSLength && err == nil {
406 // Compute the length of the domain name minus the root label and its dot.
407 n := len(s)
408 if n > 0 && s[n-1] == '.' {
409 n--
410 }
411 if len(s) < 1 || n > 253 {
412 err = &labelError{s, "A4"}
413 }
414 }
415 return s, err
416}
417
418func normalize(p *Profile, s string) (string, error) {
419 return norm.NFC.String(s), nil
420}
421
422func validateRegistration(p *Profile, s string) (string, error) {
423 if !norm.NFC.IsNormalString(s) {
424 return s, &labelError{s, "V1"}
425 }
426 for i := 0; i < len(s); {
427 v, sz := trie.lookupString(s[i:])
428 // Copy bytes not copied so far.
429 switch p.simplify(info(v).category()) {
430 // TODO: handle the NV8 defined in the Unicode idna data set to allow
431 // for strict conformance to IDNA2008.
432 case valid, deviation:
433 case disallowed, mapped, unknown, ignored:
434 r, _ := utf8.DecodeRuneInString(s[i:])
435 return s, runeError(r)
436 }
437 i += sz
438 }
439 return s, nil
440}
441
442func validateAndMap(p *Profile, s string) (string, error) {
443 var (
444 err error
445 b []byte
446 k int
447 )
448 for i := 0; i < len(s); {
449 v, sz := trie.lookupString(s[i:])
450 start := i
451 i += sz
452 // Copy bytes not copied so far.
453 switch p.simplify(info(v).category()) {
454 case valid:
455 continue
456 case disallowed:
457 if err == nil {
458 r, _ := utf8.DecodeRuneInString(s[start:])
459 err = runeError(r)
460 }
461 continue
462 case mapped, deviation:
463 b = append(b, s[k:start]...)
464 b = info(v).appendMapping(b, s[start:i])
465 case ignored:
466 b = append(b, s[k:start]...)
467 // drop the rune
468 case unknown:
469 b = append(b, s[k:start]...)
470 b = append(b, "\ufffd"...)
471 }
472 k = i
473 }
474 if k == 0 {
475 // No changes so far.
476 s = norm.NFC.String(s)
477 } else {
478 b = append(b, s[k:]...)
479 if norm.NFC.QuickSpan(b) != len(b) {
480 b = norm.NFC.Bytes(b)
481 }
482 // TODO: the punycode converters require strings as input.
483 s = string(b)
484 }
485 return s, err
486}
487
// A labelIter allows iterating over domain name labels.
//
// It starts out scanning orig in place. The first call to set splits orig
// into slice; from then on labels are read from and written to slice, and
// curStart only serves as the "done" marker.
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

// reset rewinds the iterator to the first label. Labels stored with set are
// kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the labels joined by dots, or orig if no label was ever set.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. While scanning orig in place it also
// records where the label ends, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	if dot := strings.IndexByte(l.orig[l.curStart:], '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	end := len(l.orig)
	if l.slice == nil {
		// Step over the dot that terminated the current label.
		l.curStart = l.curEnd + 1
		if l.curStart == end-1 && l.orig[l.curStart] == '.' {
			l.curStart = end
		}
		return
	}
	last := len(l.slice) - 1
	if l.i > last || (l.i == last && l.slice[l.i] == "") {
		l.curStart = end
	}
}

// set replaces the current label with s, switching the iterator to slice mode
// on first use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}
547
const (
	// acePrefix is the ASCII Compatible Encoding prefix that marks a label as
	// punycode-encoded.
	acePrefix = "xn--"
)
550
551func (p *Profile) simplify(cat category) category {
552 switch cat {
553 case disallowedSTD3Mapped:
554 if p.useSTD3Rules {
555 cat = disallowed
556 } else {
557 cat = mapped
558 }
559 case disallowedSTD3Valid:
560 if p.useSTD3Rules {
561 cat = disallowed
562 } else {
563 cat = valid
564 }
565 case deviation:
566 if !p.transitional {
567 cat = valid
568 }
569 case validNV8, validXV8:
570 // TODO: handle V2008
571 cat = valid
572 }
573 return cat
574}
575
576func validateFromPunycode(p *Profile, s string) error {
577 if !norm.NFC.IsNormalString(s) {
578 return &labelError{s, "V1"}
579 }
580 for i := 0; i < len(s); {
581 v, sz := trie.lookupString(s[i:])
582 if c := p.simplify(info(v).category()); c != valid && c != deviation {
583 return &labelError{s, "V6"}
584 }
585 i += sz
586 }
587 return nil
588}
589
590const (
591 zwnj = "\u200c"
592 zwj = "\u200d"
593)
594
595type joinState int8
596
597const (
598 stateStart joinState = iota
599 stateVirama
600 stateBefore
601 stateBeforeVirama
602 stateAfter
603 stateFAIL
604)
605
606var joinStates = [][numJoinTypes]joinState{
607 stateStart: {
608 joiningL: stateBefore,
609 joiningD: stateBefore,
610 joinZWNJ: stateFAIL,
611 joinZWJ: stateFAIL,
612 joinVirama: stateVirama,
613 },
614 stateVirama: {
615 joiningL: stateBefore,
616 joiningD: stateBefore,
617 },
618 stateBefore: {
619 joiningL: stateBefore,
620 joiningD: stateBefore,
621 joiningT: stateBefore,
622 joinZWNJ: stateAfter,
623 joinZWJ: stateFAIL,
624 joinVirama: stateBeforeVirama,
625 },
626 stateBeforeVirama: {
627 joiningL: stateBefore,
628 joiningD: stateBefore,
629 joiningT: stateBefore,
630 },
631 stateAfter: {
632 joiningL: stateFAIL,
633 joiningD: stateBefore,
634 joiningT: stateAfter,
635 joiningR: stateStart,
636 joinZWNJ: stateFAIL,
637 joinZWJ: stateFAIL,
638 joinVirama: stateAfter, // no-op as we can't accept joiners here
639 },
640 stateFAIL: {
641 0: stateFAIL,
642 joiningL: stateFAIL,
643 joiningD: stateFAIL,
644 joiningT: stateFAIL,
645 joiningR: stateFAIL,
646 joinZWNJ: stateFAIL,
647 joinZWJ: stateFAIL,
648 joinVirama: stateFAIL,
649 },
650}
651
652// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
653// already implicitly satisfied by the overall implementation.
654func (p *Profile) validateLabel(s string) error {
655 if s == "" {
656 if p.verifyDNSLength {
657 return &labelError{s, "A4"}
658 }
659 return nil
660 }
661 if p.bidirule != nil && !p.bidirule(s) {
662 return &labelError{s, "B"}
663 }
664 if p.checkHyphens {
665 if len(s) > 4 && s[2] == '-' && s[3] == '-' {
666 return &labelError{s, "V2"}
667 }
668 if s[0] == '-' || s[len(s)-1] == '-' {
669 return &labelError{s, "V3"}
670 }
671 }
672 if !p.checkJoiners {
673 return nil
674 }
675 trie := p.trie // p.checkJoiners is only set if trie is set.
676 // TODO: merge the use of this in the trie.
677 v, sz := trie.lookupString(s)
678 x := info(v)
679 if x.isModifier() {
680 return &labelError{s, "V5"}
681 }
682 // Quickly return in the absence of zero-width (non) joiners.
683 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
684 return nil
685 }
686 st := stateStart
687 for i := 0; ; {
688 jt := x.joinType()
689 if s[i:i+sz] == zwj {
690 jt = joinZWJ
691 } else if s[i:i+sz] == zwnj {
692 jt = joinZWNJ
693 }
694 st = joinStates[st][jt]
695 if x.isViramaModifier() {
696 st = joinStates[st][joinVirama]
697 }
698 if i += sz; i == len(s) {
699 break
700 }
701 v, sz = trie.lookupString(s[i:])
702 x = info(v)
703 }
704 if st == stateFAIL || st == stateAfter {
705 return &labelError{s, "C"}
706 }
707 return nil
708}
709
// ascii reports whether s consists solely of ASCII bytes, that is, bytes
// below utf8.RuneSelf. The empty string is ASCII.
func ascii(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}