aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/net/publicsuffix/list.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/publicsuffix/list.go')
-rw-r--r--vendor/golang.org/x/net/publicsuffix/list.go203
1 files changed, 203 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/publicsuffix/list.go b/vendor/golang.org/x/net/publicsuffix/list.go
new file mode 100644
index 0000000..d56e9e7
--- /dev/null
+++ b/vendor/golang.org/x/net/publicsuffix/list.go
@@ -0,0 +1,203 @@
1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go
6
7// Package publicsuffix provides a public suffix list based on data from
8// https://publicsuffix.org/
9//
10// A public suffix is one under which Internet users can directly register
11// names. It is related to, but different from, a TLD (top level domain).
12//
13// "com" is a TLD (top level domain). Top level means it has no dots.
14//
15// "com" is also a public suffix. Amazon and Google have registered different
16// siblings under that domain: "amazon.com" and "google.com".
17//
18// "au" is another TLD, again because it has no dots. But it's not "amazon.au".
19// Instead, it's "amazon.com.au".
20//
21// "com.au" isn't an actual TLD, because it's not at the top level (it has
22// dots). But it is an eTLD (effective TLD), because that's the branching point
23// for domain name registrars.
24//
25// Another name for "an eTLD" is "a public suffix". Often, what's more of
26// interest is the eTLD+1, or one more label than the public suffix. For
27// example, browsers partition read/write access to HTTP cookies according to
28// the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
29// "google.com.au", but web pages served from "maps.google.com" can share
30// cookies from "www.google.com", so you don't have to sign into Google Maps
31// separately from signing into Google Web Search. Note that all four of those
32// domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
33// the last two are not (but share the same eTLD+1: "google.com").
34//
35// All of these domains have the same eTLD+1:
36// - "www.books.amazon.co.uk"
37// - "books.amazon.co.uk"
38// - "amazon.co.uk"
39//
40// Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
41//
42// There is no closed form algorithm to calculate the eTLD of a domain.
43// Instead, the calculation is data driven. This package provides a
44// pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at
45// https://publicsuffix.org/
46package publicsuffix // import "golang.org/x/net/publicsuffix"
47
48// TODO: specify case sensitivity and leading/trailing dot behavior for
49// func PublicSuffix and func EffectiveTLDPlusOne.
50
51import (
52 "fmt"
53 "net/http/cookiejar"
54 "strings"
55)
56
57// List implements the cookiejar.PublicSuffixList interface by calling the
58// PublicSuffix function.
59var List cookiejar.PublicSuffixList = list{}
60
61type list struct{}
62
63func (list) PublicSuffix(domain string) string {
64 ps, _ := PublicSuffix(domain)
65 return ps
66}
67
68func (list) String() string {
69 return version
70}
71
72// PublicSuffix returns the public suffix of the domain using a copy of the
73// publicsuffix.org database compiled into the library.
74//
75// icann is whether the public suffix is managed by the Internet Corporation
76// for Assigned Names and Numbers. If not, the public suffix is either a
77// privately managed domain (and in practice, not a top level domain) or an
78// unmanaged top level domain (and not explicitly mentioned in the
79// publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN
80// domains, "foo.dyndns.org" and "foo.blogspot.co.uk" are private domains and
81// "cromulent" is an unmanaged top level domain.
82//
83// Use cases for distinguishing ICANN domains like "foo.com" from private
84// domains like "foo.appspot.com" can be found at
85// https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
86func PublicSuffix(domain string) (publicSuffix string, icann bool) {
87 lo, hi := uint32(0), uint32(numTLD)
88 s, suffix, icannNode, wildcard := domain, len(domain), false, false
89loop:
90 for {
91 dot := strings.LastIndex(s, ".")
92 if wildcard {
93 icann = icannNode
94 suffix = 1 + dot
95 }
96 if lo == hi {
97 break
98 }
99 f := find(s[1+dot:], lo, hi)
100 if f == notFound {
101 break
102 }
103
104 u := uint32(nodes.get(f) >> (nodesBitsTextOffset + nodesBitsTextLength))
105 icannNode = u&(1<<nodesBitsICANN-1) != 0
106 u >>= nodesBitsICANN
107 u = children.get(u & (1<<nodesBitsChildren - 1))
108 lo = u & (1<<childrenBitsLo - 1)
109 u >>= childrenBitsLo
110 hi = u & (1<<childrenBitsHi - 1)
111 u >>= childrenBitsHi
112 switch u & (1<<childrenBitsNodeType - 1) {
113 case nodeTypeNormal:
114 suffix = 1 + dot
115 case nodeTypeException:
116 suffix = 1 + len(s)
117 break loop
118 }
119 u >>= childrenBitsNodeType
120 wildcard = u&(1<<childrenBitsWildcard-1) != 0
121 if !wildcard {
122 icann = icannNode
123 }
124
125 if dot == -1 {
126 break
127 }
128 s = s[:dot]
129 }
130 if suffix == len(domain) {
131 // If no rules match, the prevailing rule is "*".
132 return domain[1+strings.LastIndex(domain, "."):], icann
133 }
134 return domain[suffix:], icann
135}
136
137const notFound uint32 = 1<<32 - 1
138
139// find returns the index of the node in the range [lo, hi) whose label equals
140// label, or notFound if there is no such node. The range is assumed to be in
141// strictly increasing node label order.
142func find(label string, lo, hi uint32) uint32 {
143 for lo < hi {
144 mid := lo + (hi-lo)/2
145 s := nodeLabel(mid)
146 if s < label {
147 lo = mid + 1
148 } else if s == label {
149 return mid
150 } else {
151 hi = mid
152 }
153 }
154 return notFound
155}
156
157// nodeLabel returns the label for the i'th node.
158func nodeLabel(i uint32) string {
159 x := nodes.get(i)
160 length := x & (1<<nodesBitsTextLength - 1)
161 x >>= nodesBitsTextLength
162 offset := x & (1<<nodesBitsTextOffset - 1)
163 return text[offset : offset+length]
164}
165
166// EffectiveTLDPlusOne returns the effective top level domain plus one more
167// label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
168func EffectiveTLDPlusOne(domain string) (string, error) {
169 if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
170 return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
171 }
172
173 suffix, _ := PublicSuffix(domain)
174 if len(domain) <= len(suffix) {
175 return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
176 }
177 i := len(domain) - len(suffix) - 1
178 if domain[i] != '.' {
179 return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
180 }
181 return domain[1+strings.LastIndex(domain[:i], "."):], nil
182}
183
// uint32String is a string viewed as a packed array of big-endian 32-bit
// unsigned integers, four bytes per entry.
type uint32String string

// get returns the i'th entry of u, decoded big-endian. It panics if any of
// the entry's four bytes lies outside the string.
//
// The byte offset is computed in 64 bits. A uint32 multiplication would wrap
// for i >= 1<<30 and silently read an unrelated entry instead of panicking
// like every other out-of-range index.
func (u uint32String) get(i uint32) uint32 {
	off := uint64(i) * 4
	return uint32(u[off])<<24 |
		uint32(u[off+1])<<16 |
		uint32(u[off+2])<<8 |
		uint32(u[off+3])
}
193
194type uint40String string
195
196func (u uint40String) get(i uint32) uint64 {
197 off := uint64(i * (nodesBits / 8))
198 return uint64(u[off])<<32 |
199 uint64(u[off+1])<<24 |
200 uint64(u[off+2])<<16 |
201 uint64(u[off+3])<<8 |
202 uint64(u[off+4])
203}