diff options
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/bracket.go')
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/bracket.go | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/bracket.go b/vendor/golang.org/x/text/unicode/bidi/bracket.go new file mode 100644 index 0000000..1853939 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/bidi/bracket.go | |||
@@ -0,0 +1,335 @@ | |||
1 | // Copyright 2015 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package bidi | ||
6 | |||
7 | import ( | ||
8 | "container/list" | ||
9 | "fmt" | ||
10 | "sort" | ||
11 | ) | ||
12 | |||
13 | // This file contains a port of the reference implementation of the | ||
14 | // Bidi Parentheses Algorithm: | ||
15 | // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java | ||
16 | // | ||
17 | // The implementation in this file covers definitions BD14-BD16 and rule N0 | ||
18 | // of UAX#9. | ||
19 | // | ||
20 | // Some preprocessing is done for each rune before data is passed to this | ||
21 | // algorithm: | ||
22 | // - opening and closing brackets are identified | ||
23 | // - a bracket pair type, like '(' and ')' is assigned a unique identifier that | ||
24 | // is identical for the opening and closing bracket. It is left to do these | ||
25 | // mappings. | ||
26 | // - The BPA algorithm requires that bracket characters that are canonical | ||
27 | // equivalents of each other be able to be substituted for each other. | ||
28 | // It is the responsibility of the caller to do this canonicalization. | ||
29 | // | ||
30 | // In implementing BD16, this implementation departs slightly from the "logical" | ||
31 | // algorithm defined in UAX#9. In particular, the stack referenced there | ||
32 | // supports operations that go beyond a "basic" stack. An equivalent | ||
33 | // implementation based on a linked list is used here. | ||
34 | |||
35 | // Bidi_Paired_Bracket_Type | ||
36 | // BD14. An opening paired bracket is a character whose | ||
37 | // Bidi_Paired_Bracket_Type property value is Open. | ||
38 | // | ||
39 | // BD15. A closing paired bracket is a character whose | ||
40 | // Bidi_Paired_Bracket_Type property value is Close. | ||
41 | type bracketType byte | ||
42 | |||
43 | const ( | ||
44 | bpNone bracketType = iota | ||
45 | bpOpen | ||
46 | bpClose | ||
47 | ) | ||
48 | |||
49 | // bracketPair holds a pair of index values for opening and closing bracket | ||
50 | // location of a bracket pair. | ||
51 | type bracketPair struct { | ||
52 | opener int | ||
53 | closer int | ||
54 | } | ||
55 | |||
56 | func (b *bracketPair) String() string { | ||
57 | return fmt.Sprintf("(%v, %v)", b.opener, b.closer) | ||
58 | } | ||
59 | |||
60 | // bracketPairs is a slice of bracketPairs with a sort.Interface implementation. | ||
61 | type bracketPairs []bracketPair | ||
62 | |||
63 | func (b bracketPairs) Len() int { return len(b) } | ||
64 | func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] } | ||
65 | func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener } | ||
66 | |||
67 | // resolvePairedBrackets runs the paired bracket part of the UBA algorithm. | ||
68 | // | ||
69 | // For each rune, it takes the indexes into the original string, the class the | ||
70 | // bracket type (in pairTypes) and the bracket identifier (pairValues). It also | ||
71 | // takes the direction type for the start-of-sentence and the embedding level. | ||
72 | // | ||
73 | // The identifiers for bracket types are the rune of the canonicalized opening | ||
74 | // bracket for brackets (open or close) or 0 for runes that are not brackets. | ||
75 | func resolvePairedBrackets(s *isolatingRunSequence) { | ||
76 | p := bracketPairer{ | ||
77 | sos: s.sos, | ||
78 | openers: list.New(), | ||
79 | codesIsolatedRun: s.types, | ||
80 | indexes: s.indexes, | ||
81 | } | ||
82 | dirEmbed := L | ||
83 | if s.level&1 != 0 { | ||
84 | dirEmbed = R | ||
85 | } | ||
86 | p.locateBrackets(s.p.pairTypes, s.p.pairValues) | ||
87 | p.resolveBrackets(dirEmbed, s.p.initialTypes) | ||
88 | } | ||
89 | |||
90 | type bracketPairer struct { | ||
91 | sos Class // direction corresponding to start of sequence | ||
92 | |||
93 | // The following is a restatement of BD 16 using non-algorithmic language. | ||
94 | // | ||
95 | // A bracket pair is a pair of characters consisting of an opening | ||
96 | // paired bracket and a closing paired bracket such that the | ||
97 | // Bidi_Paired_Bracket property value of the former equals the latter, | ||
98 | // subject to the following constraints. | ||
99 | // - both characters of a pair occur in the same isolating run sequence | ||
100 | // - the closing character of a pair follows the opening character | ||
101 | // - any bracket character can belong at most to one pair, the earliest possible one | ||
102 | // - any bracket character not part of a pair is treated like an ordinary character | ||
103 | // - pairs may nest properly, but their spans may not overlap otherwise | ||
104 | |||
105 | // Bracket characters with canonical decompositions are supposed to be | ||
106 | // treated as if they had been normalized, to allow normalized and non- | ||
107 | // normalized text to give the same result. In this implementation that step | ||
108 | // is pushed out to the caller. The caller has to ensure that the pairValue | ||
109 | // slices contain the rune of the opening bracket after normalization for | ||
110 | // any opening or closing bracket. | ||
111 | |||
112 | openers *list.List // list of positions for opening brackets | ||
113 | |||
114 | // bracket pair positions sorted by location of opening bracket | ||
115 | pairPositions bracketPairs | ||
116 | |||
117 | codesIsolatedRun []Class // directional bidi codes for an isolated run | ||
118 | indexes []int // array of index values into the original string | ||
119 | |||
120 | } | ||
121 | |||
122 | // matchOpener reports whether characters at given positions form a matching | ||
123 | // bracket pair. | ||
124 | func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { | ||
125 | return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]] | ||
126 | } | ||
127 | |||
128 | const maxPairingDepth = 63 | ||
129 | |||
130 | // locateBrackets locates matching bracket pairs according to BD16. | ||
131 | // | ||
132 | // This implementation uses a linked list instead of a stack, because, while | ||
133 | // elements are added at the front (like a push) they are not generally removed | ||
134 | // in atomic 'pop' operations, reducing the benefit of the stack archetype. | ||
135 | func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { | ||
136 | // traverse the run | ||
137 | // do that explicitly (not in a for-each) so we can record position | ||
138 | for i, index := range p.indexes { | ||
139 | |||
140 | // look at the bracket type for each character | ||
141 | if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON { | ||
142 | // continue scanning | ||
143 | continue | ||
144 | } | ||
145 | switch pairTypes[index] { | ||
146 | case bpOpen: | ||
147 | // check if maximum pairing depth reached | ||
148 | if p.openers.Len() == maxPairingDepth { | ||
149 | p.openers.Init() | ||
150 | return | ||
151 | } | ||
152 | // remember opener location, most recent first | ||
153 | p.openers.PushFront(i) | ||
154 | |||
155 | case bpClose: | ||
156 | // see if there is a match | ||
157 | count := 0 | ||
158 | for elem := p.openers.Front(); elem != nil; elem = elem.Next() { | ||
159 | count++ | ||
160 | opener := elem.Value.(int) | ||
161 | if p.matchOpener(pairValues, opener, i) { | ||
162 | // if the opener matches, add nested pair to the ordered list | ||
163 | p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) | ||
164 | // remove up to and including matched opener | ||
165 | for ; count > 0; count-- { | ||
166 | p.openers.Remove(p.openers.Front()) | ||
167 | } | ||
168 | break | ||
169 | } | ||
170 | } | ||
171 | sort.Sort(p.pairPositions) | ||
172 | // if we get here, the closing bracket matched no openers | ||
173 | // and gets ignored | ||
174 | } | ||
175 | } | ||
176 | } | ||
177 | |||
178 | // Bracket pairs within an isolating run sequence are processed as units so | ||
179 | // that both the opening and the closing paired bracket in a pair resolve to | ||
180 | // the same direction. | ||
181 | // | ||
182 | // N0. Process bracket pairs in an isolating run sequence sequentially in | ||
183 | // the logical order of the text positions of the opening paired brackets | ||
184 | // using the logic given below. Within this scope, bidirectional types EN | ||
185 | // and AN are treated as R. | ||
186 | // | ||
187 | // Identify the bracket pairs in the current isolating run sequence | ||
188 | // according to BD16. For each bracket-pair element in the list of pairs of | ||
189 | // text positions: | ||
190 | // | ||
191 | // a Inspect the bidirectional types of the characters enclosed within the | ||
192 | // bracket pair. | ||
193 | // | ||
194 | // b If any strong type (either L or R) matching the embedding direction is | ||
195 | // found, set the type for both brackets in the pair to match the embedding | ||
196 | // direction. | ||
197 | // | ||
198 | // o [ e ] o -> o e e e o | ||
199 | // | ||
200 | // o [ o e ] -> o e o e e | ||
201 | // | ||
202 | // o [ NI e ] -> o e NI e e | ||
203 | // | ||
204 | // c Otherwise, if a strong type (opposite the embedding direction) is | ||
205 | // found, test for adjacent strong types as follows: 1 First, check | ||
206 | // backwards before the opening paired bracket until the first strong type | ||
207 | // (L, R, or sos) is found. If that first preceding strong type is opposite | ||
208 | // the embedding direction, then set the type for both brackets in the pair | ||
209 | // to that type. 2 Otherwise, set the type for both brackets in the pair to | ||
210 | // the embedding direction. | ||
211 | // | ||
212 | // o [ o ] e -> o o o o e | ||
213 | // | ||
214 | // o [ o NI ] o -> o o o NI o o | ||
215 | // | ||
216 | // e [ o ] o -> e e o e o | ||
217 | // | ||
218 | // e [ o ] e -> e e o e e | ||
219 | // | ||
220 | // e ( o [ o ] NI ) e -> e e o o o o NI e e | ||
221 | // | ||
222 | // d Otherwise, do not set the type for the current bracket pair. Note that | ||
223 | // if the enclosed text contains no strong types the paired brackets will | ||
224 | // both resolve to the same level when resolved individually using rules N1 | ||
225 | // and N2. | ||
226 | // | ||
227 | // e ( NI ) o -> e ( NI ) o | ||
228 | |||
229 | // getStrongTypeN0 maps character's directional code to strong type as required | ||
230 | // by rule N0. | ||
231 | // | ||
232 | // TODO: have separate type for "strong" directionality. | ||
233 | func (p *bracketPairer) getStrongTypeN0(index int) Class { | ||
234 | switch p.codesIsolatedRun[index] { | ||
235 | // in the scope of N0, number types are treated as R | ||
236 | case EN, AN, AL, R: | ||
237 | return R | ||
238 | case L: | ||
239 | return L | ||
240 | default: | ||
241 | return ON | ||
242 | } | ||
243 | } | ||
244 | |||
245 | // classifyPairContent reports the strong types contained inside a Bracket Pair, | ||
246 | // assuming the given embedding direction. | ||
247 | // | ||
248 | // It returns ON if no strong type is found. If a single strong type is found, | ||
249 | // it returns this type. Otherwise it returns the embedding direction. | ||
250 | // | ||
251 | // TODO: use separate type for "strong" directionality. | ||
252 | func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class { | ||
253 | dirOpposite := ON | ||
254 | for i := loc.opener + 1; i < loc.closer; i++ { | ||
255 | dir := p.getStrongTypeN0(i) | ||
256 | if dir == ON { | ||
257 | continue | ||
258 | } | ||
259 | if dir == dirEmbed { | ||
260 | return dir // type matching embedding direction found | ||
261 | } | ||
262 | dirOpposite = dir | ||
263 | } | ||
264 | // return ON if no strong type found, or class opposite to dirEmbed | ||
265 | return dirOpposite | ||
266 | } | ||
267 | |||
268 | // classBeforePair determines which strong types are present before a Bracket | ||
269 | // Pair. Return R or L if strong type found, otherwise ON. | ||
270 | func (p *bracketPairer) classBeforePair(loc bracketPair) Class { | ||
271 | for i := loc.opener - 1; i >= 0; i-- { | ||
272 | if dir := p.getStrongTypeN0(i); dir != ON { | ||
273 | return dir | ||
274 | } | ||
275 | } | ||
276 | // no strong types found, return sos | ||
277 | return p.sos | ||
278 | } | ||
279 | |||
280 | // assignBracketType implements rule N0 for a single bracket pair. | ||
281 | func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) { | ||
282 | // rule "N0, a", inspect contents of pair | ||
283 | dirPair := p.classifyPairContent(loc, dirEmbed) | ||
284 | |||
285 | // dirPair is now L, R, or N (no strong type found) | ||
286 | |||
287 | // the following logical tests are performed out of order compared to | ||
288 | // the statement of the rules but yield the same results | ||
289 | if dirPair == ON { | ||
290 | return // case "d" - nothing to do | ||
291 | } | ||
292 | |||
293 | if dirPair != dirEmbed { | ||
294 | // case "c": strong type found, opposite - check before (c.1) | ||
295 | dirPair = p.classBeforePair(loc) | ||
296 | if dirPair == dirEmbed || dirPair == ON { | ||
297 | // no strong opposite type found before - use embedding (c.2) | ||
298 | dirPair = dirEmbed | ||
299 | } | ||
300 | } | ||
301 | // else: case "b", strong type found matching embedding, | ||
302 | // no explicit action needed, as dirPair is already set to embedding | ||
303 | // direction | ||
304 | |||
305 | // set the bracket types to the type found | ||
306 | p.setBracketsToType(loc, dirPair, initialTypes) | ||
307 | } | ||
308 | |||
309 | func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) { | ||
310 | p.codesIsolatedRun[loc.opener] = dirPair | ||
311 | p.codesIsolatedRun[loc.closer] = dirPair | ||
312 | |||
313 | for i := loc.opener + 1; i < loc.closer; i++ { | ||
314 | index := p.indexes[i] | ||
315 | if initialTypes[index] != NSM { | ||
316 | break | ||
317 | } | ||
318 | p.codesIsolatedRun[i] = dirPair | ||
319 | } | ||
320 | |||
321 | for i := loc.closer + 1; i < len(p.indexes); i++ { | ||
322 | index := p.indexes[i] | ||
323 | if initialTypes[index] != NSM { | ||
324 | break | ||
325 | } | ||
326 | p.codesIsolatedRun[i] = dirPair | ||
327 | } | ||
328 | } | ||
329 | |||
330 | // resolveBrackets implements rule N0 for a list of pairs. | ||
331 | func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) { | ||
332 | for _, loc := range p.pairPositions { | ||
333 | p.assignBracketType(loc, dirEmbed, initialTypes) | ||
334 | } | ||
335 | } | ||