aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text/unicode/bidi/bidi.go
blob: fd057601bd9178c5def359f002b06ef0769bd6f3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_trieval.go gen_ranges.go

// Package bidi contains functionality for bidirectional text support.
//
// See https://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"

// TODO
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.

import (
	"bytes"
)

// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.

// A Direction indicates the overall flow of text.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains no left-to-right and right-to-left
	// characters and that no default direction has been set.
	Neutral
)

type options struct {
	defaultDirection Direction
}

// An Option is an option for Bidi processing.
type Option func(*options)

// ICU allows the user to define embedding levels. This may be used, for example,
// to use hierarchical structure of markup languages to define embeddings.
// The following option may be a way to expose this functionality in this API.
// // LevelFunc sets a function that associates nesting levels with the given text.
// // The levels function will be called with monotonically increasing values for p.
// func LevelFunc(levels func(p int) int) Option {
// 	panic("unimplemented")
// }

// DefaultDirection sets the default direction for a Paragraph. The direction is
// overridden if the text contains directional characters.
func DefaultDirection(d Direction) Option {
	return func(opts *options) {
		opts.defaultDirection = d
	}
}

// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
	p          []byte
	o          Ordering
	opts       []Option
	types      []Class
	pairTypes  []bracketType
	pairValues []rune
	runes      []rune
	options    options
}

// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
// separator (bidi class B).
//
// The function p.Order() needs these values to be set, so this preparation could be postponed.
// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
// separator, the whole input needs to be processed anyway and should not be done twice.
//
// The function has the same return values as SetBytes() / SetString()
func (p *Paragraph) prepareInput() (n int, err error) {
	p.runes = bytes.Runes(p.p)
	bytecount := 0
	// clear slices from previous SetString or SetBytes
	p.pairTypes = nil
	p.pairValues = nil
	p.types = nil

	for _, r := range p.runes {
		props, i := LookupRune(r)
		bytecount += i
		cls := props.Class()
		if cls == B {
			return bytecount, nil
		}
		p.types = append(p.types, cls)
		if props.IsOpeningBracket() {
			p.pairTypes = append(p.pairTypes, bpOpen)
			p.pairValues = append(p.pairValues, r)
		} else if props.IsBracket() {
			// this must be a closing bracket,
			// since IsOpeningBracket is not true
			p.pairTypes = append(p.pairTypes, bpClose)
			p.pairValues = append(p.pairValues, r)
		} else {
			p.pairTypes = append(p.pairTypes, bpNone)
			p.pairValues = append(p.pairValues, 0)
		}
	}
	return bytecount, nil
}

// SetBytes configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
	p.p = b
	p.opts = opts
	return p.prepareInput()
}

// SetString configures s for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If s contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from s including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
	p.p = []byte(s)
	p.opts = opts
	return p.prepareInput()
}

// IsLeftToRight reports whether the principle direction of rendering for this
// paragraphs is left-to-right. If this returns false, the principle direction
// of rendering is right-to-left.
func (p *Paragraph) IsLeftToRight() bool {
	return p.Direction() == LeftToRight
}

// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (p *Paragraph) Direction() Direction {
	return p.o.Direction()
}

// TODO: what happens if the position is > len(input)? This should return an error.

// RunAt reports the Run at the given position of the input text.
//
// This method can be used for computing line breaks on paragraphs.
func (p *Paragraph) RunAt(pos int) Run {
	c := 0
	runNumber := 0
	for i, r := range p.o.runes {
		c += len(r)
		if pos < c {
			runNumber = i
		}
	}
	return p.o.Run(runNumber)
}

func calculateOrdering(levels []level, runes []rune) Ordering {
	var curDir Direction

	prevDir := Neutral
	prevI := 0

	o := Ordering{}
	// lvl = 0,2,4,...: left to right
	// lvl = 1,3,5,...: right to left
	for i, lvl := range levels {
		if lvl%2 == 0 {
			curDir = LeftToRight
		} else {
			curDir = RightToLeft
		}
		if curDir != prevDir {
			if i > 0 {
				o.runes = append(o.runes, runes[prevI:i])
				o.directions = append(o.directions, prevDir)
				o.startpos = append(o.startpos, prevI)
			}
			prevI = i
			prevDir = curDir
		}
	}
	o.runes = append(o.runes, runes[prevI:])
	o.directions = append(o.directions, prevDir)
	o.startpos = append(o.startpos, prevI)
	return o
}

// Order computes the visual ordering of all the runs in a Paragraph.
func (p *Paragraph) Order() (Ordering, error) {
	if len(p.types) == 0 {
		return Ordering{}, nil
	}

	for _, fn := range p.opts {
		fn(&p.options)
	}
	lvl := level(-1)
	if p.options.defaultDirection == RightToLeft {
		lvl = 1
	}
	para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
	if err != nil {
		return Ordering{}, err
	}

	levels := para.getLevels([]int{len(p.types)})

	p.o = calculateOrdering(levels, p.runes)
	return p.o, nil
}

// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
func (p *Paragraph) Line(start, end int) (Ordering, error) {
	lineTypes := p.types[start:end]
	para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
	if err != nil {
		return Ordering{}, err
	}
	levels := para.getLevels([]int{len(lineTypes)})
	o := calculateOrdering(levels, p.runes[start:end])
	return o, nil
}

// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
type Ordering struct {
	runes      [][]rune
	directions []Direction
	startpos   []int
}

// Direction reports the directionality of the runs.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (o *Ordering) Direction() Direction {
	return o.directions[0]
}

// NumRuns returns the number of runs.
func (o *Ordering) NumRuns() int {
	return len(o.runes)
}

// Run returns the ith run within the ordering.
func (o *Ordering) Run(i int) Run {
	r := Run{
		runes:     o.runes[i],
		direction: o.directions[i],
		startpos:  o.startpos[i],
	}
	return r
}

// TODO: perhaps with options.
// // Reorder creates a reader that reads the runes in visual order per character.
// // Modifiers remain after the runes they modify.
// func (l *Runs) Reorder() io.Reader {
// 	panic("unimplemented")
// }

// A Run is a continuous sequence of characters of a single direction.
type Run struct {
	runes     []rune
	direction Direction
	startpos  int
}

// String returns the text of the run in its original order.
func (r *Run) String() string {
	return string(r.runes)
}

// Bytes returns the text of the run in its original order.
func (r *Run) Bytes() []byte {
	return []byte(r.String())
}

// TODO: methods for
// - Display order
// - headers and footers
// - bracket replacement.

// Direction reports the direction of the run.
func (r *Run) Direction() Direction {
	return r.direction
}

// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
// originating Paragraph value.
func (r *Run) Pos() (start, end int) {
	return r.startpos, r.startpos + len(r.runes) - 1
}

// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
func AppendReverse(out, in []byte) []byte {
	ret := make([]byte, len(in)+len(out))
	copy(ret, out)
	inRunes := bytes.Runes(in)

	for i, r := range inRunes {
		prop, _ := LookupRune(r)
		if prop.IsBracket() {
			inRunes[i] = prop.reverseBracket(r)
		}
	}

	for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
		inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
	}
	copy(ret[len(out):], string(inRunes))

	return ret
}

// ReverseString reverses the order of characters in s and returns a new string.
// Modifiers will still follow the runes they modify. Brackets are replaced with
// their counterparts.
func ReverseString(s string) string {
	input := []rune(s)
	li := len(input)
	ret := make([]rune, li)
	for i, r := range input {
		prop, _ := LookupRune(r)
		if prop.IsBracket() {
			ret[li-i-1] = prop.reverseBracket(r)
		} else {
			ret[li-i-1] = r
		}
	}
	return string(ret)
}