-
-
Notifications
You must be signed in to change notification settings - Fork 946
Expand file tree
/
Copy pathstring.go
More file actions
318 lines (277 loc) · 8.96 KB
/
string.go
File metadata and controls
318 lines (277 loc) · 8.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
package lo
import (
"math"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"github.com/samber/lo/internal/xrand"
)
var (
//nolint:revive
LowerCaseLettersCharset = []rune("abcdefghijklmnopqrstuvwxyz")
UpperCaseLettersCharset = []rune("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
LettersCharset = append(LowerCaseLettersCharset, UpperCaseLettersCharset...)
NumbersCharset = []rune("0123456789")
AlphanumericCharset = append(LettersCharset, NumbersCharset...)
SpecialCharset = []rune("!@#$%^&*()_+-=[]{}|;':\",./<>?")
AllCharset = append(AlphanumericCharset, SpecialCharset...)
// bearer:disable go_lang_permissive_regex_validation
splitWordReg = regexp.MustCompile(`([a-z])([A-Z0-9])|([a-zA-Z])([0-9])|([0-9])([a-zA-Z])|([A-Z])([A-Z])([a-z])`)
// bearer:disable go_lang_permissive_regex_validation
splitNumberLetterReg = regexp.MustCompile(`([0-9])([a-zA-Z])`)
maximumCapacity = math.MaxInt>>1 + 1
)
// RandomString return a random string.
// Play: https://go.dev/play/p/rRseOQVVum4
func RandomString(size int, charset []rune) string {
if size <= 0 {
panic("lo.RandomString: size must be greater than 0")
}
if len(charset) == 0 {
panic("lo.RandomString: charset must not be empty")
}
// see https://stackoverflow.com/questions/22892120/how-to-generate-a-random-string-of-a-fixed-length-in-go
var sb strings.Builder
sb.Grow(size)
if len(charset) == 1 {
// Edge case, because if the charset is a single character,
// it will panic below (divide by zero).
// -> https://github.com/samber/lo/issues/679
for i := 0; i < size; i++ {
sb.WriteRune(charset[0])
}
return sb.String()
}
// Calculate the number of bits required to represent the charset,
// e.g., for 62 characters, it would need 6 bits (since 62 -> 64 = 2^6)
letterIDBits := int(math.Log2(float64(nearestPowerOfTwo(len(charset)))))
// Determine the corresponding bitmask,
// e.g., for 62 characters, the bitmask would be 111111.
var letterIDMask int64 = 1<<letterIDBits - 1
// Available count, since xrand.Int64() returns a non-negative number, the first bit is fixed, so there are 63 random bits
// e.g., for 62 characters, this value is 10 (63 / 6).
letterIDMax := 63 / letterIDBits
// Generate the random string in a loop.
for i, cache, remain := size-1, xrand.Int64(), letterIDMax; i >= 0; {
// Regenerate the random number if all available bits have been used
if remain == 0 {
cache, remain = xrand.Int64(), letterIDMax
}
// Select a character from the charset
if idx := int(cache & letterIDMask); idx < len(charset) {
sb.WriteRune(charset[idx])
i--
}
// Shift the bits to the right to prepare for the next character selection,
// e.g., for 62 characters, shift by 6 bits.
cache >>= letterIDBits
// Decrease the remaining number of uses for the current random number.
remain--
}
return sb.String()
}
// nearestPowerOfTwo returns the nearest power of two.
func nearestPowerOfTwo(capacity int) int {
n := capacity - 1
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
if n < 0 {
return 1
}
if n >= maximumCapacity {
return maximumCapacity
}
return n + 1
}
// Substring extracts a substring from a string with Unicode character (rune) awareness.
// offset - starting position of the substring (can be positive, negative, or zero)
// length - number of characters to extract
// With positive offset, counting starts from the beginning of the string
// With negative offset, counting starts from the end of the string
// Play: https://go.dev/play/p/TQlxQi82Lu1
func Substring[T ~string](str T, offset int, length uint) T {
str = substring(str, offset, length)
// Validate UTF-8 and fix invalid sequences
if !utf8.ValidString(string(str)) {
// Convert to []rune to replicate behavior with duplicated �
str = T([]rune(str))
}
// Remove null bytes from result
return T(strings.ReplaceAll(string(str), "\x00", ""))
}
func substring[T ~string](str T, offset int, length uint) T {
switch {
// Empty length or offset beyond string bounds - return empty string
case length == 0, offset >= len(str):
return ""
// Positive offset - count from the beginning
case offset > 0:
// Skip offset runes from the start
for i, r := range str {
if offset--; offset == 0 {
str = str[i+utf8.RuneLen(r):]
break
}
}
// If couldn't skip enough runes - string is shorter than offset
if offset != 0 {
return ""
}
// If remaining string is shorter than or equal to length - return it entirely
if uint(len(str)) <= length {
return str
}
// Otherwise proceed to trimming by length
fallthrough
// Zero offset or offset less than minus string length - start from beginning
case offset < -len(str), offset == 0:
// Count length runes from the start
for i := range str {
if length == 0 {
return str[:i]
}
length--
}
return str
// Negative offset - count from the end of string
default: // -len(str) < offset < 0
// Helper function to move backward through runes
backwardPos := func(end int, count uint) (start int) {
for {
_, i := utf8.DecodeLastRuneInString(string(str[:end]))
end -= i
if count--; count == 0 || end == 0 {
return end
}
}
}
offset := uint(-offset)
// If offset is less than or equal to length - take from position to end
if offset <= length {
start := backwardPos(len(str), offset)
return str[start:]
}
// Otherwise calculate start and end positions
end := backwardPos(len(str), offset-length)
start := backwardPos(end, length)
return str[start:end]
}
}
// ChunkString returns a slice of strings split into groups of length size. If the string can't be split evenly,
// the final chunk will be the remaining characters.
// Play: https://go.dev/play/p/__FLTuJVz54
//
// Note: lo.ChunkString and lo.Chunk functions behave inconsistently for empty input: lo.ChunkString("", n) returns [""] instead of [].
// See https://github.com/samber/lo/issues/788
func ChunkString[T ~string](str T, size int) []T {
if size <= 0 {
panic("lo.ChunkString: size must be greater than 0")
}
if size >= len(str) {
return []T{str}
}
chunks := make([]T, 0, ((len(str)-1)/size)+1)
currentLen := 0
currentStart := 0
for i := range str {
if currentLen == size {
chunks = append(chunks, str[currentStart:i])
currentLen = 0
currentStart = i
}
currentLen++
}
chunks = append(chunks, str[currentStart:])
return chunks
}
// RuneLength is an alias to utf8.RuneCountInString which returns the number of runes in string.
// Play: https://go.dev/play/p/tuhgW_lWY8l
func RuneLength(str string) int {
return utf8.RuneCountInString(str)
}
// PascalCase converts string to pascal case.
// Play: https://go.dev/play/p/Dy_V_6DUYhe
func PascalCase(str string) string {
items := Words(str)
for i := range items {
items[i] = Capitalize(items[i])
}
return strings.Join(items, "")
}
// CamelCase converts string to camel case.
// Play: https://go.dev/play/p/Go6aKwUiq59
func CamelCase(str string) string {
items := Words(str)
for i, item := range items {
item = strings.ToLower(item)
if i > 0 {
item = Capitalize(item)
}
items[i] = item
}
return strings.Join(items, "")
}
// KebabCase converts string to kebab case.
// Play: https://go.dev/play/p/96gT_WZnTVP
func KebabCase(str string) string {
items := Words(str)
for i := range items {
items[i] = strings.ToLower(items[i])
}
return strings.Join(items, "-")
}
// SnakeCase converts string to snake case.
// Play: https://go.dev/play/p/ziB0V89IeVH
func SnakeCase(str string) string {
items := Words(str)
for i := range items {
items[i] = strings.ToLower(items[i])
}
return strings.Join(items, "_")
}
// Words splits string into a slice of its words.
// Play: https://go.dev/play/p/-f3VIQqiaVw
func Words(str string) []string {
str = splitWordReg.ReplaceAllString(str, `$1$3$5$7 $2$4$6$8$9`)
// example: Int8Value => Int 8Value => Int 8 Value
str = splitNumberLetterReg.ReplaceAllString(str, "$1 $2")
var result strings.Builder
result.Grow(len(str))
for _, r := range str {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
result.WriteRune(r)
} else {
result.WriteRune(' ')
}
}
return strings.Fields(result.String())
}
// Capitalize converts the first character of string to upper case and the remaining to lower case.
// Play: https://go.dev/play/p/uLTZZQXqnsa
func Capitalize(str string) string {
return cases.Title(language.English).String(str)
}
// Ellipsis trims and truncates a string to a specified length in runes and appends an ellipsis
// if truncated. The length parameter counts Unicode code points (runes), not bytes, so multi-byte
// characters such as emoji or CJK ideographs are never split in the middle.
// Play: https://go.dev/play/p/qE93rgqe1TW
func Ellipsis(str string, length int) string {
str = strings.TrimSpace(str)
const ellipsis = "..."
cutPosition := 0
for i := range str {
if length == len(ellipsis) {
cutPosition = i
}
if length--; length < 0 {
return strings.TrimSpace(str[:cutPosition]) + ellipsis
}
}
return str
}