217 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			217 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright 2015 Huan Du. All rights reserved.
 | |
| // Licensed under the MIT license that can be found in the LICENSE file.
 | |
| 
 | |
| package xstrings
 | |
| 
 | |
| import (
 | |
| 	"strings"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// Reverse returns str with the order of its runes reversed.
//
// The bytes that make up each rune keep their relative order, so multi-byte
// characters survive the reversal intact. The result always has the same
// byte length as str.
func Reverse(str string) string {
	out := make([]byte, len(str))

	// write is the offset in out where the next rune lands; it moves from
	// the end of out toward the front while read moves forward through str.
	write := len(str)

	for read := 0; read < len(str); {
		_, width := utf8.DecodeRuneInString(str[read:])
		write -= width
		copy(out[write:], str[read:read+width])
		read += width
	}

	return string(out)
}
 | |
| 
 | |
// Slice returns the part of str between rune index start (inclusive) and
// rune index end (exclusive). Indexes count runes, not bytes.
//
// Start must satisfy 0 <= start <= rune length.
//
// End can be positive, zero or negative.
// If end >= 0, start and end must satisfy start <= end <= rune length.
// If end < 0, it means slice to the end of string.
//
// Otherwise, Slice panics with "out of range".
func Slice(str string, start, end int) string {
	origin := str

	// Cheap rejection before any decoding. The rune length can never exceed
	// the byte length, so end > len(str) is always out of range.
	if start < 0 || end > len(str) || (end >= 0 && start > end) {
		panic("out of range")
	}

	// From here on a non-negative end is the number of runes to take
	// after the start position.
	if end >= 0 {
		end -= start
	}

	// Skip start runes to find the byte offset of the first rune to keep.
	startPos := 0

	for start > 0 && len(str) > 0 {
		_, size := utf8.DecodeRuneInString(str)
		start--
		startPos += size
		str = str[size:]
	}

	// The string ran out before start runes were skipped. This must panic
	// for the "slice to the end" form (end < 0) as well, not only when an
	// explicit end is given.
	if start > 0 {
		panic("out of range")
	}

	if end < 0 {
		return origin[startPos:]
	}

	// Walk end more runes to find the byte offset just past the last rune
	// to keep.
	endPos := startPos

	for end > 0 && len(str) > 0 {
		_, size := utf8.DecodeRuneInString(str)
		end--
		endPos += size
		str = str[size:]
	}

	// The string ran out before the requested number of runes was taken.
	if end > 0 {
		panic("out of range")
	}

	return origin[startPos:endPos]
}
 | |
| 
 | |
// Partition splits str around the first occurrence of sep and returns the
// three pieces as separate strings: the text before the match, the match
// itself and the text after it.
//
// If str contains sep, for example "hello" and "l", Partition returns
//     "he", "l", "lo"
//
// If str doesn't contain sep, for example "hello" and "x", Partition returns
//     "hello", "", ""
func Partition(str, sep string) (head, match, tail string) {
	at := strings.Index(str, sep)

	if at < 0 {
		// No match: everything belongs to head.
		return str, "", ""
	}

	after := at + len(sep)
	return str[:at], str[at:after], str[after:]
}
 | |
| 
 | |
// LastPartition splits str around the last occurrence of sep and returns the
// three pieces as separate strings: the text before the match, the match
// itself and the text after it.
//
// If str contains sep, for example "hello" and "l", LastPartition returns
//     "hel", "l", "o"
//
// If str doesn't contain sep, for example "hello" and "x", LastPartition returns
//     "", "", "hello"
func LastPartition(str, sep string) (head, match, tail string) {
	at := strings.LastIndex(str, sep)

	if at < 0 {
		// No match: everything belongs to tail.
		return "", "", str
	}

	after := at + len(sep)
	return str[:at], str[at:after], str[after:]
}
 | |
| 
 | |
| // Insert src into dst at given rune index.
 | |
| // Index is counted by runes instead of bytes.
 | |
| //
 | |
| // If index is out of range of dst, panic with out of range.
 | |
| func Insert(dst, src string, index int) string {
 | |
| 	return Slice(dst, 0, index) + src + Slice(dst, index, -1)
 | |
| }
 | |
| 
 | |
| // Scrub scrubs invalid utf8 bytes with repl string.
 | |
| // Adjacent invalid bytes are replaced only once.
 | |
| func Scrub(str, repl string) string {
 | |
| 	var buf *stringBuilder
 | |
| 	var r rune
 | |
| 	var size, pos int
 | |
| 	var hasError bool
 | |
| 
 | |
| 	origin := str
 | |
| 
 | |
| 	for len(str) > 0 {
 | |
| 		r, size = utf8.DecodeRuneInString(str)
 | |
| 
 | |
| 		if r == utf8.RuneError {
 | |
| 			if !hasError {
 | |
| 				if buf == nil {
 | |
| 					buf = &stringBuilder{}
 | |
| 				}
 | |
| 
 | |
| 				buf.WriteString(origin[:pos])
 | |
| 				hasError = true
 | |
| 			}
 | |
| 		} else if hasError {
 | |
| 			hasError = false
 | |
| 			buf.WriteString(repl)
 | |
| 
 | |
| 			origin = origin[pos:]
 | |
| 			pos = 0
 | |
| 		}
 | |
| 
 | |
| 		pos += size
 | |
| 		str = str[size:]
 | |
| 	}
 | |
| 
 | |
| 	if buf != nil {
 | |
| 		buf.WriteString(origin)
 | |
| 		return buf.String()
 | |
| 	}
 | |
| 
 | |
| 	// No invalid byte.
 | |
| 	return origin
 | |
| }
 | |
| 
 | |
| // WordSplit splits a string into words. Returns a slice of words.
 | |
| // If there is no word in a string, return nil.
 | |
| //
 | |
| // Word is defined as a locale dependent string containing alphabetic characters,
 | |
| // which may also contain but not start with `'` and `-` characters.
 | |
| func WordSplit(str string) []string {
 | |
| 	var word string
 | |
| 	var words []string
 | |
| 	var r rune
 | |
| 	var size, pos int
 | |
| 
 | |
| 	inWord := false
 | |
| 
 | |
| 	for len(str) > 0 {
 | |
| 		r, size = utf8.DecodeRuneInString(str)
 | |
| 
 | |
| 		switch {
 | |
| 		case isAlphabet(r):
 | |
| 			if !inWord {
 | |
| 				inWord = true
 | |
| 				word = str
 | |
| 				pos = 0
 | |
| 			}
 | |
| 
 | |
| 		case inWord && (r == '\'' || r == '-'):
 | |
| 			// Still in word.
 | |
| 
 | |
| 		default:
 | |
| 			if inWord {
 | |
| 				inWord = false
 | |
| 				words = append(words, word[:pos])
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		pos += size
 | |
| 		str = str[size:]
 | |
| 	}
 | |
| 
 | |
| 	if inWord {
 | |
| 		words = append(words, word[:pos])
 | |
| 	}
 | |
| 
 | |
| 	return words
 | |
| }
 |