51 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			51 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Go
		
	
	
	
| package files
 | |
| 
 | |
| import (
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// isBinary reports whether content looks like binary data rather than text.
//
// Content is classified as binary when either of the following holds:
//   - it contains a code point <= 8 (NUL, backspace and the other low
//     control characters), or
//   - it contains a byte that is not valid UTF-8, unless every invalid byte
//     sits in one unbroken run at the very end of content and within the last
//     utf8.UTFMax runes. Such a run is tolerated because the caller may have
//     handed us a buffer that was cut in the middle of a multi-byte sequence.
//
// A correctly encoded U+FFFD (the three bytes EF BF BD) is valid text and is
// not treated as a decoding error; only bytes that fail to decode are.
//
// The second parameter is ignored; it is kept so existing callers keep
// compiling. Empty or nil content is reported as not binary.
func isBinary(content []byte, _ int) bool {
	// Code points 8 and below are control chars (e.g. backspace, null, etc).
	const maxControlCharsCode = 8

	runeCnt := utf8.RuneCount(content)
	runeIndex := 0
	decodeErrCnt := 0
	firstDecodeErrIndex := -1

	// Decode straight from the byte slice: this avoids copying the whole
	// buffer into a string and, unlike ranging over a string, exposes the
	// encoded width of every rune.
	for offset := 0; offset < len(content); {
		r, size := utf8.DecodeRune(content[offset:])
		offset += size

		if r <= maxControlCharsCode {
			return true
		}

		// DecodeRune signals an invalid byte as (RuneError, 1). A genuine
		// U+FFFD decodes as (RuneError, 3) and must not be counted.
		if r == utf8.RuneError && size == 1 {
			// An invalid byte anywhere before the last utf8.UTFMax runes
			// cannot be explained by a truncated trailing sequence.
			if runeCnt > utf8.UTFMax && runeIndex < runeCnt-utf8.UTFMax {
				return true
			}
			decodeErrCnt++

			// Remember where the trailing run of errors starts.
			if firstDecodeErrIndex == -1 {
				firstDecodeErrIndex = runeIndex
			}
		}
		runeIndex++
	}

	// Errors in the tail are only acceptable when everything from the first
	// error to the end of content failed to decode; a valid rune after an
	// invalid byte means the damage is not a truncated final sequence.
	return firstDecodeErrIndex != -1 && decodeErrCnt != runeCnt-firstDecodeErrIndex
}
 |