strings包和bytes包

strings包和bytes包非常像，几乎所有函数都有string和[]byte两种接口，其中前者被实现在strings包中，而后者被实现在bytes包中，所以这里将这两个包一起学习。

官方文档：

strings包：https://pkg.go.dev/strings@go1.21.4

bytes包：https://pkg.go.dev/bytes@go1.21.4

函数

clone

// strings
func Clone(s string) string
// bytes
func Clone(b []byte) []byte

克隆[]byte和string。默认赋值操作只是浅拷贝，Clone会对[]byte和string进行深拷贝。

// CloneTest runs the copy-versus-clone demonstrations one after another, in
// a fixed order: slice assignment, bytes.Clone, string assignment and finally
// strings.Clone.
func CloneTest() {
	demos := [...]func(){
		sliceCopyTest,
		sliceCloneTest,
		stringCopyTest,
		stringCloneTest,
	}
	for _, demo := range demos {
		demo()
	}
}

// sliceCopyTest demonstrates plain assignment of a []byte: the second
// variable refers to the same backing array, so a write made through the
// first one is visible through both when they are printed.
func sliceCopyTest() {
	var (
		source = []byte("testing")
		alias  = source // assignment copies the slice header only
	)

	// Mutate through the first variable before reporting both values.
	source[0] = 's'

	fmt.Print("Copy(bts02 := bts01), ")
	fmt.Println("bts01[0] = 's':")
	fmt.Printf("bts01 = %q, bts02 = %q\n", source, alias)
	fmt.Println()
}

// sliceCloneTest demonstrates bytes.Clone: the clone is made before the
// source is modified, and both slices are then printed so the difference
// from plain assignment can be observed.
func sliceCloneTest() {
	var (
		source   = []byte("testing")
		detached = bytes.Clone(source) // independent copy of the bytes
	)

	// Mutate only the source; the clone was taken beforehand.
	source[0] = 's'

	fmt.Print("Copy(bts02 := bytes.Clone(bts01)), ")
	fmt.Println("bts01[0] = 's':")
	fmt.Printf("bts01 = %q, bts02 = %q\n", source, detached)
	fmt.Println()
}

// stringCopyTest assigns one string to another and prints whether both
// values report the same underlying data pointer via unsafe.StringData.
func stringCopyTest() {
	first := "testing"
	second := first

	// Compare the addresses of the underlying bytes, not the contents.
	sameData := unsafe.StringData(first) == unsafe.StringData(second)

	fmt.Println("str02 := str01:")
	fmt.Printf("unsafe.StringData(str01) == unsafe.StringData(str02): %v\n", sameData)
	fmt.Println()
}

// stringCloneTest copies a string with strings.Clone and prints whether the
// original and the clone report the same underlying data pointer via
// unsafe.StringData.
func stringCloneTest() {
	first := "testing"
	second := strings.Clone(first)

	// Compare the addresses of the underlying bytes, not the contents.
	sameData := unsafe.StringData(first) == unsafe.StringData(second)

	// The label is printed exactly as in the recorded output.
	fmt.Println("str02 := strings(str01):")
	fmt.Printf("unsafe.StringData(str01) == unsafe.StringData(str02): %v\n", sameData)
	fmt.Println()
}

/*
Output:
Copy(bts02 := bts01), bts01[0] = 's':
bts01 = "sesting", bts02 = "sesting"

Copy(bts02 := bytes.Clone(bts01)), bts01[0] = 's':
bts01 = "sesting", bts02 = "testing"

str02 := str01:
unsafe.StringData(str01) == unsafe.StringData(str02): true

str02 := strings(str01):
unsafe.StringData(str01) == unsafe.StringData(str02): false
*/

内容检查类的函数，是否包含。Contains/Has.

// strings
// s中是否包含substr
func Contains(s, substr string) bool
// s中是否包含r
func ContainsRune(s string, r rune) bool
// s中是否包含任何chars中的rune
func ContainsAny(s, chars string) bool
// 对s中的所有rune调用f，是否有一个调用结果为true
func ContainsFunc(s string, f func(rune) bool) bool
// s中是否有前缀prefix
func HasPrefix(s, prefix string) bool
// s中是否有后缀suffix
func HasSuffix(s, suffix string) bool

// bytes
func Contains(b, subslice []byte) bool
func ContainsRune(b []byte, r rune) bool
func ContainsAny(b []byte, chars string) bool // 注意，chars类型为string
func ContainsFunc(b []byte, f func(rune) bool) bool
func HasPrefix(s, prefix []byte) bool
func HasSuffix(s, suffix []byte) bool

测试：

// ContainsAndHasTest exercises the Contains*/Has* family of the strings
// package against one sample sentence and prints each boolean result on its
// own line, followed by a blank line.
func ContainsAndHasTest() {
	const sentence = "“你好”是一个很好的词汇"

	// Reports whether the rune is one of the two characters of interest.
	isNiOrWo := func(c rune) bool {
		switch c {
		case '你', '我':
			return true
		}
		return false
	}

	// Every check below is expected to print true for the sample sentence.
	checks := []bool{
		strings.Contains(sentence, "词汇"),
		strings.ContainsAny(sentence, "词语"),
		strings.ContainsRune(sentence, '你'),
		strings.ContainsFunc(sentence, isNiOrWo),
		strings.HasPrefix(sentence, "“你好"),
		strings.HasSuffix(sentence, "词汇"),
	}
	for _, ok := range checks {
		fmt.Println(ok)
	}
	fmt.Println()
}

Count

// strings
func Count(s, substr string) int
// bytes
func Count(s, sep []byte) int

获取子串在s中（不重叠）出现的次数；子串为空串时返回 1 + s中unicode码点的数量。

获取字串的下标 Index

// strings
// 子串substr在s中第一次出现的下标（以字节为单位），不存在则返回-1
func Index(s, substr string) int
// unicode码点r在s中的第一个下标，不存在则返回-1；r为utf8.RuneError时返回第一个非法UTF-8字节序列的下标
func IndexRune(s string, r rune) int
// 字节c在s中的第一个下标
func IndexByte(s string, c byte) int
// chars中任意一个rune出现在s中的第一个下标
func IndexAny(s, chars string) int
// s中的rune r，使得f(r)为真的第一个rune的下标
func IndexFunc(s string, f func(rune) bool) int

// 最后一个下标的版本，注意，没有LastIndexRune这个函数
func LastIndex(s, substr string) int
func LastIndexAny(s, chars string) int
func LastIndexByte(s string, c byte) int
func LastIndexFunc(s string, f func(rune) bool) int

// bytes
func Index(s, sep []byte) int
func IndexAny(s []byte, chars string) int
func IndexByte(b []byte, c byte) int
func IndexFunc(s []byte, f func(r rune) bool) int
func IndexRune(s []byte, r rune) int
func LastIndex(s, sep []byte) int
func LastIndexAny(s []byte, chars string) int
func LastIndexByte(s []byte, c byte) int
func LastIndexFunc(s []byte, f func(r rune) bool) int

获取string/[]byte中rune/byte/string/[]byte的下标/最后一个下标的函数

// IndexTest prints the results of the Index*/LastIndex* family of the
// strings package for one haystack/needle pair. All reported positions are
// byte offsets into the haystack.
func IndexTest() {
	const (
		haystack = "你好你好你呀"
		needle   = "好你"
	)

	leadByte := needle[0]
	// Only the decoded rune is needed here; its encoded width is discarded.
	leadRune, _ := utf8.DecodeRuneInString(needle)
	matchNi := func(c rune) bool { return c == '你' }

	// Searches from the front.
	fmt.Printf("Index(%q, %q) = %d\n", haystack, needle, strings.Index(haystack, needle))
	fmt.Printf("IndexByte(%q, %q) = %d\n", haystack, leadByte, strings.IndexByte(haystack, leadByte))
	fmt.Printf("IndexRune(%q, %q) = %d\n", haystack, leadRune, strings.IndexRune(haystack, leadRune))
	fmt.Printf("IndexAny(%q, %q) = %d\n", haystack, needle, strings.IndexAny(haystack, needle))
	fmt.Printf("IndexFunc(%q, isNi) = %d\n", haystack, strings.IndexFunc(haystack, matchNi))

	// Searches from the back. The strings package has no LastIndexRune, so
	// there is no rune variant in this group.
	fmt.Printf("LastIndex(%q, %q) = %d\n", haystack, needle, strings.LastIndex(haystack, needle))
	fmt.Printf("LastIndexByte(%q, %q) = %d\n", haystack, leadByte, strings.LastIndexByte(haystack, leadByte))
	fmt.Printf("LastIndexAny(%q, %q) = %d\n", haystack, needle, strings.LastIndexAny(haystack, needle))
	fmt.Printf("LastIndexFunc(%q, isNi) = %d\n", haystack, strings.LastIndexFunc(haystack, matchNi))
}

/*
Output:
Index("你好你好你呀", "好你") = 3
IndexByte("你好你好你呀", 'å') = 3
IndexRune("你好你好你呀", '好') = 3
IndexAny("你好你好你呀", "好你") = 0
IndexFunc("你好你好你呀", isNi) = 0
LastIndex("你好你好你呀", "好你") = 9
LastIndexByte("你好你好你呀", 'å') = 15
LastIndexAny("你好你好你呀", "好你") = 12
LastIndexFunc("你好你好你呀", isNi) = 12
*/

相等性测试

// strings
// string的相等性测试可以直接使用 == 操作符
// 在unicode字符集中忽略大小写进行比较，更加通用
func EqualFold(s, t string) bool

// bytes
// a和b是否长度相同且内容相同
func Equal(a, b []byte) bool
func EqualFold(s, t []byte) bool

Equal是相等性测试，EqualFold是忽略大小写的相等性测试。

// 用泛型实现Equal
// Equal reports whether a and b have the same length and hold pairwise equal
// elements (compared with ==). Two empty slices, nil or not, are equal.
func Equal[T comparable](a, b []T) bool {
	if len(a) != len(b) {
		return false
	}
	// Lengths match, so every index of a is also valid for b.
	for idx, left := range a {
		if left != b[idx] {
			return false
		}
	}
	return true
}

// EqualTest calls the generic Equal on a pair of string slices and on a pair
// of byte slices with identical contents, printing each comparison result.
func EqualTest() {
	wordsA := []string{"abc", "def", "你好"}
	wordsB := []string{"abc", "def", "你好"}
	fmt.Printf("Equal(%q, %q) = %v\n", wordsA, wordsB, Equal(wordsA, wordsB))

	rawA := []byte{1, 2, 3, 4, 5}
	rawB := []byte{1, 2, 3, 4, 5}
	fmt.Printf("Equal(%q, %q) = %v\n", rawA, rawB, Equal(rawA, rawB))
}
/*
Output:
Equal(["abc" "def" "你好"], ["abc" "def" "你好"]) = true
Equal("\x01\x02\x03\x04\x05", "\x01\x02\x03\x04\x05") = true
*/

去掉前缀、后缀，分割等函数

// strings
// 按照sep，将s分割为 before、sep、after，如果sep在s中不存在，返回s, "", false
func Cut(s, sep string) (before, after string, found bool)
// 如果prefix是s的前缀，返回s[len(prefix):],true，s中没有prefix则返回s,false，prefix为空则返回s,true
func CutPrefix(s, prefix string) (after string, found bool)
// 剪除后缀
func CutSuffix(s, suffix string) (before string, found bool)
// 按照sep将s分割
// If s does not contain sep and sep is not empty, Split returns a slice of length 1 whose only element is s.
// If sep is empty, Split splits after each UTF-8 sequence. 
// If both s and sep are empty, Split returns an empty slice.
func Split(s, sep string) []string
// 最多划分为n个substring
// n > 0: at most n substrings; the last substring will be the unsplit remainder.
// n == 0: the result is nil (zero substrings)
// n < 0: all substrings
func SplitN(s, sep string, n int) []string
// sep不会被丢弃，而是放在被分割的字串后面
func SplitAfter(s, sep string) []string
// 最多划分为n个字串
func SplitAfterN(s, sep string, n int) []string
// 按照空白字符将s分割为slice，空白字符的判断标准为unicode.IsSpace()
func Fields(s string) []string
// 按照f(r)的结果将s分割为slice，Fields等效于FieldsFunc(s, unicode.IsSpace)
func FieldsFunc(s string, f func(rune) bool) []string
// 从前面和后面删除连续的cutset中的unicode码点，然后返回s的slice
func Trim(s, cutset string) string
// 从前面和后面删除连续的空白字符，相当于TrimFunc(s, unicode.IsSpace)
func TrimSpace(s string) string
// 从s前面和后面删除连续的使得f(r)为真的unicode码点，然后返回s的slice
func TrimFunc(s string, f func(rune) bool) string
// 从s前面删除连续的cutset中的unicode码点，然后返回s的slice
func TrimLeft(s, cutset string) string
// 从s后面删除连续的cutset中的unicode码点，然后返回s的slice
func TrimRight(s, cutset string) string
// 从s前面删除连续的使得f(r)为真的unicode码点，然后返回s的slice
func TrimLeftFunc(s string, f func(rune) bool) string
// 从s后面删除连续的使得f(r)为真的unicode码点，然后返回s的slice
func TrimRightFunc(s string, f func(rune) bool) string
// 如果s的前缀是prefix，则删除该前缀，返回s的slice；否则返回s
func TrimPrefix(s, prefix string) string
// 如果s的后缀是suffix，则删除该后缀，返回s的slice，否则返回s
func TrimSuffix(s, suffix string) string


// bytes
func Cut(s, sep []byte) (before, after []byte, found bool)
func CutPrefix(s, prefix []byte) (after []byte, found bool)
func CutSuffix(s, suffix []byte) (before []byte, found bool)
func Split(s, sep []byte) [][]byte
func SplitAfter(s, sep []byte) [][]byte
func SplitAfterN(s, sep []byte, n int) [][]byte
func SplitN(s, sep []byte, n int) [][]byte
func Fields(s []byte) [][]byte
func FieldsFunc(s []byte, f func(rune) bool) [][]byte
func Trim(s []byte, cutset string) []byte
func TrimFunc(s []byte, f func(r rune) bool) []byte
func TrimLeft(s []byte, cutset string) []byte
func TrimLeftFunc(s []byte, f func(r rune) bool) []byte
func TrimPrefix(s, prefix []byte) []byte
func TrimRight(s []byte, cutset string) []byte
func TrimRightFunc(s []byte, f func(r rune) bool) []byte
func TrimSpace(s []byte) []byte
func TrimSuffix(s, suffix []byte) []byte

重复字符串

// strings
// 返回count个s首尾相连组成的字符串，count < 0或结果长度溢出时会panic
func Repeat(s string, count int) string

// bytes
func Repeat(b []byte, count int) []byte

单纯的将s重复几次

替换字符串/转换字符串

// strings
// 将s中的每个字符使用mapping函数替换，返回拷贝。如果mapping函数返回一个负值，函数会丢弃该字符
func Map(mapping func(rune) rune, s string) string
// 将s中n个不重叠的old字符串替换为new字符串，n < 0时替换所有字符串
//  old为空时在每个unicode码点之间（包括最开始和最后）添加new对应的字符串，最多替换n个
func Replace(s, old, new string, n int) string
// 等于Replace(s, old, new, -1)
func ReplaceAll(s, old, new string) string
// 将s中的unicode码点转化为小写
func ToLower(s string) string
// 部分语言有特殊的大小写映射，ToLowerSpecial方法优先处理这些特殊的大小写映射
func ToLowerSpecial(c unicode.SpecialCase, s string) string
// 将s中的unicode码点转化为标题格式，目前来看英文转化为全大写，中文不变
func ToTitle(s string) string
// 部分语言有特殊的标题格式映射
func ToTitleSpecial(c unicode.SpecialCase, s string) string
// 转化为大写
func ToUpper(s string) string
// 部分语言有特殊的大小写映射
func ToUpperSpecial(c unicode.SpecialCase, s string) string
// 将s中非法的utf8字节序列替换为replacement
func ToValidUTF8(s, replacement string) string

// bytes
func Map(mapping func(r rune) rune, s []byte) []byte
func Replace(s, old, new []byte, n int) []byte
func ReplaceAll(s, old, new []byte) []byte
// 将s中的字节解读为unicode码点
func Runes(s []byte) []rune
func ToLower(s []byte) []byte
func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte
func ToTitle(s []byte) []byte
func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte
func ToUpper(s []byte) []byte
func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte
func ToValidUTF8(s, replacement []byte) []byte

这部分是关于字符串/[]byte的替换/转换的函数，针对的都是字符串，针对单个unicode码点（rune类型）的操作（比如IsUpper, IsSpace, ToUpper）在unicode库中。

类型（type）

用于构建string的类

因为string是不可变类型，在用+操作符构建string时每一次都要分配一次内存，导致效率低下。

所以go标准库提供了高效构建string的类。

Builder类

// strings
type Builder struct {
	// contains filtered or unexported fields
}

Builder类型是strings包提供的，用于高效构建string的类，其会最小化内存拷贝。

不要拷贝非零的Builder对象。

Builder类的成员方法

func (b *Builder) Len() int

builder已经写入的字节数；b.Len() = len(b.String())

func (b *Builder) Cap() int

获取该builder已经分配的，用于构建string的空间，包括已经写入的空间。

func (b *Builder) Grow(n int)

增长builder的容量，使其至少能再写入n个字节，而不用再次分配内存。

func (b *Builder) Reset()

清空builder已经写入的内容

func (b *Builder) String() string

返回累加的string

func (b *Builder) Write(p []byte) (int, error)
func (b *Builder) WriteByte(c byte) error
func (b *Builder) WriteRune(r rune) (int, error)
func (b *Builder) WriteString(s string) (int, error)

向builder中写入内容。

Buffer类

type Buffer struct {
	// contains filtered or unexported fields
}

Buffer是bytes包提供的高效的string构建类，其比Builder更加强大。Builder的读出操作只有String()方法，Buffer支持更多的读出方法。

与Buffer类相关的函数

func NewBuffer(buf []byte) *Buffer
func NewBufferString(s string) *Buffer

分别以[]byte和string的内容为基础构建新的Buffer类实例。

大多数情况下，new(Buffer)（或者直接定义一个Buffer类型的变量）就可以初始化一个空的Buffer。

Buffer类的成员方法

func (b *Buffer) Available() int
func (b *Buffer) Cap() int
func (b *Buffer) Len() int

空间大小相关的函数。
Cap()返回Buffer的内部空间的容量。
Available()返回还未使用的，但已经申请的空间的大小。
Len()返回写入后未读出的空间的大小。

func (b *Buffer) Grow(n int)

使得Buffer至少能再写入n个字节而不重新分配内存。

func (b *Buffer) Truncate(n int)

除了前n个未读的字节外，丢弃其他已写入的字节，不会重新分配内存。

func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error)
func (b *Buffer) Write(p []byte) (n int, err error)
func (b *Buffer) WriteByte(c byte) error
func (b *Buffer) WriteRune(r rune) (n int, err error)
func (b *Buffer) WriteString(s string) (n int, err error)

向Buffer中写入内容。

与strings.Builder不同的是有一个ReadFrom()方法，该方法从一个io.Reader（这是一个接口，包含Read()方法）中读取内容，并返回读取的字节数量和出现的错误（io.EOF不视为错误，io.Reader的Read()方法返回io.EOF时ReadFrom()方法不会返回错误）。

func (b *Buffer) Bytes() []byte
func (b *Buffer) Read(p []byte) (n int, err error)
func (b *Buffer) ReadByte() (byte, error)
func (b *Buffer) ReadBytes(delim byte) (line []byte, err error)
func (b *Buffer) ReadRune() (r rune, size int, err error)
func (b *Buffer) ReadString(delim byte) (line string, err error)
func (b *Buffer) String() string
func (b *Buffer) WriteTo(w io.Writer) (n int64, err error)

Buffer类中有众多的读出方式，包括读取全部字节、读指定数量的字节、读一个字节、读到某个字节为止、读一个unicode码点、全部读出来作为一个string。

同时Buffer类还有一个与ReadFrom()对应的方法WriteTo()，该方法将字节写入到一个io.Writer（这是一个接口，包含Write()方法）中，并返回写入的字节数和出现的错误。

func (b *Buffer) AvailableBuffer() []byte

返回未写入的空闲的空间，直到下次写入操作前该空间都有效。

func (b *Buffer) UnreadByte() error
func (b *Buffer) UnreadRune() error

将上次读取的byte/rune回退到缓冲区中。

用于读取string/[]byte的类

有时候我们需要将string/[]byte当作输入，strings和bytes包中都提供了名为Reader的类，用于将string/[]byte作为输入源。

strings.Reader

type Reader struct {
	// contains filtered or unexported fields
}

实现了io.Reader, io.ReaderAt, io.ByteReader, io.ByteScanner, io.RuneReader, io.RuneScanner, io.Seeker, io.WriterTo接口，使用string作为输入源，零值的行为与以空字符串初始化相同。

strings.Reader相关函数

func NewReader(s string) *Reader

以s为输入源创建一个strings.Reader。

成员函数

func (r *Reader) Len() int // 返回还未读出的字节的数量
func (r *Reader) Read(b []byte) (n int, err error)
func (r *Reader) ReadAt(b []byte, off int64) (n int, err error)
func (r *Reader) ReadByte() (byte, error)
func (r *Reader) ReadRune() (ch rune, size int, err error)
func (r *Reader) Reset(s string) // 重置Reader，使其改为从字符串s的开头读取
func (r *Reader) Seek(offset int64, whence int) (int64, error) // 重新定位到某个地方，实现了io.Seeker接口
func (r *Reader) Size() int64 // 返回字符串s的初始长度
func (r *Reader) UnreadByte() error
func (r *Reader) UnreadRune() error
func (r *Reader) WriteTo(w io.Writer) (n int64, err error)

大部分语义和前面的一样，没见过的加了注释。

bytes.Reader

type Reader struct {
	// contains filtered or unexported fields
}

和strings.Reader类似，只不过使用[]byte作为输入源。

成员函数

func (r *Reader) Len() int
func (r *Reader) Read(b []byte) (n int, err error)
func (r *Reader) ReadAt(b []byte, off int64) (n int, err error)
func (r *Reader) ReadByte() (byte, error)
func (r *Reader) ReadRune() (ch rune, size int, err error)
func (r *Reader) Reset(b []byte)
func (r *Reader) Seek(offset int64, whence int) (int64, error)
func (r *Reader) Size() int64
func (r *Reader) UnreadByte() error
func (r *Reader) UnreadRune() error
func (r *Reader) WriteTo(w io.Writer) (n int64, err error)

strings.Replacer

type Replacer struct {
	// contains filtered or unexported fields
}

用于方便地替换string中的内容的辅助类，看后面的例子就明白了。

成员函数

func (r *Replacer) Replace(s string) string

将s按照预定义的规则替换，返回替换后的字符串。

func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error)

将替换后的字符串写入w

delphi string pbyte byte

string byte

类型string byte quot

string基础java byte

strings golang bytes bufio

数组string byte

语言string byte rune

数组arduino string byte

string java byte

526互联

Go标准库学习：strings和bytes

strings包和bytes包

函数

clone

内容检查类的函数，是否包含。Contains/Has.

Count

获取字串的下标 Index

相等性测试

去掉前缀、后缀，分割等函数

重复字符串

替换字符串/转换字符串

类型（type）

用于构建string的类

Builder类

Builder类的成员方法

Buffer类

与Buffer类相关的函数

Buffer类的成员方法

用于读取string/[]byte的类

strings.Reader

strings.Reader相关函数

成员函数

bytes.Reader

相关函数

成员函数

strings.Replacer

相关函数

成员函数