【代码片段分享】比 url.QueryEscape 快 7.33 倍的 FastQueryEscape

发布时间:2023-06-11 16:54:27 作者:ahfuzhang

作者:张富春(ahfuzhang),转载时请注明作者和引用链接,谢谢!


做 profile 发现 url.QueryEscape 占用的 CPU 时间较多,于是搜索到了一个资料:net/url: optimize unescape and escape.
于是在这个代码的基础上改出了 FastQueryEscape 的版本。
在 Macbook pro m2 上测试:

  • url.QueryEscape() 281.5 ns/op
  • FastQueryEscape() 38.40 ns/op, 快7.33 倍

具体代码如下:query_escape.go

package stringsutil

import (
	"bytes"
    "reflect"
	"unsafe"
)

// shouldPathEscape reports whether byte c must be percent-encoded.
//
// Only ASCII letters, ASCII digits and the four characters '-', '_', '.'
// and '~' are left as-is. Every other byte is reported as needing escaping,
// including '=', '&', ':', '@', '+', '$', the space character and all bytes
// >= 0x80.
func shouldPathEscape(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
		// Alphanumerics never need escaping.
		return false
	case c == '-', c == '_', c == '.', c == '~':
		// Remaining characters that are passed through verbatim.
		return false
	default:
		return true
	}
}

// shouldEscapeLUT is a lookup table indexed by byte value; entry c is true
// when byte c must be percent-encoded. It is filled once in init.
var shouldEscapeLUT [256]bool

// init precomputes shouldEscapeLUT from shouldPathEscape so that the
// per-byte decision in the escaping loops is a single array load.
func init() {
	for c := range shouldEscapeLUT {
		shouldEscapeLUT[c] = shouldPathEscape(byte(c))
	}
}

// FastQueryEscape percent-encodes s and returns the result as a byte slice.
//
// Every byte flagged in shouldEscapeLUT is written as "%XX" using upper-case
// hexadecimal digits; all other bytes are copied unchanged. A space is
// therefore emitted as "%20". NOTE(review): url.QueryEscape emits "+" for a
// space, so the two functions differ for inputs that contain spaces.
//
// buf is used purely as reusable scratch storage so that repeated calls do
// not allocate. The output is written into buf's spare capacity starting at
// its current read position; buf's length is not updated, and any unread
// data it holds is overwritten. If buf is nil a fresh slice is allocated.
//
// The returned slice must be treated as read-only and short-lived:
//   - when s needs no escaping it aliases the memory of s itself (see
//     NoAllocBytes), so writing to it is invalid;
//   - otherwise it aliases buf's internal storage and is only valid until
//     buf is next written to, grown, or passed to FastQueryEscape again.
func FastQueryEscape(s string, buf *bytes.Buffer) []byte {
	const upperHex = "0123456789ABCDEF"

	// First pass: count the bytes that expand from 1 to 3 output bytes.
	hexCount := 0
	for i := 0; i < len(s); i++ {
		if shouldEscapeLUT[s[i]] {
			hexCount++
		}
	}
	if hexCount == 0 {
		// Nothing to escape: hand back a view of s without copying.
		return NoAllocBytes(s)
	}

	total := len(s) + 2*hexCount
	var t []byte
	if buf == nil {
		t = make([]byte, total)
	} else {
		// Check the capacity of the slice we will actually write into.
		// buf.Cap() also counts bytes in front of the read offset, which
		// buf.Bytes() cannot reach, so relying on it could slice past the
		// usable capacity and panic.
		t = buf.Bytes()
		if cap(t) < total {
			// Over-allocate so subsequent, slightly longer inputs still fit.
			buf.Grow(total * 2)
			t = buf.Bytes()
		}
		t = t[:total]
	}

	// Second pass: emit the escaped bytes.
	j := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if shouldEscapeLUT[c] {
			t[j] = '%'
			t[j+1] = upperHex[c>>4]
			t[j+2] = upperHex[c&15]
			j += 3
		} else {
			t[j] = c
			j++
		}
	}
	return t
}

// copy from prometheus source code

// NoAllocString reinterprets buf as a string without copying its contents.
//
// The returned string shares memory with buf, so buf must not be modified
// for as long as the string is in use.
func NoAllocString(buf []byte) string {
	// A string header is a prefix (data pointer, length) of a slice header,
	// so the slice variable can be read back as a string in place.
	p := unsafe.Pointer(&buf)
	return *(*string)(p)
}

// NoAllocBytes reinterprets the string buf as a []byte without copying.
//
// The returned slice has len == cap == len(buf) and shares memory with the
// string. Strings are immutable, so the slice must be treated as read-only;
// writing through it is invalid.
func NoAllocBytes(buf string) []byte {
	// Reinterpreting &buf directly as *[]byte would read a Cap field that a
	// string header does not have. Instead, fill in the header of a real
	// slice variable: package unsafe only permits reflect.StringHeader and
	// reflect.SliceHeader to be used as pointers into actual strings and
	// slices, never as standalone values, because their uintptr Data field
	// does not keep the referenced memory alive for the garbage collector.
	var b []byte
	src := (*reflect.StringHeader)(unsafe.Pointer(&buf))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data = src.Data
	dst.Len = src.Len
	dst.Cap = src.Len
	return b
}

使用的时候要重用 bytes.Buffer 对象。注意:返回的 []byte 直接引用 buf 的内部存储(无需转义时则直接引用原字符串的内存),因此只能读取、不可修改,并且在下一次使用同一个 buf 之前必须用完。示例如下:

var testStr = "a=1&b=2&%%?_-/中文__-_.~:@+$"
// a%3D1%26b%3D2%26%25%25%3F_-%2F%E4%B8%AD%E6%96%87__-_.~%3A%40%2B%24

func TestFastQueryEscape(t *testing.T) {
	s := url.QueryEscape(testStr)
	t.Logf("%s", s)
	target := &bytes.Buffer{}
	target.Grow(1024)
	out := FastQueryEscape(testStr, target)
	t.Logf("%s", NoAllocString(out))
	if s != NoAllocString(out) {
		t.Error("not match")
	}
}