C# simd 性能雷点记录

发布时间 2023-04-18 02:35:30作者: ProjectDD

先看两段代码对比:

  static public T SimdDot(T[] a, T[] b) {
    if (a.Length != b.Length) throw new ArgumentException("The size of two matrix is not equal.");
    // if (a.Length == 0) throw new ArgumentException("The length of input matrix must be greater than zero.");
    var result = default(T);
    var vcount= Vector<T>.Count;
    int remaining = a.Length % vcount;
    var idx = 0;
    for (int i = 0; i < a.Length - remaining; i += vcount) {
      var va = new Vector<T>(a, idx * vcount);
      var vb = new Vector<T>(b, idx * vcount);
      result += Vector.Dot(va, vb);
      idx++;
    }
    for (int i = a.Length - remaining; i < a.Length; i++) {
      result += a[i] * b[i];
    }
    return result;
  }
  static public T SimdDot(ReadOnlySpan<T> a, ReadOnlySpan<T> b) {
    if (a.Length != b.Length) throw new ArgumentException("The size of two matrix is not equal.");
    // if (a.Length == 0) throw new ArgumentException("The length of input matrix must be greater than zero.");
    var result = default(T);
    var vcount= Vector<T>.Count;
    int remaining = a.Length % vcount;
    var idx = 0;
    for (int i = 0; i < a.Length - remaining; i += vcount) {
      var va = new Vector<T>(a.Slice(idx*vcount,vcount));
      var vb = new Vector<T>(b.Slice(idx*vcount,vcount));
      result += Vector.Dot(va, vb);
      idx++;
    }
    for (int i = a.Length - remaining; i < a.Length; i++) {
      result += a[i] * b[i];
    }
    return result;
  }

这是向量的点乘运算,下面代码的性能差于传统sisd的性能,而上面的代码略好于sisd代码,它们之间的差别在于 使用 ReadOnlySpan<T>.Slice函数,看来这是个坑点,明明它是号称性能优势的Span<T>系列啊,所以比较出乎意料。特此记上一笔。

 

这结果是使用上面代码(非下面有红字部分代码段)的测试结果,如果使用有标红部分代码来测试的话,其性能竟然会达到sisd的 3-5倍之巨。