WEB开发网
开发学院软件开发C++ Pentium III处理器的单指令多数据流扩展指令(3)... 阅读

Pentium III处理器的单指令多数据流扩展指令(3)

 2010-10-15 09:08:09 来源:Web开发网   
核心提示:4.3 4x4矩阵下面的例子介绍一个4x4的矩阵.矩阵被封装在一个类中.在下面类的函数里面调用了intrinsics库.类声明.float const sEPSILON = 1.0e-10f;union sse16 {__m128 m[4];float f[4][4];};class sMatrix4 {protect

4.3 4x4矩阵

下面的例子介绍一个4x4矩阵。矩阵数据被封装在一个类中,该类的成员函数调用了SSE intrinsics(编译器内建函数)库。

类声明:

// Tolerance for approximate float comparison. The sMatrix4 constructor
// broadcasts this value into its sFuzzy member, and sMatrix4::operator==
// treats two elements as equal when their absolute difference is strictly
// below it.
// NOTE(review): 1.0e-10f is far smaller than single-precision resolution for
// values near 1 (about 1.2e-7), so for such magnitudes the comparison only
// succeeds on bit-for-bit equal values -- confirm this is the intended
// tolerance.
float const sEPSILON = 1.0e-10f;
// Storage for sixteen floats that can be addressed either as four packed
// __m128 values (for SSE intrinsics) or as a plain 4x4 float array (for
// scalar element access). Both members overlay the same bytes.
// NOTE(review): reading one member after writing the other relies on the
// compiler supporting type punning through unions -- confirm for the
// toolchain in use.
union sse16 {
  __m128 m[4];    // packed view: m[r] overlays f[r][0..3]
  float f[4][4];  // scalar view: f[r][c]
};
// 4x4 single-precision matrix whose row operations are implemented with SSE
// intrinsics. sse4 and sVector4 are declared elsewhere; this class only uses
// sse4's `m` member (as an __m128) and sVector4's operator[] and
// four-argument constructor.
class sMatrix4 {
protected:
  // Matrix elements; val.m[r] / val.f[r][c] are two views of the same data.
  sse16 val;
  // sEPSILON replicated into all four lanes; used by operator==.
  sse4 sFuzzy;
public:
  // Builds the matrix from 16 consecutive floats: fv[4*r + c] becomes
  // element (r, c). The pointer must reference at least 16 readable floats.
  sMatrix4(float*);
  // Returns a mutable reference to element (i, j). No bounds checking.
  float& operator()(int, int);
  // Element-wise addition of another matrix into this one.
  sMatrix4& operator +=(const sMatrix4&);
  // Approximate equality: true when every pair of corresponding elements
  // differs by strictly less than the tolerance stored in sFuzzy.
  bool operator ==(const sMatrix4&) const;
  // Matrix-times-vector product; result component r is the dot product of
  // row r with the vector.
  sVector4 operator *(const sVector4&) const;
private:
  // Sum over k of (*this)(i, k) * B(k, j): row i of this matrix dotted with
  // column j of B.
  float RCD(const sMatrix4& B, int i, int j) const;
};

类实现:

// Constructs the matrix from 16 consecutive floats, four per row, and fills
// every lane of sFuzzy with the comparison tolerance sEPSILON.
// fv must point to at least 16 readable floats; it is not checked.
sMatrix4::sMatrix4(float* fv) {
  for (int row = 0; row < 4; ++row) {
    const float* src = fv + 4 * row;
    // _mm_set_ps takes its arguments from the highest lane down to the
    // lowest, so src[0] lands in lane 0, i.e. val.f[row][0].
    val.m[row] = _mm_set_ps(src[3], src[2], src[1], src[0]);
  }
  sFuzzy.m = _mm_set_ps(sEPSILON, sEPSILON, sEPSILON, sEPSILON);
}
// Returns a mutable reference to the element at row i, column j, using the
// scalar view of the storage. Indices are not range-checked.
float& sMatrix4::operator()(int i, int j) {
  float (&row)[4] = val.f[i];
  return row[j];
}
// Adds M to this matrix element by element, one packed row (four floats)
// per _mm_add_ps, and returns *this to allow chaining.
sMatrix4& sMatrix4::operator +=(const sMatrix4& M) {
  for (int row = 0; row < 4; ++row) {
    val.m[row] = _mm_add_ps(val.m[row], M.val.m[row]);
  }
  return *this;
}
// Approximate equality test. For each row, the absolute difference of the
// corresponding elements is formed as max(a, b) - min(a, b) and compared
// lane-wise against the tolerance in sFuzzy. The matrices are equal only if
// all four lanes of all four rows are strictly below the tolerance.
bool sMatrix4::operator ==(const sMatrix4& M) const {
  // _mm_movemask_ps packs the four lane sign bits into the low 4 bits, so
  // 0xF means every lane of the comparison mask was set.
  const int kAllLanes = 15;
  for (int row = 0; row < 4; ++row) {
    const __m128 larger = _mm_max_ps(val.m[row], M.val.m[row]);
    const __m128 smaller = _mm_min_ps(val.m[row], M.val.m[row]);
    const __m128 within = _mm_cmplt_ps(_mm_sub_ps(larger, smaller), sFuzzy.m);
    if (_mm_movemask_ps(within) != kAllLanes) {
      return false;
    }
  }
  return true;
}
// Matrix-times-vector product computed with scalar arithmetic: component r
// of the result is the dot product of row r of this matrix with v.
sVector4 sMatrix4::operator *(const sVector4& v) const {
  float out[4];
  for (int r = 0; r < 4; ++r) {
    const float* row = val.f[r];
    // Terms are summed left to right, in column order.
    out[r] = row[0] * v[0] + row[1] * v[1]
      + row[2] * v[2] + row[3] * v[3];
  }
  return sVector4(out[0], out[1], out[2], out[3]);
}
// Dot product of row i of this matrix with column j of B, i.e. the sum over
// k of (*this)(i, k) * B(k, j). Indices are not range-checked.
float sMatrix4::RCD(const sMatrix4& B, int i, int j) const {
  // Seed with the k = 0 term so the summation order matches a plain
  // left-to-right sum of the four products.
  float sum = val.f[i][0] * B.val.f[0][j];
  for (int k = 1; k < 4; ++k) {
    sum = sum + val.f[i][k] * B.val.f[k][j];
  }
  return sum;
}

上一页  2 3 4 5 6 7 8  下一页

Tags:Pentium III 处理器

编辑录入:爽爽 [复制链接] [打 印]
赞助商链接