curand
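curandState_t 保存每个线程各自的随机数生成器状态;在设备端用 curand_init(seed, sequence, offset, &state) 初始化。相同的 seed 配合不同的 sequence(通常取线程索引)可以让各线程得到互不相同的随机数序列。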
curand_init(seed, index, 0, &states[index]);
__device__ float curand_uniform (curandState_t *state) 返回 (0, 1] 区间内均匀分布的 float(不包含 0.0,包含 1.0)。
__device__ float curand_normal (curandState_t *state) 返回服从标准正态分布(均值为 0、标准差为 1)的 float,取值并不限于 0 到 1 之间。
thrust
// Example 1: host -> device copy by assignment.
// Construct a host_vector of int with 4 elements.
thrust :: host_vector <int > H (4);
// Copy host_vector H to device_vector D
thrust :: device_vector <int > D = H;
// NOTE(review): D and H are declared again below, so the lines from here on
// read as a second, independent snippet rather than a continuation of
// Example 1 -- confirm when compiling these together.
// Example 2: fill / sequence / copy.
// Construct a device_vector of int with 10 elements, each initialized to 1.
thrust :: device_vector <int > D(10 , 1);
// set the first seven elements of a vector to 9
thrust :: fill (D. begin () , D. begin () + 7, 9);
// initialize a host_vector with the first five elements of D
thrust :: host_vector <int > H(D. begin () , D. begin () + 5);
// set the elements of H to 0, 1, 2, 3, ...
thrust :: sequence (H. begin () , H. end ());
// copy all of H back to the beginning of D
thrust :: copy (H. begin () , H. end () , D. begin ());
thrust::raw_pointer_cast
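// 把 thrust 的设备指针(例如 D.data() 返回的 device_ptr)转换为原始指针,如 thrust::raw_pointer_cast(D.data()),以便传给 CUDA kernel 或 C 风格的 API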
比如,计算y=a * x + y,其中x、y为向量,a为常数。这其实就是我们所熟知的由BLAS提供的SAXPY(a*x+y)运算。如果我们在thrust中实现SAXPY我们有几个选择。
// Binary functor for SAXPY: maps an element pair (x, y) to a * x + y,
// where the scalar `a` is fixed when the functor is constructed.
struct saxpy_functor
{
    // Scale factor applied to the first operand; immutable after construction.
    const float a;

    // Stores the scale factor that every later call will use.
    saxpy_functor(float scale)
        : a(scale)
    {
    }

    // Returns a * x + y for one pair of elements.
    // Marked __host__ __device__ so it can be invoked from host or device code.
    __host__ __device__
    float operator()(const float& x, const float& y) const
    {
        return a * x + y;
    }
};
// Computes Y <- A * X + Y in place using a single thrust::transform call.
//
// A: scalar multiplier.
// X: first input vector.
// Y: second input vector; also receives the result.
//
// Assumes Y holds at least X.size() elements -- this is not checked here.
void saxpy_fast(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    // Functor carrying the scalar; applied to each (X[i], Y[i]) pair.
    const saxpy_functor axpy(A);

    thrust::transform(X.begin(), X.end(),  // first input range
                      Y.begin(),           // start of second input range
                      Y.begin(),           // output: written back over Y
                      axpy);
}
// Computes Y <- A * X + Y in place using only the stock thrust functors.
//
// This version allocates a temporary vector and makes three separate passes
// (fill, multiply, add), in contrast to saxpy_fast's single transform.
//
// A: scalar multiplier.
// X: first input vector.
// Y: second input vector; also receives the result.
//
// Assumes Y holds at least X.size() elements -- this is not checked here.
void saxpy_slow(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    // Scratch buffer with the same length as X.
    thrust::device_vector<float> scratch(X.size());

    // Pass 1: scratch[i] = A
    thrust::fill(scratch.begin(), scratch.end(), A);

    // Pass 2: scratch[i] = X[i] * scratch[i]   (i.e. A * X)
    thrust::multiplies<float> times;
    thrust::transform(X.begin(), X.end(), scratch.begin(), scratch.begin(), times);

    // Pass 3: Y[i] = scratch[i] + Y[i]         (i.e. A * X + Y)
    thrust::plus<float> add;
    thrust::transform(scratch.begin(), scratch.end(), Y.begin(), Y.begin(), add);
}
make_zip_iterator
#include <thrust/iterator/zip_iterator.h>
...
// initialize vectors
thrust::device_vector<int> A(3);
thrust::device_vector<char> B(3);
A[0] = 10; A[1] = 20; A[2] = 30;
B[0] = 'x'; B[1] = 'y'; B[2] = 'z';
// create iterator (type omitted)
first = thrust::make_zip_iterator(thrust::make_tuple(A.begin(), B.begin()));
last = thrust::make_zip_iterator(thrust::make_tuple(A.end(), B.end()));
first[0] // returns tuple(10, 'x')
first[1] // returns tuple(20, 'y')
first[2] // returns tuple(30, 'z')
// maximum of [first, last)
thrust::maximum< thrust::tuple<int,char> > binary_op;
thrust::tuple<int,char> init = first[0];
thrust::reduce(first, last, init, binary_op); // returns tuple(30, 'z')