init
This commit is contained in:
commit
ea7dcba939
6 changed files with 481 additions and 0 deletions
81
cpp_omp/main.cpp
Normal file
81
cpp_omp/main.cpp
Normal file
|
@ -0,0 +1,81 @@
|
|||
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <exception>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif
|
||||
|
||||
// Overwrites every element of `v` with a pseudo-random float produced by a
// std::uniform_real_distribution<float> constructed with bounds (-1.0f, 1.0f).
// The generator is a std::mt19937 seeded with `seed` narrowed to 32 bits, so
// seeds that agree in their low 32 bits yield the same sequence.
static void init_vec(std::vector<float> &v, uint64_t seed) {
    const auto seed32 = static_cast<uint32_t>(seed);
    std::mt19937 engine(seed32);
    std::uniform_real_distribution<float> uniform(-1.0f, 1.0f);
    // Elements are filled front to back, one draw per element.
    std::generate(v.begin(), v.end(), [&]() { return uniform(engine); });
}
|
||||
|
||||
/// Benchmark driver.
///
/// Allocates five float vectors of length N, fills A, B and C via init_vec,
/// then times `iters` passes of
///     D[i] = A[i] + B[i]
///     E[i] = D[i] * C[i] + B[i]
///     sum_result += sum(E)
/// and prints the thread count, elapsed time, a throughput figure and the
/// accumulated sum.
///
/// Command line: prog [N] [iters]
///   N      elements per vector (default 100000000)
///   iters  number of timed passes (default 10, must be > 0)
///
/// Returns 0 on success and 1 when an argument cannot be parsed or is out of
/// range.
int main(int argc, char **argv) {
    size_t N = 100000000;
    int iters = 10;

    // std::stoull / std::stoi throw on malformed or out-of-range text; turn
    // that into a usage error instead of an uncaught-exception abort.
    bool args_ok = true;
    try {
        if (argc >= 2) {
            const std::string text = argv[1];
            size_t used = 0;
            N = static_cast<size_t>(std::stoull(text, &used));
            // stoull accepts a leading '-' and wraps the value; it also stops
            // silently at trailing garbage. Reject both.
            if (used != text.size() || text.find('-') != std::string::npos) args_ok = false;
        }
        if (argc >= 3) {
            const std::string text = argv[2];
            size_t used = 0;
            iters = std::stoi(text, &used);
            if (used != text.size() || iters <= 0) args_ok = false;
        }
    } catch (const std::exception &) {
        args_ok = false;
    }
    if (!args_ok) {
        std::cerr << "usage: " << argv[0] << " [N] [iters]   (N >= 0, iters > 0)\n";
        return 1;
    }

    std::vector<float> A(N), B(N), C(N), D(N), E(N);
    init_vec(A, 1);
    init_vec(B, 2);
    init_vec(C, 3);

    // Untimed warm-up over at most the first 1000 elements. The bound is
    // hoisted and the index is signed so the loop has a simple, loop-invariant
    // form; the reduction runs on a plain local and is then stored to a
    // volatile so the compiler cannot discard the work.
    const long long warm_count = static_cast<long long>(std::min<size_t>(N, 1000));
    float warm_sum = 0.0f;
#pragma omp parallel for reduction(+:warm_sum) if(N>10000)
    for (long long i = 0; i < warm_count; ++i) {
        const float d = A[i] + B[i];
        warm_sum += d * C[i] + B[i];
    }
    volatile float sink = warm_sum;

    const long long count = static_cast<long long>(N);

    auto start = std::chrono::high_resolution_clock::now();

    // Accumulates the sum of E across all passes (not reset per pass).
    double sum_result = 0.0;
    for (int it = 0; it < iters; ++it) {
#pragma omp parallel for if(N>10000)
        for (long long i = 0; i < count; ++i) {
            D[i] = A[i] + B[i];
        }
#pragma omp parallel for if(N>10000)
        for (long long i = 0; i < count; ++i) {
            E[i] = D[i] * C[i] + B[i];
        }
        // Each thread sums its share in double precision; the reduction
        // clause combines the per-thread partials into sum_result.
#pragma omp parallel for reduction(+:sum_result)
        for (long long i = 0; i < count; ++i) {
            sum_result += static_cast<double>(E[i]);
        }
    }

    auto end = std::chrono::high_resolution_clock::now();
    const double ms = std::chrono::duration<double, std::milli>(end - start).count();

    // NOTE(review): the three loops above read/write 3 + 4 + 1 = 8 arrays'
    // worth of floats per pass; the factor 7.0 is kept unchanged so reported
    // numbers stay comparable with earlier runs -- confirm the intended
    // accounting.
    const double bytes_per_iter = 7.0 * N * sizeof(float);
    const double seconds = ms / 1000.0;
    // Guard against a zero reading from the clock (e.g. N == 0).
    const double gbps = (seconds > 0.0) ? (bytes_per_iter * iters) / seconds / 1e9 : 0.0;

    int threads = 1;
#ifdef _OPENMP
    threads = omp_get_max_threads();
#endif

    std::cout << "CPP OpenMP\n";
    std::cout << "threads=" << threads << "\n";
    std::cout << "N=" << N << " iters=" << iters << "\n";
    std::cout << "time_ms=" << ms << "\n";
    std::cout << "throughput_GBps=" << gbps << "\n";
    std::cout << "result=" << sum_result << "\n";

    // The volatile store to `sink` above is already an observable side
    // effect, so no further use of it is needed here.
    return 0;
}
|
Loading…
Add table
Add a link
Reference in a new issue