Felix's Library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub fffelix-huang/CP-stuff

:heavy_check_mark: test/simd/prefix-sum/unit-test-simd-prefix-sum.test.cpp

Depends on

Code

#define PROBLEM "https://judge.yosupo.jp/problem/aplusb" // dummy

#include <iostream>
#include <vector>
#include <chrono>
#include <cassert>
#include <algorithm>
#include "../../../library/simd/prefix-sum.hpp"
#include "../../../library/random/random.hpp"
using namespace std;
using namespace felix;

__attribute__((aligned(64))) int a[10000000];
__attribute__((aligned(64))) int b[10000000];

void TEST(int n) {
	cerr << "n = " << n << endl;
	for(int i = 0; i < n; i++) {
		a[i] = rnd.next(-100, 100);
		b[i] = a[i];
	}
	std::partial_sum(b, b + n, a);
	simd::prefix_sum(b, n);
	assert(equal(a, a + n, b));
}

int main() {
	for(int i = 0; i <= 20; i++) {
		TEST(i);
	}
	for(int i = 0; i < 300; i++) {
		TEST(rnd.next(1000000, 10000000));
	}

	int a, b;
	cin >> a >> b;
	cout << a + b << "\n";
	return 0;
}
#line 1 "test/simd/prefix-sum/unit-test-simd-prefix-sum.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/aplusb" // dummy

#include <iostream>
#include <vector>
#include <chrono>
#include <cassert>
#include <algorithm>
#line 2 "library/simd/prefix-sum.hpp"

#include <x86intrin.h>

#include <limits>

#line 6 "library/simd/prefix-sum.hpp"

namespace felix {

namespace simd {

inline void prefix_sum(int* a, int n) {
	int last = 0;
	for(int i = 0; i + 3 < n; i += 4) {
		// Compute partial sum for each block of size 4.

		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		cur = _mm_add_epi32(cur, _mm_slli_si128(cur, 4));
		cur = _mm_add_epi32(cur, _mm_slli_si128(cur, 8));
		// Accumlate sum from previous block.

		__m128i lastv = _mm_set1_epi32(last);
		cur = _mm_add_epi32(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 3];
	}
	// Compute partial sum for remaining elements.

	for(int i = (n & (~3)) + (n < 4); i < n; i++) {
		a[i] += a[i - 1];
	}
}

inline void prefix_sum(long long* a, int n) {
	long long last = 0;
	for(int i = 0; i + 1 < n; i += 2) {
		a[i + 1] += a[i];
		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		__m128i lastv = _mm_set1_epi64x(last);
		cur = _mm_add_epi64(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 1];
	}
	if(n & 1) {
		a[n - 1] += a[n - 2];
	}
}

inline void prefix_max(int* a, int n) {
	int last = std::numeric_limits<int>::min();
	for(int i = 0; i + 3 < n; i += 4) {
		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		cur = _mm_max_epi32(cur, _mm_slli_si128(cur, 4));
		cur = _mm_max_epi32(cur, _mm_slli_si128(cur, 8));
		__m128i lastv = _mm_set1_epi32(last);
		cur = _mm_max_epi32(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 3];
	}
	for(int i = (n & (~3)) + (n < 4); i < n; i++) {
		a[i] = std::max(a[i], a[i - 1]);
	}
}

inline void prefix_max(long long* a, int n) {
	long long last = std::numeric_limits<int>::min();
	for(int i = 0; i + 1 < n; i += 2) {
		a[i + 1] = std::max(a[i + 1], a[i]);
		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		__m128i lastv = _mm_set1_epi64x(last);
		cur = _mm_max_epi64(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 1];
	}
	if(n & 1) {
		a[n - 1] = std::max(a[n - 1], a[n - 2]);
	}
}

inline void prefix_min(int* a, int n) {
	int last = std::numeric_limits<int>::max();
	for(int i = 0; i + 3 < n; i += 4) {
		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		cur = _mm_min_epi32(cur, _mm_slli_si128(cur, 4));
		cur = _mm_min_epi32(cur, _mm_slli_si128(cur, 8));
		__m128i lastv = _mm_set1_epi32(last);
		cur = _mm_min_epi32(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 3];
	}
	for(int i = (n & (~3)) + (n < 4); i < n; i++) {
		a[i] = std::min(a[i], a[i - 1]);
	}
}

inline void prefix_min(long long* a, int n) {
	long long last = std::numeric_limits<int>::max();
	for(int i = 0; i + 1 < n; i += 2) {
		a[i + 1] = std::min(a[i + 1], a[i]);
		__m128i cur = _mm_load_si128((__m128i*) &a[i]);
		__m128i lastv = _mm_set1_epi64x(last);
		cur = _mm_min_epi64(lastv, cur);
		_mm_store_si128((__m128i*) &a[i], cur);
		last = a[i + 1];
	}
	if(n & 1) {
		a[n - 1] = std::min(a[n - 1], a[n - 2]);
	}
}

} // namespace simd


} // namespace felix

#line 3 "library/random/random.hpp"
#include <set>
#include <cstring>
#include <array>
#line 7 "library/random/random.hpp"
#include <numeric>
#include <climits>
#include <string>
#line 3 "library/random/splitmix64.hpp"

namespace felix {

namespace internal {

// http://xoshiro.di.unimi.it/splitmix64.c

struct splitmix64_hash {
	static unsigned long long splitmix64(unsigned long long x) {
		x += 0x9e3779b97f4a7c15;
		x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
		x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
		return x ^ (x >> 31);
	}
 
	unsigned long long operator()(unsigned long long x) const {
		static const unsigned long long FIXED_RANDOM = std::chrono::steady_clock::now().time_since_epoch().count();
		return splitmix64(x + FIXED_RANDOM);
	}
};

} // namespace internal


} // namespace felix

#line 11 "library/random/random.hpp"

namespace felix {

struct random_t {
public:
	explicit random_t(unsigned long long seed = 3905348978240129619LL) {
		set_seed(seed);
	}

	void set_seed(unsigned long long seed) {
		for(int i = 0; i < 4; i++) {
			s[i] = internal::splitmix64_hash::splitmix64(seed);
			seed += 0x9e3779b97f4a7c15;
		}
	}

	// [0, n)
	unsigned long long next(unsigned long long n) {
		const unsigned long long LIMIT = std::numeric_limits<unsigned long long>::max() / n * n;
		unsigned long long r;
		do {
			r = next();
		} while(r >= LIMIT);
		return r % n;
	}

	// [l, r]
	template<class T>
	T next(T l, T r) {
		assert(l <= r);
		return T(l + next(r - l + 1ULL));
	}

	template<class Iter>
	void shuffle(Iter l, Iter r) {
		if(l == r) {
			return;
		}
		int pos = 0;
		for(auto it = l + 1; it != r; it++) {
			pos += 1;
			int j = next(pos + 1);
			if(j != pos) {
				std::iter_swap(it, l + j);
			}
		}
	}

	// [0, n)
	std::vector<int> perm(int n) {
		std::vector<int> a(n);
		std::iota(a.begin(), a.end(), 0);
		shuffle(a.begin(), a.end());
		return a;
	}

	// [l, r]
	template<class T>
	std::vector<T> distinct(int size, T l, T r) {
		std::vector<T> result;
		if(size == 0) {
			return result;
		}
		assert(l <= r);
		assert(size > 0);
		unsigned long long n = r - l + 1;
		assert(size <= n);
		double expected = 0;
		for(int i = 1; i <= size; i++) {
			expected += double(n) / double(n - i + 1);
		}
		if(expected < (double) n) {
			std::set<T> s;
			while((int) s.size() < size) {
				T val = T(next(l, r));
				if(s.insert(val).second) {
					result.push_back(val);
				}
			}
		} else {
			std::vector<T> p(perm(n));
			for(auto& x : p) {
				x += l;
			}
			result.insert(result.end(), p.begin(), p.begin() + size);
		}
		return result;
	}

	std::string string(int n, char min_char = 'a', char max_char = 'z') {
		std::string s(n, '_');
		for(int i = 0; i < n; i++) {
			s[i] = next(min_char, max_char);
		}
		return s;
	}

private:
	std::array<unsigned long long, 4> s;

	static unsigned long long rotl(const unsigned long long x, int k) {
		return (x << k) | (x >> (64 - k));
	}

	unsigned long long next() {
		const unsigned long long result = rotl(s[1] * 5, 7) * 9;
		const unsigned long long t = s[1] << 17;
		s[2] ^= s[0];
		s[3] ^= s[1];
		s[1] ^= s[2];
		s[0] ^= s[3];
		s[2] ^= t;
		s[3] = rotl(s[3], 45);
		return result;
	}
} rnd;

} // namespace felix
#line 10 "test/simd/prefix-sum/unit-test-simd-prefix-sum.test.cpp"
using namespace std;
using namespace felix;

__attribute__((aligned(64))) int a[10000000];
__attribute__((aligned(64))) int b[10000000];

void TEST(int n) {
	cerr << "n = " << n << endl;
	for(int i = 0; i < n; i++) {
		a[i] = rnd.next(-100, 100);
		b[i] = a[i];
	}
	std::partial_sum(b, b + n, a);
	simd::prefix_sum(b, n);
	assert(equal(a, a + n, b));
}

int main() {
	for(int i = 0; i <= 20; i++) {
		TEST(i);
	}
	for(int i = 0; i < 300; i++) {
		TEST(rnd.next(1000000, 10000000));
	}

	int a, b;
	cin >> a >> b;
	cout << a + b << "\n";
	return 0;
}
Back to top page