Skip to content

Instantly share code, notes, and snippets.

@Mukundan314
Created December 12, 2024 12:18
Show Gist options
  • Save Mukundan314/709e56c72d248d34f81d9ac1dbfad92d to your computer and use it in GitHub Desktop.
Save Mukundan314/709e56c72d248d34f81d9ac1dbfad92d to your computer and use it in GitHub Desktop.
#define NDEBUG
#pragma GCC optimize("Ofast", "unroll-loops")
#pragma GCC target("avx2", "bmi", "bmi2", "popcnt", "lzcnt")
#include <bits/stdc++.h>
#include <x86intrin.h>
const int MAX_N = 200'000;
template< class T >
constexpr std::size_t tuple_size_v = std::tuple_size<T>::value;
/*nor's fastio: https://judge.yosupo.jp/submission/75233*/
struct IOPre {
static constexpr int TEN = 10, SZ = TEN * TEN * TEN * TEN;
std::array<char, 4 * SZ> num;
constexpr IOPre() : num{} {
for (int i = 0; i < SZ; i++) {
int n = i;
for (int j = 3; j >= 0; j--) {
num[i * 4 + j] = static_cast<char>(n % TEN + '0');
n /= TEN;
}
}
}
};
struct IO {
static constexpr int SZ = 1 << 17, LEN = 32, TEN = 10, HUNDRED = TEN * TEN, THOUSAND = HUNDRED * TEN,
TENTHOUSAND = THOUSAND * TEN, MAGIC_MULTIPLY = 205, MAGIC_SHIFT = 11, MASK = 15, TWELVE = 12,
SIXTEEN = 16;
static constexpr IOPre io_pre = {};
std::array<char, SZ> input_buffer, output_buffer;
int input_ptr_left, input_ptr_right, output_ptr_right;
IO() : input_buffer{}, output_buffer{}, input_ptr_left{}, input_ptr_right{}, output_ptr_right{} {}
IO(const IO &) = delete;
IO(IO &&) = delete;
IO &operator=(const IO &) = delete;
IO &operator=(IO &&) = delete;
~IO() { flush(); }
template <class T> struct is_char {
static constexpr bool value = std::is_same_v<T, char>;
};
template <class T> struct is_bool {
static constexpr bool value = std::is_same_v<T, bool>;
};
template <class T> struct is_string {
static constexpr bool value = std::is_same_v<T, std::string> || std::is_same_v<T, const char *> ||
std::is_same_v<T, char *> || std::is_same_v<std::decay_t<T>, char *>;
;
};
template <class T, class D = void> struct is_custom {
static constexpr bool value = false;
};
template <class T> struct is_custom<T, std::void_t<typename T::internal_value_type>> {
static constexpr bool value = true;
};
template <class T> struct is_default {
static constexpr bool value =
is_char<T>::value || is_bool<T>::value || is_string<T>::value || std::is_integral_v<T>;
};
template <class T, class D = void> struct is_iterable {
static constexpr bool value = false;
};
template <class T> struct is_iterable<T, typename std::void_t<decltype(std::begin(std::declval<T>()))>> {
static constexpr bool value = true;
};
template <class T, class D = void, class E = void> struct is_applyable {
static constexpr bool value = false;
};
template <class T>
struct is_applyable<T, std::void_t<typename std::tuple_size<T>::type>,
std::void_t<decltype(std::get<0>(std::declval<T>()))>> {
static constexpr bool value = true;
};
template <class T>
static constexpr bool needs_newline = (is_iterable<T>::value || is_applyable<T>::value) && (!is_default<T>::value);
template <typename T, typename U> struct any_needs_newline {
static constexpr bool value = false;
};
template <typename T> struct any_needs_newline<T, std::index_sequence<>> {
static constexpr bool value = false;
};
template <typename T, std::size_t I, std::size_t... Is> struct any_needs_newline<T, std::index_sequence<I, Is...>> {
static constexpr bool value = needs_newline<decltype(std::get<I>(std::declval<T>()))> ||
any_needs_newline<T, std::index_sequence<Is...>>::value;
};
inline void load() {
memmove(std::begin(input_buffer), std::begin(input_buffer) + input_ptr_left, input_ptr_right - input_ptr_left);
input_ptr_right = input_ptr_right - input_ptr_left +
static_cast<int>(fread_unlocked(std::begin(input_buffer) + input_ptr_right - input_ptr_left, 1,
SZ - input_ptr_right + input_ptr_left, stdin));
input_ptr_left = 0;
}
inline void read_char(char &c) {
if (input_ptr_left + LEN > input_ptr_right)
load();
c = input_buffer[input_ptr_left++];
}
inline void read_string(std::string &x) {
char c;
while (read_char(c), c < '!')
continue;
x = c;
while (read_char(c), c >= '!')
x += c;
}
template <class T> inline std::enable_if_t<std::is_integral_v<T>, void> read_int(T &x) {
if (input_ptr_left + LEN > input_ptr_right)
load();
char c = 0;
do
c = input_buffer[input_ptr_left++];
while (c < '-');
[[maybe_unused]] bool minus = false;
if constexpr (std::is_signed<T>::value == true)
if (c == '-')
minus = true, c = input_buffer[input_ptr_left++];
x = 0;
while (c >= '0')
x = x * TEN + (c & MASK), c = input_buffer[input_ptr_left++];
if constexpr (std::is_signed<T>::value == true)
if (minus)
x = -x;
}
inline void skip_space() {
if (input_ptr_left + LEN > input_ptr_right)
load();
while (input_buffer[input_ptr_left] <= ' ')
input_ptr_left++;
}
inline void flush() {
fwrite_unlocked(std::begin(output_buffer), 1, output_ptr_right, stdout);
output_ptr_right = 0;
}
inline void write_char(char c) {
if (output_ptr_right > SZ - LEN)
flush();
output_buffer[output_ptr_right++] = c;
}
inline void write_bool(bool b) {
if (output_ptr_right > SZ - LEN)
flush();
output_buffer[output_ptr_right++] = b ? '1' : '0';
}
inline void write_string(const std::string &s) {
for (auto x : s)
write_char(x);
}
inline void write_string(const char *s) {
while (*s)
write_char(*s++);
}
inline void write_string(char *s) {
while (*s)
write_char(*s++);
}
template <typename T> inline std::enable_if_t<std::is_integral_v<T>, void> write_int(T x) {
if (output_ptr_right > SZ - LEN)
flush();
if (!x) {
output_buffer[output_ptr_right++] = '0';
return;
}
if constexpr (std::is_signed<T>::value == true)
if (x < 0)
output_buffer[output_ptr_right++] = '-', x = -x;
int i = TWELVE;
std::array<char, SIXTEEN> buf{};
while (x >= TENTHOUSAND) {
memcpy(std::begin(buf) + i, std::begin(io_pre.num) + (x % TENTHOUSAND) * 4, 4);
x /= TENTHOUSAND;
i -= 4;
}
if (x < HUNDRED) {
if (x < TEN) {
output_buffer[output_ptr_right++] = static_cast<char>('0' + x);
} else {
std::uint32_t q = (static_cast<std::uint32_t>(x) * MAGIC_MULTIPLY) >> MAGIC_SHIFT;
std::uint32_t r = static_cast<std::uint32_t>(x) - q * TEN;
output_buffer[output_ptr_right] = static_cast<char>('0' + q);
output_buffer[output_ptr_right + 1] = static_cast<char>('0' + r);
output_ptr_right += 2;
}
} else {
if (x < THOUSAND) {
memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2) + 1, 3),
output_ptr_right += 3;
} else {
memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2), 4),
output_ptr_right += 4;
}
}
memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(buf) + i + 4, TWELVE - i);
output_ptr_right += TWELVE - i;
}
template <typename T_> IO &operator<<(T_ &&x) {
using T = typename std::remove_cv<typename std::remove_reference<T_>::type>::type;
static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
if constexpr (is_custom<T>::value) {
write_int(x.get());
} else if constexpr (is_default<T>::value) {
if constexpr (is_bool<T>::value) {
write_bool(x);
} else if constexpr (is_string<T>::value) {
write_string(x);
} else if constexpr (is_char<T>::value) {
write_char(x);
} else if constexpr (std::is_integral_v<T>) {
write_int(x);
}
} else if constexpr (is_iterable<T>::value) {
using E = decltype(*std::begin(x));
constexpr char sep = needs_newline<E> ? '\n' : ' ';
int i = 0;
for (const auto &y : x) {
if (i++)
write_char(sep);
operator<<(y);
}
} else if constexpr (is_applyable<T>::value) {
constexpr char sep = (any_needs_newline<T, std::make_index_sequence<tuple_size_v<T>>>::value) ? '\n' : ' ';
int i = 0;
std::apply([this, &sep, &i](auto const &...y) { (((i++ ? write_char(sep) : void()), this->operator<<(y)), ...); },
x);
}
return *this;
}
template <typename T> IO &operator>>(T &x) {
static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
static_assert(!is_bool<T>::value);
if constexpr (is_custom<T>::value) {
typename T::internal_value_type y;
read_int(y);
x = y;
} else if constexpr (is_default<T>::value) {
if constexpr (is_string<T>::value) {
read_string(x);
} else if constexpr (is_char<T>::value) {
read_char(x);
} else if constexpr (std::is_integral_v<T>) {
read_int(x);
}
} else if constexpr (is_iterable<T>::value) {
for (auto &y : x)
operator>>(y);
} else if constexpr (is_applyable<T>::value) {
std::apply([this](auto &...y) { ((this->operator>>(y)), ...); }, x);
}
return *this;
}
IO *tie(std::nullptr_t) { return this; }
void sync_with_stdio(bool) {}
};
IO io;
#define cin io
#define cout io
typedef uint64_t v4ull __attribute__((vector_size(32)));
// Based on https://en.algorithmica.org/hpc/algorithms/prefix/
inline void prefix_x2(int64_t *p) {
__m256i x = _mm256_load_si256((__m256i *)p);
x = _mm256_add_epi64(x, _mm256_slli_si256(x, 8));
_mm256_store_si256((__m256i *)p, x);
}
inline __m128i update(int64_t *p, __m128i s) {
__m128i d = _mm_set1_epi64x(p[1]);
__m128i x = _mm_load_si128((__m128i *)p);
x = _mm_add_epi64(s, x);
_mm_store_si128((__m128i *)p, x);
return _mm_add_epi64(s, d);
}
template <int N> inline void prefix(int64_t *a) {
for (int i = 0; i < N; i += 4)
prefix_x2(&a[i]);
__m128i s = _mm_setzero_si128();
for (int i = 0; i < N; i += 2)
s = update(&a[i], s);
}
// Based on https://en.algorithmica.org/hpc/data-structures/segment-trees/
template <int N, int b = 5> struct WideSegmentTree {
constexpr static int B = 1 << b;
struct Precalc {
alignas(64) uint64_t mask[B][B];
constexpr Precalc() : mask{} {
for (int k = 0; k < B; k++)
for (int i = 0; i < B; i++)
mask[k][i] = (i > k ? -1 : 0);
}
};
constexpr static Precalc T{};
constexpr static int height(int n) { return (n <= B ? 1 : height(n / B) + 1); }
constexpr static int offset(int h) {
int s = 0, n = N;
while (h--) {
n = (n + B - 1) / B;
s += n * B + B; // extra padding to make build easier to implement
}
return s;
}
constexpr static int H = height(N);
alignas(64) int64_t data[offset(H)];
inline void build() {
for (int h = 0; h < H - 1; h++) {
const int s = offset(h + 1) - offset(h) - B; // outer loop isn't unrolled properly without this
for (int i = 0; i < s; i += B) {
prefix<B>(&data[offset(h) + i]);
data[offset(h + 1) + (i >> b) + 1] = data[offset(h) + i + B - 1] + data[offset(h) + i + B];
data[offset(h) + i + B] = 0;
}
}
prefix<B>(&data[offset(H - 1)]);
}
inline int64_t sum(int k) const {
int64_t res = 0;
for (int h = H - 1; h >= 0; h--)
res += data[offset(h) + (k >> (h * b))];
return res;
}
inline void add(int k, int _x) {
v4ull x = _x + v4ull{};
for (int h = H - 1; h >= 0; h--) {
int p = k >> (h * b);
auto l = (v4ull *)&data[offset(h) + (p & ~(B - 1))];
auto m = (v4ull *)T.mask[p & (B - 1)];
for (int i = 0; i < B / 4; i++)
l[i] += x & m[i];
}
}
};
WideSegmentTree<MAX_N> s;
int x[MAX_N];
int main() {
uint32_t n, q;
cin >> n >> q;
for (int i = 0; i < n; i++) {
cin >> x[i];
s.data[s.offset(0) + i + 1] = x[i];
}
s.build();
for (int i = 0; i < q; i++) {
char t;
cin >> t;
if (t == '1') {
uint32_t k, u;
cin >> k >> u, k--;
s.add(k, u - x[k]);
x[k] = u;
} else {
uint32_t a, b;
cin >> a >> b, a--;
cout << s.sum(b) - s.sum(a) << '\n';
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment