Mukundan314 · December 12, 2024 12:18
diff --git a/segtree.cpp b/segtree.cpp
 #define NDEBUG

 #pragma GCC optimize("Ofast", "unroll-loops")
 #pragma GCC target("avx2", "bmi", "bmi2", "popcnt", "lzcnt")

 #include <bits/stdc++.h>
 #include <x86intrin.h>

 const int MAX_N = 200'000;

 template< class T >
 constexpr std::size_t tuple_size_v = std::tuple_size<T>::value;

 /*nor's fastio: https://judge.yosupo.jp/submission/75233*/
 struct IOPre {
  static constexpr int TEN = 10, SZ = TEN * TEN * TEN * TEN;
  std::array<char, 4 * SZ> num;
  constexpr IOPre() : num{} {
    for (int i = 0; i < SZ; i++) {
      int n = i;
      for (int j = 3; j >= 0; j--) {
        num[i * 4 + j] = static_cast<char>(n % TEN + '0');
        n /= TEN;
      }
    }
  }
 };
 struct IO {
  static constexpr int SZ = 1 << 17, LEN = 32, TEN = 10, HUNDRED = TEN * TEN, THOUSAND = HUNDRED * TEN,
                       TENTHOUSAND = THOUSAND * TEN, MAGIC_MULTIPLY = 205, MAGIC_SHIFT = 11, MASK = 15, TWELVE = 12,
                       SIXTEEN = 16;
  static constexpr IOPre io_pre = {};
  std::array<char, SZ> input_buffer, output_buffer;
  int input_ptr_left, input_ptr_right, output_ptr_right;
  IO() : input_buffer{}, output_buffer{}, input_ptr_left{}, input_ptr_right{}, output_ptr_right{} {}
  IO(const IO &) = delete;
  IO(IO &&) = delete;
  IO &operator=(const IO &) = delete;
  IO &operator=(IO &&) = delete;
  ~IO() { flush(); }
  template <class T> struct is_char {
    static constexpr bool value = std::is_same_v<T, char>;
  };
  template <class T> struct is_bool {
    static constexpr bool value = std::is_same_v<T, bool>;
  };
  template <class T> struct is_string {
    static constexpr bool value = std::is_same_v<T, std::string> || std::is_same_v<T, const char *> ||
                                  std::is_same_v<T, char *> || std::is_same_v<std::decay_t<T>, char *>;
    ;
  };
  template <class T, class D = void> struct is_custom {
    static constexpr bool value = false;
  };
  template <class T> struct is_custom<T, std::void_t<typename T::internal_value_type>> {
    static constexpr bool value = true;
  };
  template <class T> struct is_default {
    static constexpr bool value =
        is_char<T>::value || is_bool<T>::value || is_string<T>::value || std::is_integral_v<T>;
  };
  template <class T, class D = void> struct is_iterable {
    static constexpr bool value = false;
  };
  template <class T> struct is_iterable<T, typename std::void_t<decltype(std::begin(std::declval<T>()))>> {
    static constexpr bool value = true;
  };
  template <class T, class D = void, class E = void> struct is_applyable {
    static constexpr bool value = false;
  };
  template <class T>
  struct is_applyable<T, std::void_t<typename std::tuple_size<T>::type>,
                      std::void_t<decltype(std::get<0>(std::declval<T>()))>> {
    static constexpr bool value = true;
  };
  template <class T>
  static constexpr bool needs_newline = (is_iterable<T>::value || is_applyable<T>::value) && (!is_default<T>::value);
  template <typename T, typename U> struct any_needs_newline {
    static constexpr bool value = false;
  };
  template <typename T> struct any_needs_newline<T, std::index_sequence<>> {
    static constexpr bool value = false;
  };
  template <typename T, std::size_t I, std::size_t... Is> struct any_needs_newline<T, std::index_sequence<I, Is...>> {
    static constexpr bool value = needs_newline<decltype(std::get<I>(std::declval<T>()))> ||
                                  any_needs_newline<T, std::index_sequence<Is...>>::value;
  };
  inline void load() {
    memmove(std::begin(input_buffer), std::begin(input_buffer) + input_ptr_left, input_ptr_right - input_ptr_left);
    input_ptr_right = input_ptr_right - input_ptr_left +
                      static_cast<int>(fread_unlocked(std::begin(input_buffer) + input_ptr_right - input_ptr_left, 1,
                                                      SZ - input_ptr_right + input_ptr_left, stdin));
    input_ptr_left = 0;
  }
  inline void read_char(char &c) {
    if (input_ptr_left + LEN > input_ptr_right)
      load();
    c = input_buffer[input_ptr_left++];
  }
  inline void read_string(std::string &x) {
    char c;
    while (read_char(c), c < '!')
      continue;
    x = c;
    while (read_char(c), c >= '!')
      x += c;
  }
  template <class T> inline std::enable_if_t<std::is_integral_v<T>, void> read_int(T &x) {
    if (input_ptr_left + LEN > input_ptr_right)
      load();
    char c = 0;
    do
      c = input_buffer[input_ptr_left++];
    while (c < '-');
    [[maybe_unused]] bool minus = false;
    if constexpr (std::is_signed<T>::value == true)
      if (c == '-')
        minus = true, c = input_buffer[input_ptr_left++];
    x = 0;
    while (c >= '0')
      x = x * TEN + (c & MASK), c = input_buffer[input_ptr_left++];
    if constexpr (std::is_signed<T>::value == true)
      if (minus)
        x = -x;
  }
  inline void skip_space() {
    if (input_ptr_left + LEN > input_ptr_right)
      load();
    while (input_buffer[input_ptr_left] <= ' ')
      input_ptr_left++;
  }
  inline void flush() {
    fwrite_unlocked(std::begin(output_buffer), 1, output_ptr_right, stdout);
    output_ptr_right = 0;
  }
  inline void write_char(char c) {
    if (output_ptr_right > SZ - LEN)
      flush();
    output_buffer[output_ptr_right++] = c;
  }
  inline void write_bool(bool b) {
    if (output_ptr_right > SZ - LEN)
      flush();
    output_buffer[output_ptr_right++] = b ? '1' : '0';
  }
  inline void write_string(const std::string &s) {
    for (auto x : s)
      write_char(x);
  }
  inline void write_string(const char *s) {
    while (*s)
      write_char(*s++);
  }
  inline void write_string(char *s) {
    while (*s)
      write_char(*s++);
  }
  template <typename T> inline std::enable_if_t<std::is_integral_v<T>, void> write_int(T x) {
    if (output_ptr_right > SZ - LEN)
      flush();
    if (!x) {
      output_buffer[output_ptr_right++] = '0';
      return;
    }
    if constexpr (std::is_signed<T>::value == true)
      if (x < 0)
        output_buffer[output_ptr_right++] = '-', x = -x;
    int i = TWELVE;
    std::array<char, SIXTEEN> buf{};
    while (x >= TENTHOUSAND) {
      memcpy(std::begin(buf) + i, std::begin(io_pre.num) + (x % TENTHOUSAND) * 4, 4);
      x /= TENTHOUSAND;
      i -= 4;
    }
    if (x < HUNDRED) {
      if (x < TEN) {
        output_buffer[output_ptr_right++] = static_cast<char>('0' + x);
      } else {
        std::uint32_t q = (static_cast<std::uint32_t>(x) * MAGIC_MULTIPLY) >> MAGIC_SHIFT;
        std::uint32_t r = static_cast<std::uint32_t>(x) - q * TEN;
        output_buffer[output_ptr_right] = static_cast<char>('0' + q);
        output_buffer[output_ptr_right + 1] = static_cast<char>('0' + r);
        output_ptr_right += 2;
      }
    } else {
      if (x < THOUSAND) {
        memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2) + 1, 3),
            output_ptr_right += 3;
      } else {
        memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2), 4),
            output_ptr_right += 4;
      }
    }
    memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(buf) + i + 4, TWELVE - i);
    output_ptr_right += TWELVE - i;
  }
  template <typename T_> IO &operator<<(T_ &&x) {
    using T = typename std::remove_cv<typename std::remove_reference<T_>::type>::type;
    static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
    if constexpr (is_custom<T>::value) {
      write_int(x.get());
    } else if constexpr (is_default<T>::value) {
      if constexpr (is_bool<T>::value) {
        write_bool(x);
      } else if constexpr (is_string<T>::value) {
        write_string(x);
      } else if constexpr (is_char<T>::value) {
        write_char(x);
      } else if constexpr (std::is_integral_v<T>) {
        write_int(x);
      }
    } else if constexpr (is_iterable<T>::value) {
      using E = decltype(*std::begin(x));
      constexpr char sep = needs_newline<E> ? '\n' : ' ';
      int i = 0;
      for (const auto &y : x) {
        if (i++)
          write_char(sep);
        operator<<(y);
      }
    } else if constexpr (is_applyable<T>::value) {
      constexpr char sep = (any_needs_newline<T, std::make_index_sequence<tuple_size_v<T>>>::value) ? '\n' : ' ';
      int i = 0;
      std::apply([this, &sep, &i](auto const &...y) { (((i++ ? write_char(sep) : void()), this->operator<<(y)), ...); },
                 x);
    }
    return *this;
  }
  template <typename T> IO &operator>>(T &x) {
    static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
    static_assert(!is_bool<T>::value);
    if constexpr (is_custom<T>::value) {
      typename T::internal_value_type y;
      read_int(y);
      x = y;
    } else if constexpr (is_default<T>::value) {
      if constexpr (is_string<T>::value) {
        read_string(x);
      } else if constexpr (is_char<T>::value) {
        read_char(x);
      } else if constexpr (std::is_integral_v<T>) {
        read_int(x);
      }
    } else if constexpr (is_iterable<T>::value) {
      for (auto &y : x)
        operator>>(y);
    } else if constexpr (is_applyable<T>::value) {
      std::apply([this](auto &...y) { ((this->operator>>(y)), ...); }, x);
    }
    return *this;
  }
  IO *tie(std::nullptr_t) { return this; }
  void sync_with_stdio(bool) {}
 };
 IO io;

 #define cin io
 #define cout io

 typedef uint64_t v4ull __attribute__((vector_size(32)));

 // Based on https://en.algorithmica.org/hpc/algorithms/prefix/
 inline void prefix_x2(int64_t *p) {
  __m256i x = _mm256_load_si256((__m256i *)p);
  x = _mm256_add_epi64(x, _mm256_slli_si256(x, 8));
  _mm256_store_si256((__m256i *)p, x);
 }

 inline __m128i update(int64_t *p, __m128i s) {
  __m128i d = _mm_set1_epi64x(p[1]);
  __m128i x = _mm_load_si128((__m128i *)p);
  x = _mm_add_epi64(s, x);
  _mm_store_si128((__m128i *)p, x);
  return _mm_add_epi64(s, d);
 }

 template <int N> inline void prefix(int64_t *a) {
  for (int i = 0; i < N; i += 4)
    prefix_x2(&a[i]);

  __m128i s = _mm_setzero_si128();
  for (int i = 0; i < N; i += 2)
    s = update(&a[i], s);
 }

 // Based on https://en.algorithmica.org/hpc/data-structures/segment-trees/
 template <int N, int b = 5> struct WideSegmentTree {
  constexpr static int B = 1 << b;

  struct Precalc {
    alignas(64) uint64_t mask[B][B];

    constexpr Precalc() : mask{} {
      for (int k = 0; k < B; k++)
        for (int i = 0; i < B; i++)
          mask[k][i] = (i > k ? -1 : 0);
    }
  };

  constexpr static Precalc T{};

  constexpr static int height(int n) { return (n <= B ? 1 : height(n / B) + 1); }

  constexpr static int offset(int h) {
    int s = 0, n = N;
    while (h--) {
      n = (n + B - 1) / B;
      s += n * B + B; // extra padding to make build easier to implement
    }
    return s;
  }

  constexpr static int H = height(N);

  alignas(64) int64_t data[offset(H)];

  inline void build() {
    for (int h = 0; h < H - 1; h++) {
      const int s = offset(h + 1) - offset(h) - B; // outer loop isn't unrolled properly without this
      for (int i = 0; i < s; i += B) {
        prefix<B>(&data[offset(h) + i]);
        data[offset(h + 1) + (i >> b) + 1] = data[offset(h) + i + B - 1] + data[offset(h) + i + B];
        data[offset(h) + i + B] = 0;
      }
    }

    prefix<B>(&data[offset(H - 1)]);
  }

  inline int64_t sum(int k) const {
    int64_t res = 0;
    for (int h = H - 1; h >= 0; h--)
      res += data[offset(h) + (k >> (h * b))];
    return res;
  }

  inline void add(int k, int _x) {
    v4ull x = _x + v4ull{};
    for (int h = H - 1; h >= 0; h--) {
      int p = k >> (h * b);
      auto l = (v4ull *)&data[offset(h) + (p & ~(B - 1))];
      auto m = (v4ull *)T.mask[p & (B - 1)];
      for (int i = 0; i < B / 4; i++)
        l[i] += x & m[i];
    }
  }
 };

 WideSegmentTree<MAX_N> s;
 int x[MAX_N];

 int main() {
  uint32_t n, q;
  cin >> n >> q;

  for (int i = 0; i < n; i++) {
    cin >> x[i];
    s.data[s.offset(0) + i + 1] = x[i];
  }

  s.build();

  for (int i = 0; i < q; i++) {
    char t;
    cin >> t;

    if (t == '1') {
      uint32_t k, u;
      cin >> k >> u, k--;
      s.add(k, u - x[k]);
      x[k] = u;
    } else {
      uint32_t a, b;
      cin >> a >> b, a--;
      cout << s.sum(b) - s.sum(a) << '\n';
    }
  }
 }
	#define NDEBUG

	#pragma GCC optimize("Ofast", "unroll-loops")
	#pragma GCC target("avx2", "bmi", "bmi2", "popcnt", "lzcnt")

	#include <bits/stdc++.h>
	#include <x86intrin.h>

	const int MAX_N = 200'000;

	template< class T >
	constexpr std::size_t tuple_size_v = std::tuple_size<T>::value;

	/nor's fastio: https://judge.yosupo.jp/submission/75233/
	struct IOPre {
	static constexpr int TEN = 10, SZ = TEN * TEN * TEN * TEN;
	std::array<char, 4 * SZ> num;
	constexpr IOPre() : num{} {
	for (int i = 0; i < SZ; i++) {
	int n = i;
	for (int j = 3; j >= 0; j--) {
	num[i * 4 + j] = static_cast<char>(n % TEN + '0');
	n /= TEN;
	}
	}
	}
	};
	struct IO {
	static constexpr int SZ = 1 << 17, LEN = 32, TEN = 10, HUNDRED = TEN * TEN, THOUSAND = HUNDRED * TEN,
	TENTHOUSAND = THOUSAND * TEN, MAGIC_MULTIPLY = 205, MAGIC_SHIFT = 11, MASK = 15, TWELVE = 12,
	SIXTEEN = 16;
	static constexpr IOPre io_pre = {};
	std::array<char, SZ> input_buffer, output_buffer;
	int input_ptr_left, input_ptr_right, output_ptr_right;
	IO() : input_buffer{}, output_buffer{}, input_ptr_left{}, input_ptr_right{}, output_ptr_right{} {}
	IO(const IO &) = delete;
	IO(IO &&) = delete;
	IO &operator=(const IO &) = delete;
	IO &operator=(IO &&) = delete;
	~IO() { flush(); }
	template <class T> struct is_char {
	static constexpr bool value = std::is_same_v<T, char>;
	};
	template <class T> struct is_bool {
	static constexpr bool value = std::is_same_v<T, bool>;
	};
	template <class T> struct is_string {
	static constexpr bool value = std::is_same_v<T, std::string> \|\| std::is_same_v<T, const char *> \|\|
	std::is_same_v<T, char > \|\| std::is_same_v<std::decay_t<T>, char >;
	;
	};
	template <class T, class D = void> struct is_custom {
	static constexpr bool value = false;
	};
	template <class T> struct is_custom<T, std::void_t<typename T::internal_value_type>> {
	static constexpr bool value = true;
	};
	template <class T> struct is_default {
	static constexpr bool value =
	is_char<T>::value \|\| is_bool<T>::value \|\| is_string<T>::value \|\| std::is_integral_v<T>;
	};
	template <class T, class D = void> struct is_iterable {
	static constexpr bool value = false;
	};
	template <class T> struct is_iterable<T, typename std::void_t<decltype(std::begin(std::declval<T>()))>> {
	static constexpr bool value = true;
	};
	template <class T, class D = void, class E = void> struct is_applyable {
	static constexpr bool value = false;
	};
	template <class T>
	struct is_applyable<T, std::void_t<typename std::tuple_size<T>::type>,
	std::void_t<decltype(std::get<0>(std::declval<T>()))>> {
	static constexpr bool value = true;
	};
	template <class T>
	static constexpr bool needs_newline = (is_iterable<T>::value \|\| is_applyable<T>::value) && (!is_default<T>::value);
	template <typename T, typename U> struct any_needs_newline {
	static constexpr bool value = false;
	};
	template <typename T> struct any_needs_newline<T, std::index_sequence<>> {
	static constexpr bool value = false;
	};
	template <typename T, std::size_t I, std::size_t... Is> struct any_needs_newline<T, std::index_sequence<I, Is...>> {
	static constexpr bool value = needs_newline<decltype(std::get<I>(std::declval<T>()))> \|\|
	any_needs_newline<T, std::index_sequence<Is...>>::value;
	};
	inline void load() {
	memmove(std::begin(input_buffer), std::begin(input_buffer) + input_ptr_left, input_ptr_right - input_ptr_left);
	input_ptr_right = input_ptr_right - input_ptr_left +
	static_cast<int>(fread_unlocked(std::begin(input_buffer) + input_ptr_right - input_ptr_left, 1,
	SZ - input_ptr_right + input_ptr_left, stdin));
	input_ptr_left = 0;
	}
	inline void read_char(char &c) {
	if (input_ptr_left + LEN > input_ptr_right)
	load();
	c = input_buffer[input_ptr_left++];
	}
	inline void read_string(std::string &x) {
	char c;
	while (read_char(c), c < '!')
	continue;
	x = c;
	while (read_char(c), c >= '!')
	x += c;
	}
	template <class T> inline std::enable_if_t<std::is_integral_v<T>, void> read_int(T &x) {
	if (input_ptr_left + LEN > input_ptr_right)
	load();
	char c = 0;
	do
	c = input_buffer[input_ptr_left++];
	while (c < '-');
	[[maybe_unused]] bool minus = false;
	if constexpr (std::is_signed<T>::value == true)
	if (c == '-')
	minus = true, c = input_buffer[input_ptr_left++];
	x = 0;
	while (c >= '0')
	x = x * TEN + (c & MASK), c = input_buffer[input_ptr_left++];
	if constexpr (std::is_signed<T>::value == true)
	if (minus)
	x = -x;
	}
	inline void skip_space() {
	if (input_ptr_left + LEN > input_ptr_right)
	load();
	while (input_buffer[input_ptr_left] <= ' ')
	input_ptr_left++;
	}
	inline void flush() {
	fwrite_unlocked(std::begin(output_buffer), 1, output_ptr_right, stdout);
	output_ptr_right = 0;
	}
	inline void write_char(char c) {
	if (output_ptr_right > SZ - LEN)
	flush();
	output_buffer[output_ptr_right++] = c;
	}
	inline void write_bool(bool b) {
	if (output_ptr_right > SZ - LEN)
	flush();
	output_buffer[output_ptr_right++] = b ? '1' : '0';
	}
	inline void write_string(const std::string &s) {
	for (auto x : s)
	write_char(x);
	}
	inline void write_string(const char *s) {
	while (*s)
	write_char(*s++);
	}
	inline void write_string(char *s) {
	while (*s)
	write_char(*s++);
	}
	template <typename T> inline std::enable_if_t<std::is_integral_v<T>, void> write_int(T x) {
	if (output_ptr_right > SZ - LEN)
	flush();
	if (!x) {
	output_buffer[output_ptr_right++] = '0';
	return;
	}
	if constexpr (std::is_signed<T>::value == true)
	if (x < 0)
	output_buffer[output_ptr_right++] = '-', x = -x;
	int i = TWELVE;
	std::array<char, SIXTEEN> buf{};
	while (x >= TENTHOUSAND) {
	memcpy(std::begin(buf) + i, std::begin(io_pre.num) + (x % TENTHOUSAND) * 4, 4);
	x /= TENTHOUSAND;
	i -= 4;
	}
	if (x < HUNDRED) {
	if (x < TEN) {
	output_buffer[output_ptr_right++] = static_cast<char>('0' + x);
	} else {
	std::uint32_t q = (static_cast<std::uint32_t>(x) * MAGIC_MULTIPLY) >> MAGIC_SHIFT;
	std::uint32_t r = static_cast<std::uint32_t>(x) - q * TEN;
	output_buffer[output_ptr_right] = static_cast<char>('0' + q);
	output_buffer[output_ptr_right + 1] = static_cast<char>('0' + r);
	output_ptr_right += 2;
	}
	} else {
	if (x < THOUSAND) {
	memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2) + 1, 3),
	output_ptr_right += 3;
	} else {
	memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(io_pre.num) + (x << 2), 4),
	output_ptr_right += 4;
	}
	}
	memcpy(std::begin(output_buffer) + output_ptr_right, std::begin(buf) + i + 4, TWELVE - i);
	output_ptr_right += TWELVE - i;
	}
	template <typename T_> IO &operator<<(T_ &&x) {
	using T = typename std::remove_cv<typename std::remove_reference<T_>::type>::type;
	static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
	if constexpr (is_custom<T>::value) {
	write_int(x.get());
	} else if constexpr (is_default<T>::value) {
	if constexpr (is_bool<T>::value) {
	write_bool(x);
	} else if constexpr (is_string<T>::value) {
	write_string(x);
	} else if constexpr (is_char<T>::value) {
	write_char(x);
	} else if constexpr (std::is_integral_v<T>) {
	write_int(x);
	}
	} else if constexpr (is_iterable<T>::value) {
	using E = decltype(*std::begin(x));
	constexpr char sep = needs_newline<E> ? '\n' : ' ';
	int i = 0;
	for (const auto &y : x) {
	if (i++)
	write_char(sep);
	operator<<(y);
	}
	} else if constexpr (is_applyable<T>::value) {
	constexpr char sep = (any_needs_newline<T, std::make_index_sequence<tuple_size_v<T>>>::value) ? '\n' : ' ';
	int i = 0;
	std::apply([this, &sep, &i](auto const &...y) { (((i++ ? write_char(sep) : void()), this->operator<<(y)), ...); },
	x);
	}
	return *this;
	}
	template <typename T> IO &operator>>(T &x) {
	static_assert(is_custom<T>::value or is_default<T>::value or is_iterable<T>::value or is_applyable<T>::value);
	static_assert(!is_bool<T>::value);
	if constexpr (is_custom<T>::value) {
	typename T::internal_value_type y;
	read_int(y);
	x = y;
	} else if constexpr (is_default<T>::value) {
	if constexpr (is_string<T>::value) {
	read_string(x);
	} else if constexpr (is_char<T>::value) {
	read_char(x);
	} else if constexpr (std::is_integral_v<T>) {
	read_int(x);
	}
	} else if constexpr (is_iterable<T>::value) {
	for (auto &y : x)
	operator>>(y);
	} else if constexpr (is_applyable<T>::value) {
	std::apply([this](auto &...y) { ((this->operator>>(y)), ...); }, x);
	}
	return *this;
	}
	IO *tie(std::nullptr_t) { return this; }
	void sync_with_stdio(bool) {}
	};
	IO io;

	#define cin io
	#define cout io

	typedef uint64_t v4ull __attribute__((vector_size(32)));

	// Based on https://en.algorithmica.org/hpc/algorithms/prefix/
	inline void prefix_x2(int64_t *p) {
	__m256i x = _mm256_load_si256((__m256i *)p);
	x = _mm256_add_epi64(x, _mm256_slli_si256(x, 8));
	_mm256_store_si256((__m256i *)p, x);
	}

	inline __m128i update(int64_t *p, __m128i s) {
	__m128i d = _mm_set1_epi64x(p[1]);
	__m128i x = _mm_load_si128((__m128i *)p);
	x = _mm_add_epi64(s, x);
	_mm_store_si128((__m128i *)p, x);
	return _mm_add_epi64(s, d);
	}

	template <int N> inline void prefix(int64_t *a) {
	for (int i = 0; i < N; i += 4)
	prefix_x2(&a[i]);

	__m128i s = _mm_setzero_si128();
	for (int i = 0; i < N; i += 2)
	s = update(&a[i], s);
	}

	// Based on https://en.algorithmica.org/hpc/data-structures/segment-trees/
	template <int N, int b = 5> struct WideSegmentTree {
	constexpr static int B = 1 << b;

	struct Precalc {
	alignas(64) uint64_t mask[B][B];

	constexpr Precalc() : mask{} {
	for (int k = 0; k < B; k++)
	for (int i = 0; i < B; i++)
	mask[k][i] = (i > k ? -1 : 0);
	}
	};

	constexpr static Precalc T{};

	constexpr static int height(int n) { return (n <= B ? 1 : height(n / B) + 1); }

	constexpr static int offset(int h) {
	int s = 0, n = N;
	while (h--) {
	n = (n + B - 1) / B;
	s += n * B + B; // extra padding to make build easier to implement
	}
	return s;
	}

	constexpr static int H = height(N);

	alignas(64) int64_t data[offset(H)];

	inline void build() {
	for (int h = 0; h < H - 1; h++) {
	const int s = offset(h + 1) - offset(h) - B; // outer loop isn't unrolled properly without this
	for (int i = 0; i < s; i += B) {
	prefix<B>(&data[offset(h) + i]);
	data[offset(h + 1) + (i >> b) + 1] = data[offset(h) + i + B - 1] + data[offset(h) + i + B];
	data[offset(h) + i + B] = 0;
	}
	}

	prefix<B>(&data[offset(H - 1)]);
	}

	inline int64_t sum(int k) const {
	int64_t res = 0;
	for (int h = H - 1; h >= 0; h--)
	res += data[offset(h) + (k >> (h * b))];
	return res;
	}

	inline void add(int k, int _x) {
	v4ull x = _x + v4ull{};
	for (int h = H - 1; h >= 0; h--) {
	int p = k >> (h * b);
	auto l = (v4ull *)&data[offset(h) + (p & ~(B - 1))];
	auto m = (v4ull *)T.mask[p & (B - 1)];
	for (int i = 0; i < B / 4; i++)
	l[i] += x & m[i];
	}
	}
	};

	WideSegmentTree<MAX_N> s;
	int x[MAX_N];

	int main() {
	uint32_t n, q;
	cin >> n >> q;

	for (int i = 0; i < n; i++) {
	cin >> x[i];
	s.data[s.offset(0) + i + 1] = x[i];
	}

	s.build();

	for (int i = 0; i < q; i++) {
	char t;
	cin >> t;

	if (t == '1') {
	uint32_t k, u;
	cin >> k >> u, k--;
	s.add(k, u - x[k]);
	x[k] = u;
	} else {
	uint32_t a, b;
	cin >> a >> b, a--;
	cout << s.sum(b) - s.sum(a) << '\n';
	}
	}
	}