FHT.cpp

View this file on GitHub
Last update: 2023-03-06 11:24:14-03:00
Link: View error logs on GitHub Actions

Depends on

Verified with

Code

#ifndef _LIB_FHT
#define _LIB_FHT
#include <bits/stdc++.h>
#include "BitTricks.cpp"
#include "NTT.cpp"
#include "polynomial/Transform.cpp"

namespace lib {
using namespace std;
namespace linalg {
template <typename Ring>
struct FHT {
  using Provider = MintRootProvider<Ring>;
  using T = Ring;
  using U = make_unsigned_t<typename Ring::type_int>;
  using U64 = make_unsigned_t<typename Ring::large_int>;
  static vector<Ring> fa;
  static const int MAX_LG_N = 30;
  static vector<Ring> g, ig;
  
  static void precompute() {
    if(!g.empty()) return;
    Provider();
    g.resize(MAX_LG_N);
    ig.resize(MAX_LG_N);
    for(int i = 0; i < MAX_LG_N; i++) {
      Ring w = Provider::g ^ (((Ring::mod-1) >> (i + 2)) * 3);
      w = -w;
      Ring iw = w.inverse();
      g[i] = w;
      ig[i] = iw;
    }
  }

  static inline U& v(Ring& p) {
    return (U&)p.data();
  }

  static inline U v(const Ring& p) {
    return (U)p.data();
  }

  static void dft_iter(Ring *p, int n) {
    // decimation-in-time
    // natural to reverse ordering
    for (int B = n >> 1; B; B >>= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = p[j + B] * w;
          p[j + B] = p[j] - z;
          p[j] += z;
        }
        w *= g[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void idft_iter(Ring *p, int n) {
    // decimation-in-frequency
    // reverse to natural ordering
    for (int B = 1; B < n; B <<= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = (p[j] - p[j + B]) * w;
          p[j] += p[j + B];
          p[j + B] = z;
        }
        w *= ig[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void swap(vector<Ring> &buf) { std::swap(fa, buf); }
  static void _dft(Ring *p, int n) {
    precompute();
    dft_iter(p, n);
  }
  static void _idft(Ring *p, int n) {
    precompute();
    idft_iter(p, n);
    Ring inv = Provider().inverse(n);
    for (int i = 0; i < n; i++)
      p[i] *= inv;
  }

  static void dft(int n) { _dft(fa.data(), n); }

  static void idft(int n) { _idft(fa.data(), n); }

  static void dft(vector<Ring> &v, int n) {
    swap(v);
    dft(n);
    swap(v);
  }
  static void idft(vector<Ring> &v, int n) {
    swap(v);
    idft(n);
    swap(v);
  }

  static int ensure(int a, int b = 0) {
    int n = a+b;
    n = next_power_of_two(n);
    if ((int)fa.size() < n)
      fa.resize(n);
    return n;
  }

  static void clear(int n) { fill(fa.begin(), fa.begin() + n, 0); }

  template<typename Iterator>
  static void fill(Iterator begin, Iterator end) {
    int n = ensure(distance(begin, end));
    int i = 0;
    for(auto it = begin; it != end; ++it) {
      fa[i++] = *it;
    }
    for(;i < n; i++) fa[i] = Ring();
  }

  static void _convolve(const vector<T> &a) {
    int n = ensure(a.size(), a.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fa[i];
    idft(n);
  }

  static void _convolve(const vector<T> &a, const vector<T> &b) {
    if(std::addressof(a) == std::addressof(b))
      return _convolve(a);
    int n = ensure(a.size(), b.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    // TODO: have a buffer for this
    auto fb = retrieve<FHT<T>, T>(n);
    for(size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < b.size() ? b[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fb[i];
    idft(n);
  }

  static vector<T> convolve(const vector<T>& a, const vector<T>& b) {
    int sz = (int)a.size() + b.size() - 1;
    _convolve(a, b);
    return retrieve<FHT<T>, T>(sz);
  }
  
  static VectorN<T> transform(vector<T> a, int n) {
    a.resize(n);
    dft(a, n);
    return a;
  }

  static vector<T> itransform(vector<T> a, int n) {
    int sz = a.size();
    idft(a, sz);
    a.resize(min(n, sz));
    return a;
  }
};

template<typename Ring>
vector<Ring> FHT<Ring>::fa = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::g = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::ig = vector<Ring>();
}

using FHTMultiplication = TransformMultiplication<linalg::FHT>;
} // namespace lib

#endif

#line 1 "FHT.cpp"


#include <bits/stdc++.h>
#line 1 "BitTricks.cpp"


#line 4 "BitTricks.cpp"

namespace lib {
long long next_power_of_two(long long n) {
  if (n <= 0) return 1;
  return 1LL << (sizeof(long long) * 8 - 1 - __builtin_clzll(n) +
                 ((n & (n - 1LL)) != 0));
}
} // namespace lib


#line 1 "NTT.cpp"


#line 1 "DFT.cpp"


#line 5 "DFT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template <typename Ring, typename Provider>
struct DFT {
  static vector<int> rev;
  static vector<Ring> fa;

  // function used to precompute rev for fixed size fft (n is a power of two)
  static void dft_rev(int n) {
    Provider()(n);
    int lbn = __builtin_ctz(n);
    if ((int)rev.size() < (1 << lbn))
      rev.resize(1 << lbn);
    int h = -1;
    for (int i = 1; i < n; i++) {
      if ((i & (i - 1)) == 0)
        h++;
      rev[i] = rev[i ^ (1 << h)] | (1 << (lbn - h - 1));
    }
  }

  static void dft_iter(Ring *p, int n) {
    Provider w;
    for (int L = 2; L <= n; L <<= 1) {
      for (int i = 0; i < n; i += L) {
        for (int j = 0; j < L / 2; j++) {
          Ring z = p[i + j + L / 2] * w[j + L / 2];
          p[i + j + L / 2] = p[i + j] - z;
          p[i + j] += z;
        }
      }
    }
  }

  static void swap(vector<Ring> &buf) { std::swap(fa, buf); }
  static void _dft(Ring *p, int n) {
    dft_rev(n);
    for (int i = 0; i < n; i++)
      if (i < rev[i])
        std::swap(p[i], p[rev[i]]);
    dft_iter(p, n);
  }
  static void _idft(Ring *p, int n) {
    _dft(p, n);
    reverse(p + 1, p + n);
    Ring inv = Provider().inverse(n);
    for (int i = 0; i < n; i++)
      p[i] *= inv;
  }

  static void dft(int n) { _dft(fa.data(), n); }

  static void idft(int n) { _idft(fa.data(), n); }

  static void dft(vector<Ring> &v, int n) {
    swap(v);
    dft(n);
    swap(v);
  }
  static void idft(vector<Ring> &v, int n) {
    swap(v);
    idft(n);
    swap(v);
  }

  static int ensure(int a, int b = 0) {
    int n = a+b;
    n = next_power_of_two(n);
    if ((int)fa.size() < n)
      fa.resize(n);
    return n;
  }

  static void clear(int n) { fill(fa.begin(), fa.begin() + n, 0); }

  template<typename Iterator>
  static void fill(Iterator begin, Iterator end) {
    int n = ensure(distance(begin, end));
    int i = 0;
    for(auto it = begin; it != end; ++it) {
      fa[i++] = *it;
    }
    for(;i < n; i++) fa[i] = Ring();
  }
};

template<typename DF, typename U>
static vector<U> retrieve(int n) {
  assert(n <= DF::fa.size());
  vector<U> res(n);
  for(int i = 0; i < n; i++) res[i] = (U)DF::fa[i];
  return res;
}

template<typename Ring, typename Provider>
vector<int> DFT<Ring, Provider>::rev = vector<int>();

template<typename Ring, typename Provider>
vector<Ring> DFT<Ring, Provider>::fa = vector<Ring>();
}
} // namespace lib


#line 1 "NumberTheory.cpp"


#line 4 "NumberTheory.cpp"

namespace lib {
using namespace std;
namespace nt {
int64_t inverse(int64_t a, int64_t b) {
  long long b0 = b, t, q;
  long long x0 = 0, x1 = 1;
  if (b == 1)
    return 1;
  while (a > 1) {
    q = a / b;
    t = b, b = a % b, a = t;
    t = x0, x0 = x1 - q * x0, x1 = t;
  }
  if (x1 < 0)
    x1 += b0;
  return x1;
}
template<typename T, typename U>
T powmod (T a, U b, U p) {
    int res = 1;
    while (b)
        if (b & 1)
            res = (int) (res * 1ll * a % p),  --b;
        else
            a = (int) (a * 1ll * a % p),  b >>= 1;
    return res;
}
template<typename T>
vector<T> factors(T n) {
  vector<T> f;
  for(T i = 2; i*i <= n; i++) {
    if(n % i == 0) f.push_back(i);
    while(n % i == 0) n /= i;
  }
  if(n > 1) f.push_back(n);
  return f;
}
} // namespace nt
} // namespace lib


#line 1 "VectorN.cpp"


#line 1 "Traits.cpp"


#line 4 "Traits.cpp"

namespace lib {
using namespace std;
namespace traits {

template <typename...> struct make_void { using type = void; };

template <typename... T> using void_t = typename make_void<T...>::type;

/// keep caide
template <typename Iterator>
using IteratorCategory = typename iterator_traits<Iterator>::iterator_category;

/// keep caide
template <typename Container>
using IteratorCategoryOf = IteratorCategory<typename Container::iterator>;

/// keep caide
template <typename Iterator>
using IteratorValue = typename iterator_traits<Iterator>::value_type;

/// keep caide
template <typename Container>
using IteratorValueOf = IteratorValue<typename Container::iterator>;

/// keep caide
template <typename Iterator>
using IsRandomIterator =
    is_base_of<random_access_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Iterator>
using IsInputIterator =
    is_base_of<input_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Iterator>
using IsBidirectionalIterator =
    is_base_of<bidirectional_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Container>
using HasRandomIterator =
    is_base_of<random_access_iterator_tag, IteratorCategoryOf<Container>>;

/// keep caide
template <typename Container>
using HasInputIterator =
    is_base_of<input_iterator_tag, IteratorCategoryOf<Container>>;

/// keep caide
template <typename Container>
using HasBidirectionalIterator =
    is_base_of<bidirectional_iterator_tag, IteratorCategoryOf<Container>>;
} // namespace traits
} // namespace lib


#line 5 "VectorN.cpp"

#define VEC_CONST_OP(op, typ) \
  type operator op(const typ rhs) const { \
    auto res = *this; \
    return res op##= rhs; \
  }

#define VEC_BIN_OP(op) \
  type& operator op##=(const type& rhs) { \
    if(rhs.size() > this->size()) \
      this->resize(rhs.size()); \
    int sz = this->size(); \
    for(int i = 0; i < (int)rhs.size(); i++) \
      (*this)[i] op##= rhs[i]; \
    for(int i = rhs.size(); i < sz; i++) \
      (*this)[i] op##= 0; \
    return *this; \
  } \
  VEC_CONST_OP(op, type)

#define VEC_SINGLE_OP(op, typ) \
  type& operator op##=(const typ rhs) { \
    for(auto& x : *this) \
      x op##= rhs; \
    return *this; \
  } \
  VEC_CONST_OP(op, typ)
  

namespace lib {
using namespace std;
template<typename T>
struct VectorN : vector<T> {
  using type = VectorN<T>;

  template <
      typename Container,
      typename enable_if<traits::HasInputIterator<Container>::value>::type * = nullptr>
  VectorN(const Container &container)
      : vector<T>(container.begin(), container.end()) {}

  VectorN(const initializer_list<T> &v)
      : vector<T>(v.begin(), v.end()) {}

  template<typename... Args>
  VectorN( Args&&... args ) 
      : vector<T>(std::forward<Args>(args)...) {}

  VEC_BIN_OP(+)
  VEC_BIN_OP(-)
  VEC_BIN_OP(*)

  VEC_SINGLE_OP(+, T&)
  VEC_SINGLE_OP(-, T&)
  VEC_SINGLE_OP(*, T&)
  VEC_SINGLE_OP(/, T&)
  VEC_SINGLE_OP(^, int64_t)

  type operator-() const {
    auto res = *this;
    for(auto& x : res) x = -x;
    return res;
  }

  type operator%(int n) const {
    // TODO: get rid of this
    // return *const_cast<type*>(this);
    return *this;
  }
};
} // namespace lib


#line 7 "NTT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template<typename T>
struct MintRootProvider {
  static size_t max_sz;
  static T g;
  static vector<T> w;

  MintRootProvider() {
    if(g == 0) {
      auto acc = T::mod-1;
      while(acc % 2 == 0) acc /= 2, max_sz++;

      auto factors = nt::factors(T::mod - 1);
      for(g = 2; (typename T::type_int)g < T::mod; g++) {
        bool ok = true;
        for(auto f : factors) {
          if(power(g, (T::mod-1)/f) == 1) {
            ok = false;
            break;
          }
        }
        if(ok) break;
      }
      assert(g != 0);
    }
  }

  pair<T, T> roots(int num, int den) {
    auto p = g ^ ((long long)(T::mod - 1) / den * num);
    return {p, p.inverse()};
  }

  T operator()(int n, int k) {
    return power(g, (T::mod-1)/(n/k));
  }
  void operator()(int n) {
    n = max(n, 2);
    int k = max((int)w.size(), 2);
    assert(n <= (1LL << max_sz));
    if ((int)w.size() < n)
      w.resize(n);
    else
      return;
    w[0] = w[1] = 1;
    for (; k < n; k *= 2) {
      T step = power(g, (T::mod-1)/(2*k));
      for(int i = k; i < 2*k; i++)
        w[i] = (i&1) ? w[i/2] * step : w[i/2];
    }
  }
  T operator[](int i) {
    return w[i];
  }

  T inverse(int n) {
    return T(1) / n;
  }
};

template<typename T>
size_t MintRootProvider<T>::max_sz = 1;
template<typename T>
T MintRootProvider<T>::g = T();
template<typename T>
vector<T> MintRootProvider<T>::w = vector<T>();

template<typename T>
struct NTT : public DFT<T, MintRootProvider<T>> {
  using Parent = DFT<T, MintRootProvider<T>>;
  using Parent::fa;
  using Parent::dft;
  using Parent::idft;

  static void _convolve(const vector<T> &a) {
    int n = Parent::ensure(a.size(), a.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    Parent::dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fa[i];
    Parent::idft(n);
  }

  static void _convolve(const vector<T> &a, const vector<T> &b) {
    if(std::addressof(a) == std::addressof(b))
      return _convolve(a);
    int n = Parent::ensure(a.size(), b.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    Parent::dft(n);
    // TODO: have a buffer for this
    auto fb = retrieve<Parent, T>(n);
    for(size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < b.size() ? b[i] : T();
    Parent::dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fb[i];
    Parent::idft(n);
  }

  static vector<T> convolve(const vector<T>& a, const vector<T>& b) {
    int sz = (int)a.size() + b.size() - 1;
    _convolve(a, b);
    return retrieve<Parent, T>(sz);
  }
  
  static VectorN<T> transform(vector<T> a, int n) {
    a.resize(n);
    Parent::dft(a, n);
    return a;
  }

  static vector<T> itransform(vector<T> a, int n) {
    int sz = a.size();
    Parent::idft(a, sz);
    a.resize(min(n, sz));
    return a;
  }
};
}

struct NTTMultiplication {
  template<typename T>
  using Transform = linalg::NTT<T>;

  template <typename Field>
  vector<Field> operator()(const vector<Field> &a,
                           const vector<Field> &b) const {
    return linalg::NTT<Field>::convolve(a, b);
  };

  template<typename Field>
  inline VectorN<Field> transform(int n, const vector<Field>& p) const {
    int np = next_power_of_two(n);
    return linalg::NTT<Field>::transform(p, np);
  }

  template<typename Field>
  inline vector<Field> itransform(int n, const vector<Field>& p) const {
    return linalg::NTT<Field>::itransform(p, n);
  }

  template <typename Field, typename Functor, typename ...Ts>
  inline vector<Field> on_transform(
    int n,
    Functor& f,        
    const vector<Ts>&... vs) const {
    int np = next_power_of_two(n);
    return linalg::NTT<Field>::itransform(
      f(n, linalg::NTT<Field>::transform(vs, np)...), n);
  }
};
} // namespace lib


#line 1 "polynomial/Transform.cpp"


#line 4 "polynomial/Transform.cpp"

namespace lib {
using namespace std;
template<template <class> class T>
struct TransformMultiplication {
  template<typename Field>
  using Transform = T<Field>;

  template <typename Field>
  vector<Field> operator()(const vector<Field> &a,
                           const vector<Field> &b) const {
    return T<Field>::convolve(a, b);
  };

  template<typename Field>
  inline VectorN<Field> transform(int n, const vector<Field>& p) const {
    int np = next_power_of_two(n);
    return T<Field>::transform(p, np);
  }

  template<typename Field>
  inline vector<Field> itransform(int n, const vector<Field>& p) const {
    return T<Field>::itransform(p, n);
  }

  template <typename Field, typename Functor, typename ...Ts>
  inline vector<Field> on_transform(
    int n,
    Functor& f,        
    const vector<Ts>&... vs) const {
    int np = next_power_of_two(n);
    return T<Field>::itransform(
      f(n, T<Field>::transform(vs, np)...), n);
  }
};
} // namespace lib


#line 7 "FHT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template <typename Ring>
struct FHT {
  using Provider = MintRootProvider<Ring>;
  using T = Ring;
  using U = make_unsigned_t<typename Ring::type_int>;
  using U64 = make_unsigned_t<typename Ring::large_int>;
  static vector<Ring> fa;
  static const int MAX_LG_N = 30;
  static vector<Ring> g, ig;
  
  static void precompute() {
    if(!g.empty()) return;
    Provider();
    g.resize(MAX_LG_N);
    ig.resize(MAX_LG_N);
    for(int i = 0; i < MAX_LG_N; i++) {
      Ring w = Provider::g ^ (((Ring::mod-1) >> (i + 2)) * 3);
      w = -w;
      Ring iw = w.inverse();
      g[i] = w;
      ig[i] = iw;
    }
  }

  static inline U& v(Ring& p) {
    return (U&)p.data();
  }

  static inline U v(const Ring& p) {
    return (U)p.data();
  }

  static void dft_iter(Ring *p, int n) {
    // decimation-in-time
    // natural to reverse ordering
    for (int B = n >> 1; B; B >>= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = p[j + B] * w;
          p[j + B] = p[j] - z;
          p[j] += z;
        }
        w *= g[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void idft_iter(Ring *p, int n) {
    // decimation-in-frequency
    // reverse to natural ordering
    for (int B = 1; B < n; B <<= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = (p[j] - p[j + B]) * w;
          p[j] += p[j + B];
          p[j + B] = z;
        }
        w *= ig[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void swap(vector<Ring> &buf) { std::swap(fa, buf); }
  static void _dft(Ring *p, int n) {
    precompute();
    dft_iter(p, n);
  }
  static void _idft(Ring *p, int n) {
    precompute();
    idft_iter(p, n);
    Ring inv = Provider().inverse(n);
    for (int i = 0; i < n; i++)
      p[i] *= inv;
  }

  static void dft(int n) { _dft(fa.data(), n); }

  static void idft(int n) { _idft(fa.data(), n); }

  static void dft(vector<Ring> &v, int n) {
    swap(v);
    dft(n);
    swap(v);
  }
  static void idft(vector<Ring> &v, int n) {
    swap(v);
    idft(n);
    swap(v);
  }

  static int ensure(int a, int b = 0) {
    int n = a+b;
    n = next_power_of_two(n);
    if ((int)fa.size() < n)
      fa.resize(n);
    return n;
  }

  static void clear(int n) { fill(fa.begin(), fa.begin() + n, 0); }

  template<typename Iterator>
  static void fill(Iterator begin, Iterator end) {
    int n = ensure(distance(begin, end));
    int i = 0;
    for(auto it = begin; it != end; ++it) {
      fa[i++] = *it;
    }
    for(;i < n; i++) fa[i] = Ring();
  }

  static void _convolve(const vector<T> &a) {
    int n = ensure(a.size(), a.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fa[i];
    idft(n);
  }

  static void _convolve(const vector<T> &a, const vector<T> &b) {
    if(std::addressof(a) == std::addressof(b))
      return _convolve(a);
    int n = ensure(a.size(), b.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    // TODO: have a buffer for this
    auto fb = retrieve<FHT<T>, T>(n);
    for(size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < b.size() ? b[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fb[i];
    idft(n);
  }

  static vector<T> convolve(const vector<T>& a, const vector<T>& b) {
    int sz = (int)a.size() + b.size() - 1;
    _convolve(a, b);
    return retrieve<FHT<T>, T>(sz);
  }
  
  static VectorN<T> transform(vector<T> a, int n) {
    a.resize(n);
    dft(a, n);
    return a;
  }

  static vector<T> itransform(vector<T> a, int n) {
    int sz = a.size();
    idft(a, sz);
    a.resize(min(n, sz));
    return a;
  }
};

template<typename Ring>
vector<Ring> FHT<Ring>::fa = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::g = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::ig = vector<Ring>();
}

using FHTMultiplication = TransformMultiplication<linalg::FHT>;
} // namespace lib