cp-includes

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub rsalesc/cp-includes

:heavy_check_mark: tests/yosupo/fht-convolution.test.cpp

Depends on

Code

#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod"

#include <bits/stdc++.h>
#include "ModularInteger.cpp"
#include "FHT.cpp"
using namespace std;
 
#define mp make_pair
#define mt make_tuple
#define pb push_back
#define ms(v, x) memset((v), (x), sizeof(v))
#define all(v) (v).begin(), (v).end()
#define ff first
#define ss second
#define iopt ios::sync_with_stdio(false); cin.tie(0)
#define untie(p, a, b) decltype(p.first) a = p.first, decltype(p.second) b = p.second
 
int gcd(int a, int b) { return b == 0 ? a : gcd(b, a%b); }
int power(int x, int p, int MOD) {
    if(p == 0) return 1%MOD;
    if(p == 1) return x%MOD;
    int res = power(x, p/2, MOD);
    res = (long long)res*res%MOD;
    if(p&1) res = (long long)res*x%MOD;
    return res;
}
 
typedef pair<int, int> ii;
typedef long double LD;
typedef vector<int> vi;

#pragma region template

using ll = long long;
using ull = unsigned long long;
constexpr ll TEN(int n) { return (n == 0) ? 1 : 10 * TEN(n - 1); }
template <class T> using V = vector<T>;
template <class T> using VV = V<V<T>>;

#ifdef LOCAL
struct PrettyOS {
    ostream& os;
    bool first;
    template <class T> auto operator<<(T&& x) {
        if (!first) os << ", ";
        first = false;
        os << x;
        return *this;
    }
};
template <class... T> void dbg0(T&&... t) {
    (PrettyOS{cerr, true} << ... << t);
}
#define dbg(...)                                            \
    do {                                                    \
        cerr << __LINE__ << " : " << #__VA_ARGS__ << " = "; \
        dbg0(__VA_ARGS__);                                  \
        cerr << endl;                                       \
    } while (false);
#else
#define dbg(...)
#endif

template <class T, class U>
ostream& operator<<(ostream& os, const pair<T, U>& p) {
    return os << "P(" << p.first << ", " << p.second << ")";
}

template <class T> ostream& operator<<(ostream& os, const V<T>& v) {
    os << "[";
    for (auto d : v) os << d << ", ";
    return os << "]";
}

struct Scanner {
    FILE* fp = nullptr;
    char line[1 << 15];
    size_t st = 0, ed = 0;
    void reread() {
        memmove(line, line + st, ed - st);
        ed -= st;
        st = 0;
        ed += fread(line + ed, 1, (1 << 15) - ed, fp);
    }
    bool succ() {
        while (true) {
            if (st == ed) {
                reread();
                if (st == ed) return false;
            }
            while (st != ed && isspace(line[st])) st++;
            if (st != ed) break;
        }
        if (ed - st <= 50) reread();
        return true;
    }
    template <class T, enable_if_t<is_same<T, string>::value, int> = 0>
    bool read_single(T& ref) {
        if (!succ()) return false;
        while (true) {
            succ();
            size_t sz = 1;
            while (st + sz < ed && !isspace(line[st + sz])) sz++;
            ref.append(line + st, sz);
            st += sz;
            if (st != ed) break;
        }
        return true;
    }
    template <class T, enable_if_t<is_integral<T>::value, int> = 0>
    bool read_single(T& ref) {
        if (!succ()) return false;
        bool neg = false;
        if (line[st] == '-') {
            neg = true;
            st++;
        }
        ref = T(0);
        while (isdigit(line[st])) {
            ref = 10 * ref + (line[st++] - '0');
        }
        if (neg) ref = -ref;
        return true;
    }
    template <class T> bool read_single(V<T>& ref) {
        for (auto& d : ref) {
            if (!read_single(d)) return false;
        }
        return true;
    }
    void read() {}
    template <class H, class... T> void read(H& h, T&... t) {
        bool f = read_single(h);
        assert(f);
        read(t...);
    }
    Scanner(FILE* _fp) : fp(_fp) {}
};

struct Printer {
  public:
    template <bool F = false> void write() {}
    template <bool F = false, class H, class... T>
    void write(const H& h, const T&... t) {
        if (F) write_single(' ');
        write_single(h);
        write<true>(t...);
    }
    template <class... T>
    void writeln(const T&... t) {
        write(t...);
        write_single('\n');
    }

    Printer(FILE* _fp) : fp(_fp) {}
    ~Printer() { flush(); }

  private:
    static constexpr size_t SIZE = 1 << 15;
    FILE* fp;
    char line[SIZE], small[50];
    size_t pos = 0;
    void flush() {
        fwrite(line, 1, pos, fp);
        pos = 0;
    }
    void write_single(const char& val) {
        if (pos == SIZE) flush();
        line[pos++] = val;
    }
    template <class T, enable_if_t<is_same<T, string>::value, int> = 0>
    void write_single(const T& val) {
        for (char c : val) write_single(c);
    }
    template <class T, enable_if_t<is_integral<T>::value, int> = 0>
    void write_single(T val) {
        if (pos > (1 << 15) - 50) flush();
        if (val == 0) {
            write_single('0');
            return;
        }
        if (val < 0) {
            write_single('-');
            val = -val;  // todo min
        }
        size_t len = 0;
        while (val) {
            small[len++] = char('0' + (val % 10));
            val /= 10;
        }
        reverse(small, small + len);
        memcpy(line + pos, small, len);
        pos += len;
    }
    template <class T> void write_single(const V<T>& val) {
        auto n = val.size();
        for (size_t i = 0; i < n; i++) {
            if (i) write_single(' ');
            write_single(val[i]);
        }
    }
};

#pragma endregion

using namespace lib;
using mint = MintNTT;

int32_t main(){
    Scanner sc(stdin);
    Printer pr(stdout);

    int n, m;
    sc.read(n, m);

    V<mint> a(n);
    for (int i = 0; i < n; i++) {
        int x;
        sc.read(x);
        a[i] = x;
    }
    V<mint> b(m);
    for (int i = 0; i < m; i++) {
        int x;
        sc.read(x);
        b[i] = x;
    }
    auto c = FHTMultiplication()(a, b);
    for (auto x: c) {
        pr.write((int)x);
        pr.write(' ');
    }
    pr.writeln();
    return 0;
}
#line 1 "tests/yosupo/fht-convolution.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod"

#include <bits/stdc++.h>
#line 1 "ModularInteger.cpp"


#line 1 "NumberTheory.cpp"


#line 4 "NumberTheory.cpp"

namespace lib {
using namespace std;
namespace nt {
int64_t inverse(int64_t a, int64_t b) {
  long long b0 = b, t, q;
  long long x0 = 0, x1 = 1;
  if (b == 1)
    return 1;
  while (a > 1) {
    q = a / b;
    t = b, b = a % b, a = t;
    t = x0, x0 = x1 - q * x0, x1 = t;
  }
  if (x1 < 0)
    x1 += b0;
  return x1;
}
template<typename T, typename U>
T powmod (T a, U b, U p) {
    int res = 1;
    while (b)
        if (b & 1)
            res = (int) (res * 1ll * a % p),  --b;
        else
            a = (int) (a * 1ll * a % p),  b >>= 1;
    return res;
}
template<typename T>
vector<T> factors(T n) {
  vector<T> f;
  for(T i = 2; i*i <= n; i++) {
    if(n % i == 0) f.push_back(i);
    while(n % i == 0) n /= i;
  }
  if(n > 1) f.push_back(n);
  return f;
}
} // namespace nt
} // namespace lib


#line 5 "ModularInteger.cpp"

#if __cplusplus < 201300
#error required(c++14)
#endif

namespace lib {
using namespace std;
namespace {
template <typename T, T... Mods> struct ModularIntegerBase {
  typedef ModularIntegerBase<T, Mods...> type;

  T x[sizeof...(Mods)];
  friend ostream &operator<<(ostream &output, const type &var) {
    output << "(";
    for (int i = 0; i < sizeof...(Mods); i++) {
      if (i)
        output << ", ";
      output << var.x[i];
    }
    return output << ")";
  }
};

template <typename T, T Mod> struct ModularIntegerBase<T, Mod> {
  typedef ModularIntegerBase<T, Mod> type;
  constexpr static T mod = Mod;

  T x[1];

  T& data() { return this->x[0]; }
  T data() const { return this->x[0]; }
  explicit operator int() const { return this->x[0]; }
  explicit operator int64_t() const { return this->x[0]; }
  explicit operator double() const { return this->x[0]; }
  explicit operator long double() const { return this->x[0]; }
  friend ostream &operator<<(ostream &output, const type &var) {
    return output << var.x[0];
  }
};

template<typename T, typename U, T... Mods>
struct InversesTable {
  constexpr static size_t n_mods = sizeof...(Mods);
  constexpr static T mods[sizeof...(Mods)] = {Mods...};
  constexpr static int n_inverses = 1e6 + 10;

  T v[n_inverses][n_mods];
  T max_x;

  InversesTable() : v(), max_x(n_inverses) {
    for(int j = 0; j < sizeof...(Mods); j++)
      v[1][j] = 1, max_x = min(max_x, mods[j]);
    for(int i = 2; i < max_x; i++) {
      for(int j = 0; j < sizeof...(Mods); j++) {
        v[i][j] = mods[j] - (T)((U)(mods[j] / i) * v[mods[j] % i][j] % mods[j]);
      }
    }
  }
};

// Make available for linkage.
template <typename T, class U, T... Mods>
constexpr T InversesTable<T, U, Mods...>::mods[];

template <typename T, class Enable, T... Mods>
struct ModularIntegerImpl : ModularIntegerBase<T, Mods...> {
  typedef ModularIntegerImpl<T, Enable, Mods...> type;
  typedef T type_int;
  typedef uint64_t large_int;
  constexpr static size_t n_mods = sizeof...(Mods);
  constexpr static T mods[sizeof...(Mods)] = {Mods...};
  using ModularIntegerBase<T, Mods...>::x;
  using Inverses = InversesTable<T, large_int, Mods...>;

  struct Less {
    bool operator()(const type &lhs, const type &rhs) const {
      for (size_t i = 0; i < sizeof...(Mods); i++)
        if (lhs.x[i] != rhs.x[i])
          return lhs.x[i] < rhs.x[i];
      return false;
    };
  };
  typedef Less less;


  constexpr ModularIntegerImpl() {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = T();
  }
  constexpr ModularIntegerImpl(large_int y) {
    for (size_t i = 0; i < sizeof...(Mods); i++) {
      x[i] = y % mods[i];
      if (x[i] < 0)
        x[i] += mods[i];
    }
  }
  static type with_remainders(T y[sizeof...(Mods)]) {
    type res;
    for (size_t i = 0; i < sizeof...(Mods); i++)
      res.x[i] = y[i];
    res.normalize();
    return res;
  }

  inline void normalize() {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((x[i] %= mods[i]) < 0)
        x[i] += mods[i];
  }

  inline T operator[](int i) const { return x[i]; }

  inline T multiply(T a, T b, T mod) const { return (large_int)a * b % mod; }

  inline T inv(T a, T mod) const { return static_cast<T>(nt::inverse(a, mod)); }

  inline T invi(T a, int i) const {
    const static Inverses inverses = Inverses();
    if(a < inverses.max_x)
      return inverses.v[a][i];
    return inv(a, mods[i]);
  }

  type inverse() const {
    T res[sizeof...(Mods)];
    for (size_t i = 0; i < sizeof...(Mods); i++)
      res[i] = invi(x[i], i);
    return type::with_remainders(res);
  }

  template <typename U> T power_(T a, U p, T mod) {
    if (mod == 1)
      return T();
    if (p < 0) {
      if (a == 0)
        throw domain_error("0^p with negative p is invalid");
      p = -p;
      a = inv(a, mod);
    }
    if (p == 0)
      return T(1);
    if (p == 1)
      return a;
    T res = 1;
    while (p > 0) {
      if (p & 1)
        res = multiply(res, a, mod);
      p >>= 1;
      a = multiply(a, a, mod);
    }
    return res;
  }

  inline type &operator+=(const type &rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((x[i] += rhs.x[i]) >= mods[i])
        x[i] -= mods[i];
    return *this;
  }
  inline type &operator-=(const type &rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((x[i] -= rhs.x[i]) < 0)
        x[i] += mods[i];
    return *this;
  }
  inline type &operator*=(const type &rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = multiply(x[i], rhs.x[i], mods[i]);
    return *this;
  }
  inline type &operator/=(const type &rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = multiply(x[i], invi(rhs.x[i], i), mods[i]);
    return *this;
  }

  inline type &operator+=(T rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((x[i] += rhs) >= mods[i])
        x[i] -= mods[i];
    return *this;
  }

  type &operator-=(T rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((x[i] -= rhs) < 0)
        x[i] += mods[i];
    return *this;
  }

  type &operator*=(T rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = multiply(x[i], rhs, mods[i]);
    return *this;
  }

  type &operator/=(T rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = multiply(invi(rhs, i), x[i], mods[i]);
    return *this;
  }

  type &operator^=(large_int p) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      x[i] = power_(x[i], p, mods[i]);
    return *this;
  }

  type &operator++() {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((++x[i]) >= mods[i])
        x[i] -= mods[i];
    return *this;
  }
  type &operator--() {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if ((--x[i]) < 0)
        x[i] += mods[i];
    return *this;
  }
  type operator++(int unused) {
    type res = *this;
    ++(*this);
    return res;
  }
  type operator--(int unused) {
    type res = *this;
    --(*this);
    return res;
  }

  friend type operator+(const type &lhs, const type &rhs) {
    type res = lhs;
    return res += rhs;
  }
  friend type operator-(const type &lhs, const type &rhs) {
    type res = lhs;
    return res -= rhs;
  }
  friend type operator*(const type &lhs, const type &rhs) {
    type res = lhs;
    return res *= rhs;
  }
  friend type operator/(const type &lhs, const type &rhs) {
    type res = lhs;
    return res /= rhs;
  }

  friend type operator+(const type &lhs, T rhs) {
    type res = lhs;
    return res += rhs;
  }

  friend type operator-(const type &lhs, T rhs) {
    type res = lhs;
    return res -= rhs;
  }

  friend type operator*(const type &lhs, T rhs) {
    type res = lhs;
    return res *= rhs;
  }

  friend type operator/(const type &lhs, T rhs) {
    type res = lhs;
    return res /= rhs;
  }

  friend type operator^(const type &lhs, large_int rhs) {
    type res = lhs;
    return res ^= rhs;
  }

  friend type power(const type &lhs, large_int rhs) { return lhs ^ rhs; }

  type operator-() const {
    type res = *this;
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if (res.x[i])
        res.x[i] = mods[i] - res.x[i];
    return res;
  }

  friend bool operator==(const type &lhs, const type &rhs) {
    for (size_t i = 0; i < sizeof...(Mods); i++)
      if (lhs.x[i] != rhs.x[i])
        return false;
    return true;
  }
  friend bool operator!=(const type &lhs, const type &rhs) {
    return !(lhs == rhs);
  }

  friend istream &operator>>(istream &input, type &var) {
    T y;
    cin >> y;
    var = y;
    return input;
  }
};
} // namespace

// Explicitly make constexpr available for linkage.
template <typename T, class Enable, T... Mods>
constexpr T ModularIntegerImpl<T, Enable, Mods...>::mods[];

template <typename T, T... Mods>
using ModularInteger =
    ModularIntegerImpl<T, typename enable_if<is_integral<T>::value>::type,
                       Mods...>;

template <int32_t... Mods> using Mint32 = ModularInteger<int32_t, Mods...>;

template <int64_t... Mods> using Mint64 = ModularInteger<int64_t, Mods...>;

using MintP = Mint32<(int32_t)1e9+7>;
using MintNTT = Mint32<998244353>;
} // namespace lib


#line 1 "FHT.cpp"


#line 1 "BitTricks.cpp"


#line 4 "BitTricks.cpp"

namespace lib {
long long next_power_of_two(long long n) {
  if (n <= 0) return 1;
  return 1LL << (sizeof(long long) * 8 - 1 - __builtin_clzll(n) +
                 ((n & (n - 1LL)) != 0));
}
} // namespace lib


#line 1 "NTT.cpp"


#line 1 "DFT.cpp"


#line 5 "DFT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template <typename Ring, typename Provider>
struct DFT {
  static vector<int> rev;
  static vector<Ring> fa;

  // function used to precompute rev for fixed size fft (n is a power of two)
  static void dft_rev(int n) {
    Provider()(n);
    int lbn = __builtin_ctz(n);
    if ((int)rev.size() < (1 << lbn))
      rev.resize(1 << lbn);
    int h = -1;
    for (int i = 1; i < n; i++) {
      if ((i & (i - 1)) == 0)
        h++;
      rev[i] = rev[i ^ (1 << h)] | (1 << (lbn - h - 1));
    }
  }

  static void dft_iter(Ring *p, int n) {
    Provider w;
    for (int L = 2; L <= n; L <<= 1) {
      for (int i = 0; i < n; i += L) {
        for (int j = 0; j < L / 2; j++) {
          Ring z = p[i + j + L / 2] * w[j + L / 2];
          p[i + j + L / 2] = p[i + j] - z;
          p[i + j] += z;
        }
      }
    }
  }

  static void swap(vector<Ring> &buf) { std::swap(fa, buf); }
  static void _dft(Ring *p, int n) {
    dft_rev(n);
    for (int i = 0; i < n; i++)
      if (i < rev[i])
        std::swap(p[i], p[rev[i]]);
    dft_iter(p, n);
  }
  static void _idft(Ring *p, int n) {
    _dft(p, n);
    reverse(p + 1, p + n);
    Ring inv = Provider().inverse(n);
    for (int i = 0; i < n; i++)
      p[i] *= inv;
  }

  static void dft(int n) { _dft(fa.data(), n); }

  static void idft(int n) { _idft(fa.data(), n); }

  static void dft(vector<Ring> &v, int n) {
    swap(v);
    dft(n);
    swap(v);
  }
  static void idft(vector<Ring> &v, int n) {
    swap(v);
    idft(n);
    swap(v);
  }

  static int ensure(int a, int b = 0) {
    int n = a+b;
    n = next_power_of_two(n);
    if ((int)fa.size() < n)
      fa.resize(n);
    return n;
  }

  static void clear(int n) { fill(fa.begin(), fa.begin() + n, 0); }

  template<typename Iterator>
  static void fill(Iterator begin, Iterator end) {
    int n = ensure(distance(begin, end));
    int i = 0;
    for(auto it = begin; it != end; ++it) {
      fa[i++] = *it;
    }
    for(;i < n; i++) fa[i] = Ring();
  }
};

template<typename DF, typename U>
static vector<U> retrieve(int n) {
  assert(n <= DF::fa.size());
  vector<U> res(n);
  for(int i = 0; i < n; i++) res[i] = (U)DF::fa[i];
  return res;
}

template<typename Ring, typename Provider>
vector<int> DFT<Ring, Provider>::rev = vector<int>();

template<typename Ring, typename Provider>
vector<Ring> DFT<Ring, Provider>::fa = vector<Ring>();
}
} // namespace lib


#line 1 "VectorN.cpp"


#line 1 "Traits.cpp"


#line 4 "Traits.cpp"

namespace lib {
using namespace std;
namespace traits {

template <typename...> struct make_void { using type = void; };

template <typename... T> using void_t = typename make_void<T...>::type;

/// keep caide
template <typename Iterator>
using IteratorCategory = typename iterator_traits<Iterator>::iterator_category;

/// keep caide
template <typename Container>
using IteratorCategoryOf = IteratorCategory<typename Container::iterator>;

/// keep caide
template <typename Iterator>
using IteratorValue = typename iterator_traits<Iterator>::value_type;

/// keep caide
template <typename Container>
using IteratorValueOf = IteratorValue<typename Container::iterator>;

/// keep caide
template <typename Iterator>
using IsRandomIterator =
    is_base_of<random_access_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Iterator>
using IsInputIterator =
    is_base_of<input_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Iterator>
using IsBidirectionalIterator =
    is_base_of<bidirectional_iterator_tag, IteratorCategory<Iterator>>;

/// keep caide
template <typename Container>
using HasRandomIterator =
    is_base_of<random_access_iterator_tag, IteratorCategoryOf<Container>>;

/// keep caide
template <typename Container>
using HasInputIterator =
    is_base_of<input_iterator_tag, IteratorCategoryOf<Container>>;

/// keep caide
template <typename Container>
using HasBidirectionalIterator =
    is_base_of<bidirectional_iterator_tag, IteratorCategoryOf<Container>>;
} // namespace traits
} // namespace lib


#line 5 "VectorN.cpp"

#define VEC_CONST_OP(op, typ) \
  type operator op(const typ rhs) const { \
    auto res = *this; \
    return res op##= rhs; \
  }

#define VEC_BIN_OP(op) \
  type& operator op##=(const type& rhs) { \
    if(rhs.size() > this->size()) \
      this->resize(rhs.size()); \
    int sz = this->size(); \
    for(int i = 0; i < (int)rhs.size(); i++) \
      (*this)[i] op##= rhs[i]; \
    for(int i = rhs.size(); i < sz; i++) \
      (*this)[i] op##= 0; \
    return *this; \
  } \
  VEC_CONST_OP(op, type)

#define VEC_SINGLE_OP(op, typ) \
  type& operator op##=(const typ rhs) { \
    for(auto& x : *this) \
      x op##= rhs; \
    return *this; \
  } \
  VEC_CONST_OP(op, typ)
  

namespace lib {
using namespace std;
template<typename T>
struct VectorN : vector<T> {
  using type = VectorN<T>;

  template <
      typename Container,
      typename enable_if<traits::HasInputIterator<Container>::value>::type * = nullptr>
  VectorN(const Container &container)
      : vector<T>(container.begin(), container.end()) {}

  VectorN(const initializer_list<T> &v)
      : vector<T>(v.begin(), v.end()) {}

  template<typename... Args>
  VectorN( Args&&... args ) 
      : vector<T>(std::forward<Args>(args)...) {}

  VEC_BIN_OP(+)
  VEC_BIN_OP(-)
  VEC_BIN_OP(*)

  VEC_SINGLE_OP(+, T&)
  VEC_SINGLE_OP(-, T&)
  VEC_SINGLE_OP(*, T&)
  VEC_SINGLE_OP(/, T&)
  VEC_SINGLE_OP(^, int64_t)

  type operator-() const {
    auto res = *this;
    for(auto& x : res) x = -x;
    return res;
  }

  type operator%(int n) const {
    // TODO: get rid of this
    // return *const_cast<type*>(this);
    return *this;
  }
};
} // namespace lib


#line 7 "NTT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template<typename T>
struct MintRootProvider {
  static size_t max_sz;
  static T g;
  static vector<T> w;

  MintRootProvider() {
    if(g == 0) {
      auto acc = T::mod-1;
      while(acc % 2 == 0) acc /= 2, max_sz++;

      auto factors = nt::factors(T::mod - 1);
      for(g = 2; (typename T::type_int)g < T::mod; g++) {
        bool ok = true;
        for(auto f : factors) {
          if(power(g, (T::mod-1)/f) == 1) {
            ok = false;
            break;
          }
        }
        if(ok) break;
      }
      assert(g != 0);
    }
  }

  pair<T, T> roots(int num, int den) {
    auto p = g ^ ((long long)(T::mod - 1) / den * num);
    return {p, p.inverse()};
  }

  T operator()(int n, int k) {
    return power(g, (T::mod-1)/(n/k));
  }
  void operator()(int n) {
    n = max(n, 2);
    int k = max((int)w.size(), 2);
    assert(n <= (1LL << max_sz));
    if ((int)w.size() < n)
      w.resize(n);
    else
      return;
    w[0] = w[1] = 1;
    for (; k < n; k *= 2) {
      T step = power(g, (T::mod-1)/(2*k));
      for(int i = k; i < 2*k; i++)
        w[i] = (i&1) ? w[i/2] * step : w[i/2];
    }
  }
  T operator[](int i) {
    return w[i];
  }

  T inverse(int n) {
    return T(1) / n;
  }
};

template<typename T>
size_t MintRootProvider<T>::max_sz = 1;
template<typename T>
T MintRootProvider<T>::g = T();
template<typename T>
vector<T> MintRootProvider<T>::w = vector<T>();

template<typename T>
struct NTT : public DFT<T, MintRootProvider<T>> {
  using Parent = DFT<T, MintRootProvider<T>>;
  using Parent::fa;
  using Parent::dft;
  using Parent::idft;

  static void _convolve(const vector<T> &a) {
    int n = Parent::ensure(a.size(), a.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    Parent::dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fa[i];
    Parent::idft(n);
  }

  static void _convolve(const vector<T> &a, const vector<T> &b) {
    if(std::addressof(a) == std::addressof(b))
      return _convolve(a);
    int n = Parent::ensure(a.size(), b.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    Parent::dft(n);
    // TODO: have a buffer for this
    auto fb = retrieve<Parent, T>(n);
    for(size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < b.size() ? b[i] : T();
    Parent::dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fb[i];
    Parent::idft(n);
  }

  static vector<T> convolve(const vector<T>& a, const vector<T>& b) {
    int sz = (int)a.size() + b.size() - 1;
    _convolve(a, b);
    return retrieve<Parent, T>(sz);
  }
  
  static VectorN<T> transform(vector<T> a, int n) {
    a.resize(n);
    Parent::dft(a, n);
    return a;
  }

  static vector<T> itransform(vector<T> a, int n) {
    int sz = a.size();
    Parent::idft(a, sz);
    a.resize(min(n, sz));
    return a;
  }
};
}

struct NTTMultiplication {
  template<typename T>
  using Transform = linalg::NTT<T>;

  template <typename Field>
  vector<Field> operator()(const vector<Field> &a,
                           const vector<Field> &b) const {
    return linalg::NTT<Field>::convolve(a, b);
  };

  template<typename Field>
  inline VectorN<Field> transform(int n, const vector<Field>& p) const {
    int np = next_power_of_two(n);
    return linalg::NTT<Field>::transform(p, np);
  }

  template<typename Field>
  inline vector<Field> itransform(int n, const vector<Field>& p) const {
    return linalg::NTT<Field>::itransform(p, n);
  }

  template <typename Field, typename Functor, typename ...Ts>
  inline vector<Field> on_transform(
    int n,
    Functor& f,        
    const vector<Ts>&... vs) const {
    int np = next_power_of_two(n);
    return linalg::NTT<Field>::itransform(
      f(n, linalg::NTT<Field>::transform(vs, np)...), n);
  }
};
} // namespace lib


#line 1 "polynomial/Transform.cpp"


#line 4 "polynomial/Transform.cpp"

namespace lib {
using namespace std;
template<template <class> class T>
struct TransformMultiplication {
  template<typename Field>
  using Transform = T<Field>;

  template <typename Field>
  vector<Field> operator()(const vector<Field> &a,
                           const vector<Field> &b) const {
    return T<Field>::convolve(a, b);
  };

  template<typename Field>
  inline VectorN<Field> transform(int n, const vector<Field>& p) const {
    int np = next_power_of_two(n);
    return T<Field>::transform(p, np);
  }

  template<typename Field>
  inline vector<Field> itransform(int n, const vector<Field>& p) const {
    return T<Field>::itransform(p, n);
  }

  template <typename Field, typename Functor, typename ...Ts>
  inline vector<Field> on_transform(
    int n,
    Functor& f,        
    const vector<Ts>&... vs) const {
    int np = next_power_of_two(n);
    return T<Field>::itransform(
      f(n, T<Field>::transform(vs, np)...), n);
  }
};
} // namespace lib


#line 7 "FHT.cpp"

namespace lib {
using namespace std;
namespace linalg {
template <typename Ring>
struct FHT {
  using Provider = MintRootProvider<Ring>;
  using T = Ring;
  using U = make_unsigned_t<typename Ring::type_int>;
  using U64 = make_unsigned_t<typename Ring::large_int>;
  static vector<Ring> fa;
  static const int MAX_LG_N = 30;
  static vector<Ring> g, ig;
  
  static void precompute() {
    if(!g.empty()) return;
    Provider();
    g.resize(MAX_LG_N);
    ig.resize(MAX_LG_N);
    for(int i = 0; i < MAX_LG_N; i++) {
      Ring w = Provider::g ^ (((Ring::mod-1) >> (i + 2)) * 3);
      w = -w;
      Ring iw = w.inverse();
      g[i] = w;
      ig[i] = iw;
    }
  }

  static inline U& v(Ring& p) {
    return (U&)p.data();
  }

  static inline U v(const Ring& p) {
    return (U)p.data();
  }

  static void dft_iter(Ring *p, int n) {
    // decimation-in-time
    // natural to reverse ordering
    for (int B = n >> 1; B; B >>= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = p[j + B] * w;
          p[j + B] = p[j] - z;
          p[j] += z;
        }
        w *= g[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void idft_iter(Ring *p, int n) {
    // decimation-in-frequency
    // reverse to natural ordering
    for (int B = 1; B < n; B <<= 1) {
      Ring w = 1;
      for (int i = 0, twiddle = 0; i < n; i += B * 2) {
        for (int j = i; j < i + B; j++) {
          Ring z = (p[j] - p[j + B]) * w;
          p[j] += p[j + B];
          p[j + B] = z;
        }
        w *= ig[__builtin_ctz(++twiddle)];
      }
    }
  }

  static void swap(vector<Ring> &buf) { std::swap(fa, buf); }
  static void _dft(Ring *p, int n) {
    precompute();
    dft_iter(p, n);
  }
  static void _idft(Ring *p, int n) {
    precompute();
    idft_iter(p, n);
    Ring inv = Provider().inverse(n);
    for (int i = 0; i < n; i++)
      p[i] *= inv;
  }

  static void dft(int n) { _dft(fa.data(), n); }

  static void idft(int n) { _idft(fa.data(), n); }

  static void dft(vector<Ring> &v, int n) {
    swap(v);
    dft(n);
    swap(v);
  }
  static void idft(vector<Ring> &v, int n) {
    swap(v);
    idft(n);
    swap(v);
  }

  static int ensure(int a, int b = 0) {
    int n = a+b;
    n = next_power_of_two(n);
    if ((int)fa.size() < n)
      fa.resize(n);
    return n;
  }

  static void clear(int n) { fill(fa.begin(), fa.begin() + n, 0); }

  template<typename Iterator>
  static void fill(Iterator begin, Iterator end) {
    int n = ensure(distance(begin, end));
    int i = 0;
    for(auto it = begin; it != end; ++it) {
      fa[i++] = *it;
    }
    for(;i < n; i++) fa[i] = Ring();
  }

  static void _convolve(const vector<T> &a) {
    int n = ensure(a.size(), a.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fa[i];
    idft(n);
  }

  static void _convolve(const vector<T> &a, const vector<T> &b) {
    if(std::addressof(a) == std::addressof(b))
      return _convolve(a);
    int n = ensure(a.size(), b.size());
    for (size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < a.size() ? a[i] : T();
    dft(n);
    // TODO: have a buffer for this
    auto fb = retrieve<FHT<T>, T>(n);
    for(size_t i = 0; i < (size_t)n; i++)
      fa[i] = i < b.size() ? b[i] : T();
    dft(n);
    for (int i = 0; i < n; i++)
      fa[i] *= fb[i];
    idft(n);
  }

  static vector<T> convolve(const vector<T>& a, const vector<T>& b) {
    int sz = (int)a.size() + b.size() - 1;
    _convolve(a, b);
    return retrieve<FHT<T>, T>(sz);
  }
  
  static VectorN<T> transform(vector<T> a, int n) {
    a.resize(n);
    dft(a, n);
    return a;
  }

  static vector<T> itransform(vector<T> a, int n) {
    int sz = a.size();
    idft(a, sz);
    a.resize(min(n, sz));
    return a;
  }
};

template<typename Ring>
vector<Ring> FHT<Ring>::fa = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::g = vector<Ring>();
template<typename Ring>
vector<Ring> FHT<Ring>::ig = vector<Ring>();
}

using FHTMultiplication = TransformMultiplication<linalg::FHT>;
} // namespace lib


#line 6 "tests/yosupo/fht-convolution.test.cpp"
using namespace std;
 
#define mp make_pair
#define mt make_tuple
#define pb push_back
#define ms(v, x) memset((v), (x), sizeof(v))
#define all(v) (v).begin(), (v).end()
#define ff first
#define ss second
#define iopt ios::sync_with_stdio(false); cin.tie(0)
#define untie(p, a, b) decltype(p.first) a = p.first, decltype(p.second) b = p.second
 
int gcd(int a, int b) { return b == 0 ? a : gcd(b, a%b); }
int power(int x, int p, int MOD) {
    if(p == 0) return 1%MOD;
    if(p == 1) return x%MOD;
    int res = power(x, p/2, MOD);
    res = (long long)res*res%MOD;
    if(p&1) res = (long long)res*x%MOD;
    return res;
}
 
typedef pair<int, int> ii;
typedef long double LD;
typedef vector<int> vi;

#pragma region template

using ll = long long;
using ull = unsigned long long;
constexpr ll TEN(int n) { return (n == 0) ? 1 : 10 * TEN(n - 1); }
template <class T> using V = vector<T>;
template <class T> using VV = V<V<T>>;

#ifdef LOCAL
struct PrettyOS {
    ostream& os;
    bool first;
    template <class T> auto operator<<(T&& x) {
        if (!first) os << ", ";
        first = false;
        os << x;
        return *this;
    }
};
template <class... T> void dbg0(T&&... t) {
    (PrettyOS{cerr, true} << ... << t);
}
#define dbg(...)                                            \
    do {                                                    \
        cerr << __LINE__ << " : " << #__VA_ARGS__ << " = "; \
        dbg0(__VA_ARGS__);                                  \
        cerr << endl;                                       \
    } while (false);
#else
#define dbg(...)
#endif

template <class T, class U>
ostream& operator<<(ostream& os, const pair<T, U>& p) {
    return os << "P(" << p.first << ", " << p.second << ")";
}

template <class T> ostream& operator<<(ostream& os, const V<T>& v) {
    os << "[";
    for (auto d : v) os << d << ", ";
    return os << "]";
}

struct Scanner {
    FILE* fp = nullptr;
    char line[1 << 15];
    size_t st = 0, ed = 0;
    void reread() {
        memmove(line, line + st, ed - st);
        ed -= st;
        st = 0;
        ed += fread(line + ed, 1, (1 << 15) - ed, fp);
    }
    bool succ() {
        while (true) {
            if (st == ed) {
                reread();
                if (st == ed) return false;
            }
            while (st != ed && isspace(line[st])) st++;
            if (st != ed) break;
        }
        if (ed - st <= 50) reread();
        return true;
    }
    template <class T, enable_if_t<is_same<T, string>::value, int> = 0>
    bool read_single(T& ref) {
        if (!succ()) return false;
        while (true) {
            succ();
            size_t sz = 1;
            while (st + sz < ed && !isspace(line[st + sz])) sz++;
            ref.append(line + st, sz);
            st += sz;
            if (st != ed) break;
        }
        return true;
    }
    template <class T, enable_if_t<is_integral<T>::value, int> = 0>
    bool read_single(T& ref) {
        if (!succ()) return false;
        bool neg = false;
        if (line[st] == '-') {
            neg = true;
            st++;
        }
        ref = T(0);
        while (isdigit(line[st])) {
            ref = 10 * ref + (line[st++] - '0');
        }
        if (neg) ref = -ref;
        return true;
    }
    template <class T> bool read_single(V<T>& ref) {
        for (auto& d : ref) {
            if (!read_single(d)) return false;
        }
        return true;
    }
    void read() {}
    template <class H, class... T> void read(H& h, T&... t) {
        bool f = read_single(h);
        assert(f);
        read(t...);
    }
    Scanner(FILE* _fp) : fp(_fp) {}
};

struct Printer {
  public:
    template <bool F = false> void write() {}
    template <bool F = false, class H, class... T>
    void write(const H& h, const T&... t) {
        if (F) write_single(' ');
        write_single(h);
        write<true>(t...);
    }
    template <class... T>
    void writeln(const T&... t) {
        write(t...);
        write_single('\n');
    }

    Printer(FILE* _fp) : fp(_fp) {}
    ~Printer() { flush(); }

  private:
    static constexpr size_t SIZE = 1 << 15;
    FILE* fp;
    char line[SIZE], small[50];
    size_t pos = 0;
    void flush() {
        fwrite(line, 1, pos, fp);
        pos = 0;
    }
    void write_single(const char& val) {
        if (pos == SIZE) flush();
        line[pos++] = val;
    }
    template <class T, enable_if_t<is_same<T, string>::value, int> = 0>
    void write_single(const T& val) {
        for (char c : val) write_single(c);
    }
    template <class T, enable_if_t<is_integral<T>::value, int> = 0>
    void write_single(T val) {
        if (pos > (1 << 15) - 50) flush();
        if (val == 0) {
            write_single('0');
            return;
        }
        if (val < 0) {
            write_single('-');
            val = -val;  // todo min
        }
        size_t len = 0;
        while (val) {
            small[len++] = char('0' + (val % 10));
            val /= 10;
        }
        reverse(small, small + len);
        memcpy(line + pos, small, len);
        pos += len;
    }
    template <class T> void write_single(const V<T>& val) {
        auto n = val.size();
        for (size_t i = 0; i < n; i++) {
            if (i) write_single(' ');
            write_single(val[i]);
        }
    }
};

#pragma endregion

using namespace lib;
using mint = MintNTT;

int32_t main(){
    Scanner sc(stdin);
    Printer pr(stdout);

    int n, m;
    sc.read(n, m);

    V<mint> a(n);
    for (int i = 0; i < n; i++) {
        int x;
        sc.read(x);
        a[i] = x;
    }
    V<mint> b(m);
    for (int i = 0; i < m; i++) {
        int x;
        sc.read(x);
        b[i] = x;
    }
    auto c = FHTMultiplication()(a, b);
    for (auto x: c) {
        pr.write((int)x);
        pr.write(' ');
    }
    pr.writeln();
    return 0;
}
Back to top page