Fast Kitamasa

線形漸化式\(a_i = c_1 * a_{i - 1} + \cdots + c_d * a_{i - d}\)があるとき、\(c_0 = -1\)としてfast_kitamasaにかけると、 \(a_n = b_0 * a_{d - 1} + \cdots + b_{d - 1} * a_0\)となる\(b\)を\(O(d \log d \log n)\)で求める

#include "formal_power_series.hpp"
#include <vector>
using i64 = long long;

template<class F, class FM>
std::vector<F> fast_kitamasa(std::vector<F> c, i64 n) {
  using fps = FPS<FM>;
  i64 k = c.size() - 1;
  fps cf(std::move(c));
  fps ic = cf.inv(k - 1);
  
  i64 c_len = bound_pow2(k - 1 + cf.size() - 1);
  i64 ic_len = bound_pow2(k - 1 + ic.size() - 1);
  auto dc = cf.resized(c_len).dft(c_len);
  auto dic = std::move(ic).dft(ic_len);

  i64 b_len = bound_pow2(k);
  fps b(std::vector<F>(b_len, F(0)));
  b[k - 1] = F(1);
  i64 m = bound_pow2(n);
  while(m) {
    auto bt = std::move(b).dft(b_len * 2);
    bt *= bt;
    auto beta = std::move(bt).idft();
    
    auto dbeta = beta.resized(k - 1).dft(ic_len);
    auto q = std::move(dbeta *= dic).idft(c_len);
    for(int i = k - 1; i < c_len; i++) q[i] = F(0);
    fps cfq = std::move(std::move(q).dft(c_len) *= dc).idft(k - 1 + cf.size() - 1);
    beta -= cfq;

    b = fps(std::vector<F>(b_len * 2));
    for(int i = k - 1; i < 2 * k - 1; i++) {
      b[i - (k - 1)] = beta[i];
    }
    if(m & n) {
      F freq = b[0];
      for(int i = 0; i < k - 1; i++) {
        b[i] = b[i + 1] + freq * cf[i + 1];
      }
      b[k - 1] = freq * cf[k];
    }
    m >>= 1;
  }
  b.resize(k);
  return std::move(b.coef);
}