难点在于卡 __int128
(?)。
首先 \(N>K\) 显然无解,只需考虑 \(N\le K\) 的情况。然而这并没有什么用。
把 \(b\) 看作集合,显然 \(b_i\subset b_{i+1}\)。所以令 \(f_{n,i}\) 为考虑到 \(b_n\) 且 \(|b_n|=i\) 的方案数,集合元素无序,即选择 \(\{A,B,C\}\) 或者 \(\{A,B,D\}\in \{A,B,C,D\}\) 看作不同方案,且不考虑其它 \(k-i\) 种元素。有如下转移:
即考虑 \(b_n\setminus b_{n-1}\) 的大小,从 \(b_n\) 中有的选出 \(|b_n\setminus b_{n-1}|\) 个一定要选,另外 \(|b_{n-1}|\) 个元素对于 \(a_n\) 可以任意选择。
把转移形式写得好看一点:
注意到 \(f\) 的转移组合数拆开后可以写成卷积的形式,于是我们有了 \(O(NK\log K)\) 的做法,然而这还是没有什么用。
但是注意到,\(f_{n}\) 其实能从任意的 \(a+b=n\) 转移而来,具体地:
原理是类似的,考虑 \(b_n\) 和 \(b_a\) 的关系,枚举 \(b_a\) 的大小,从 \(i\) 个元素中选出 \(j\) 个来自 \(b_a\),剩下 \(b_{a+1}\setminus b_a,b_{a+2}\setminus b_a,\cdots ,b_{n}\setminus b_a\) 这些集合显然也满足前面是后面的子集,于是对这 \(b\) 个集合,用 \(f_{b,i-j}\) 枚举这样的集合序列的方案数,最后这些集合对于 \(b_a\) 中原本有的元素都可以任意挑选。
然后就是套路了,令 \(a=\left\lfloor\dfrac{n}{2}\right\rfloor\),\(b=\left\lceil\dfrac{n}{2}\right\rceil\),分治求解即可。复杂度 \(O(K\log K\log N)\)。
要写任意模数 NTT,不能 #define int __int128
。
彩蛋:
(官方题解:)We decided to ask the answer modulo \(10^9+7\) to not let the participants easily guess that these problem requires FFT ? So, in order to get accepted you had to implement one of the methods to deal with the large modulo in polynomial multiplication using FFT.
// Problem: Transforming Sequence
// Contest: Luogu
// URL: https://www.luogu.com.cn/problem/CF623E
// Memory Limit: 250 MB
// Time Limit: 7000 ms
//
// Powered by CP Editor (https://cpeditor.org)
#include <bits/stdc++.h>
#define li __int128
using namespace std;
namespace vbzIO {
char ibuf[(1 << 20) + 1], *iS, *iT;
#if ONLINE_JUDGE
#define gh() (iS == iT ? iT = (iS = ibuf) + fread(ibuf, 1, (1 << 20) + 1, stdin), (iS == iT ? EOF : *iS++) : *iS++)
#else
#define gh() getchar()
#endif
#define mt make_tuple
#define mp make_pair
#define fi first
#define se second
#define pc putchar
#define pb emplace_back
#define ins insert
#define era erase
typedef tuple<int, int, int> tu3;
typedef pair<int, int> pi;
inline li rd() {
char ch = gh();
li x = 0;
bool t = 0;
while (ch < '0' || ch > '9') t |= ch == '-', ch = gh();
while (ch >= '0' && ch <= '9') x = (x << 1) + (x << 3) + (ch ^ 48), ch = gh();
return t ? ~(x - 1) : x;
}
inline void wr(int x) {
if (x < 0) x = ~(x - 1), pc('-');
if (x > 9) wr(x / 10);
pc(x % 10 + '0');
}
}
using namespace vbzIO;
const int N = 2e5 + 100;
const int G = 3;
const int P[3] = { 469762049, 998244353, 1004535809 };
const int p = 1e9 + 7;
li n;
int k;
int fac[N], ifac[N];
int Add(int x, int y, int P) { return x += y, (x >= P) ? x - P : x; }
int Mul(int x, int y, int P) { return 1ll * x * y % P; }
int qpow(int p, int q, int P) {
int res = 1;
for (; q; q >>= 1, p = Mul(p, p, P))
if (q & 1) res = Mul(res, p, P);
return res;
}
void init(int lim) {
fac[0] = 1;
for (int i = 1; i <= lim; i++)
fac[i] = Mul(fac[i - 1], i, p);
ifac[lim] = qpow(fac[lim], p - 2, p);
for (int i = lim - 1; ~i; i--)
ifac[i] = Mul(ifac[i + 1], i + 1, p);
}
void exgcd(li a, li b, li &x, li &y) {
if (!b) return x = 1, y = 0, void();
exgcd(b, a % b, y, x), y -= a / b * x;
}
int CRT(int *a) {
li SP = 1, res = 0;
for (int i = 0; i < 3; i++) SP *= P[i];
for (int i = 0; i < 3; i++) {
li m = SP / P[i], b, y;
exgcd(m, P[i], b, y);
(res += a[i] * m * b) %= SP;
}
return (res % SP + SP) % SP % p;
}
void NTT(int *f, int len, int lim, int op, int id) {
static int tr[N];
int iG = qpow(G, P[id] - 2, P[id]);
for (int i = 1; i < len; i++)
tr[i] = (tr[i >> 1] >> 1) | ((i & 1) << (lim - 1));
for (int i = 0; i < len; i++)
if (i < tr[i]) swap(f[i], f[tr[i]]);
for (int o = 2, k = 1; k < len; o <<= 1, k <<= 1) {
int tg = qpow(~op ? G : iG, (P[id] - 1) / o, P[id]);
for (int i = 0; i < len; i += o) {
for (int j = 0, w = 1; j < k; j++, w = Mul(w, tg, P[id])) {
int x = f[i + j], y = Mul(w, f[i + j + k], P[id]);
f[i + j] = Add(x, y, P[id]), f[i + j + k] = Add(x, P[id] - y, P[id]);
}
}
}
if (~op) return;
int iv = qpow(len, P[id] - 2, P[id]);
for (int i = 0; i < len; i++) f[i] = Mul(f[i], iv, P[id]);
}
typedef vector<int> poly;
poly tp;
map<int, poly> ft;
poly Mul(poly x, poly y, int id) {
static int tf[N], tg[N];
int len = 1, lim = 0, szx = x.size(), szy = y.size();
while (len < szx + szy) len <<= 1, lim++;
for (int i = 0; i < len; i++) tf[i] = tg[i] = 0;
for (int i = 0; i < szx; i++) tf[i] = x[i];
for (int i = 0; i < szy; i++) tg[i] = y[i];
NTT(tf, len, lim, 1, id), NTT(tg, len, lim, 1, id);
for (int i = 0; i < len; i++) tf[i] = Mul(tf[i], tg[i], P[id]);
NTT(tf, len, lim, -1, id);
poly res;
for (int i = 0; i < szx + szy - 1; i++) res.pb(tf[i]);
return res;
}
poly MTT(poly x, poly y, int p) {
poly c[3], res;
int szx = x.size(), szy = y.size();
for (int i = 0; i < 3; i++) c[i] = Mul(x, y, i);
for (int i = 0; i < szx + szy - 1; i++) {
int t[3] = { c[0][i], c[1][i], c[2][i] };
res.pb(CRT(t));
}
return res;
}
poly conq(int len) {
if (ft.find(len) != ft.end()) return ft[len];
int la = len >> 1, lb = len - la, bs = qpow(2, lb, p);
poly lhs = conq(la), res;
poly rhs = (la == lb) ? lhs : conq(lb);
for (int i = 0, w = 1; i < lhs.size(); i++, w = Mul(w, bs, p))
lhs[i] = Mul(lhs[i], Mul(w, ifac[i], p), p);
for (int i = 0; i < rhs.size(); i++)
rhs[i] = Mul(rhs[i], ifac[i], p);
res = MTT(lhs, rhs, p);
for (int i = 0; i < res.size() && i <= k; i++) res[i] = Mul(res[i], fac[i], p);
res.resize(k + 1);
return ft[len] = res;
}
int main() {
n = rd(), k = rd();
if (n > k) return puts("0"), 0;
tp = poly(k + 1, 1), tp[0]--, init(k + 10), ft[1] = tp;
poly res = conq(n);
int ans = 0;
for (int i = 1; i < res.size(); i++)
ans = Add(ans, Mul(res[i], Mul(fac[k], Mul(ifac[i], ifac[k - i], p), p), p), p);
wr(ans);
return 0;
}