Codeforces
CF Step
Youtube Linkedin Discord Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode Toggle Dark/Light/Auto mode Back to homepage

Code: Grid Path

#include <bits/stdc++.h>

typedef uint32_t u32;
typedef uint64_t u64;

int main() {
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);
    std::cout.tie(nullptr);
    int n, m;
    u32 P;
    std::cin >> n >> m >> P;
    auto dec = [&](const u32 x) -> u32 { return std::min(x, x - P); };
    int w = m + m, pans = w - 1;
    auto pre = [&](int x) -> int { return x; };
    auto suf = [&](int x) -> int { return m - 1 + x; };
    std::vector T(w, std::vector<u32>(w)), nT = T;
    std::vector<u32> dp(w), ndp(w);
    T[0][pans] = T[pans][pans] = 1;
    for (int l = 0; l < m; l++) {
        for (int r = l; r < m; r++) {
            std::vector<std::pair<int, u32>> vec;
            vec.emplace_back(0, 1);
            if (l)
                vec.emplace_back(pre(l), P - 1);
            if (r + 1 < m)
                vec.emplace_back(suf(m - r - 1), P - 1);
            auto ins = [&](int i) {
                if (l == 0) {
                    dp[i]++;
                }
                for (auto [x, y] : vec) {
                    T[x][i] = dec(T[x][i] + y);
                }
            };
            ins(0);
            for (int i = r + 1; i < m; i++)
                ins(pre(i));
            for (int i = 1; i <= l; i++)
                ins(suf(m - i));
        }
    }
    auto trans = [&]() {
        std::fill(ndp.begin(), ndp.end(), 0);
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < w; y++) {
                ndp[y] = (ndp[y] + (u64)dp[x] * T[x][y]) % P;
            }
        }
        dp.swap(ndp);
    };
    auto trans_T = [&]() {
        for (int i = 0; i < w; i++) {
            std::fill(nT[i].begin(), nT[i].end(), 0);
        }
        for (int x = 0; x < w; x++) {
            for (int z = 0; z < w; z++) {
                __uint128_t sum = 0;
#pragma GCC unroll 8
                for (int y = 0; y < w; y++) {
                    sum += (u64)T[x][y] * T[y][z];
                }
                nT[x][z] = sum % P;
            }
        }
        for (int i = 0; i < w; i++) {
            std::copy(nT[i].begin(), nT[i].end(), T[i].begin());
        }
    };
    while (n) {
        if (n & 1)
            trans();
        n >>= 1;
        if (n)
            trans_T();
    }
    std::cout << dp[pans] << std::endl;
    return 0;
}