g | x | w | all
Bytes Lang Time Link
nan241224T011321Z138 Aspe
044Rust241101T023824ZAnders K
nan241029T121711Zjdt
nan241028T152332Zovs

Python

Python port of @Anders Kaseorg's Rust answer.

import sys
from typing import List, Tuple

SIZE = 8
COLUMN_MASK = 0x0101010101010101

def matrix_rows(matrix: int) -> List[int]:
    """Convert a 64-bit integer matrix to a list of 8 byte rows (little endian)."""
    return list(matrix.to_bytes(SIZE, 'little'))

def rows_matrix(rows: List[int]) -> int:
    """Convert a list of 8 byte rows back to a 64-bit integer matrix (little endian)."""
    return int.from_bytes(bytes(rows), 'little')

def trailing_zeros(x: int, size: int = SIZE) -> int:
    """Calculate the number of trailing zero bits in an integer."""
    if x == 0:
        return size
    return (x & -x).bit_length() - 1

def binary_search_desc(lst: List[int], target: int) -> int:
    """
    Perform a binary search on a descending sorted list.
    Returns the first index where lst[index] <= target.
    If no such index exists, returns len(lst).
    """
    low, high = 0, len(lst)
    while low < high:
        mid = (low + high) // 2
        if lst[mid] > target:
            low = mid + 1
        else:
            high = mid
    return low

def make_coeffs(n: int) -> Tuple[List[int], int]:
    """
    Generate coefficient table and number of unique matrices for a given size n.
    """
    coeffs = [0] * (n << n)  # Equivalent to n * (2^n)
    for row in reversed(range(1 << n)):
        coeffs[(n - 1) << n | row] = (1 << n) - 1 - row
        for i in reversed(range(n - 1)):
            if row + 1 < (1 << n):
                coeffs[i << n | row] = coeffs[(i + 1) << n | row] + coeffs[i << n | (row + 1)]
            else:
                coeffs[i << n | row] = coeffs[(i + 1) << n | row]
    num_matrices = sum(coeffs[i << n] for i in range(n)) + 1
    return coeffs, num_matrices

def encode(n: int, coeffs: List[int], matrix: int) -> int:
    """
    Encode a matrix into a unique integer code based on sorted rows and coefficients.
    """
    rows = matrix_rows(matrix)[:n]
    sorted_rows = sorted(rows)
    return sum(coeffs[(i << n) | row] for i, row in enumerate(sorted_rows))

def decode(n: int, coeffs: List[int], matrix_code: int) -> int:
    """
    Decode an integer code back into the original matrix.
    """
    rows = [0] * SIZE
    for i in range(n):
        search_range = coeffs[i << n : (i + 1) << n]
        row = binary_search_desc(search_range, matrix_code)
        rows[i] = row
        if (i << n | row) >= len(coeffs):
            print(f"Error: Index out of bounds during decode. i={i}, row={row}")
            sys.exit(1)
        matrix_code -= coeffs[(i << n) | row]
    return rows_matrix(rows)

def main():
    for n in range(2, 7):
        # Print trivial matrices information
        for i in range(1, n + 1):
            print(f"{n} {i} 0: {n * n - i + 1}")

        coeffs, num_matrices = make_coeffs(n)
        ranks = [0] * num_matrices
        split = num_matrices - 1

        # Assign ranks based on coefficients
        for i in reversed(range(n)):
            new_split = split - coeffs[i << n]
            new_ranks = ranks[:split].copy()
            old_ranks = ranks[split:]
            for k, matrix_code in enumerate(range(new_split, split)):
                matrix = decode(n, coeffs, matrix_code)
                row = (matrix >> (SIZE * (n - 1))) & 0xFF
                tz = trailing_zeros(row, SIZE)
                column = (matrix >> tz) & COLUMN_MASK
                new_matrix = matrix ^ (column * row)
                encoded = encode(n, coeffs, new_matrix)
                if encoded < split:
                    print(f"Error: Encoded value {encoded} is less than split {split}")
                    sys.exit(1)
                square_rank = old_ranks[encoded - split] + 1
                new_ranks[k + new_split] = square_rank
            ranks[:split] = new_ranks
            split = new_split

        # Initialize flood
        flood = [0 if rank == n else (1 << rank) for rank in ranks]
        reached = [0] * (SIZE + 1)
        flips = 0

        # Perform flood fill
        while True:
            new_reached = [~0] * (SIZE + 1)
            for b, rank in zip(flood, ranks):
                new_reached[rank] &= b
            unfinished = False
            for i in range(0, n + 1):
                for j in range(1, i):
                    if ((new_reached[i] >> j) & 1) == 0:
                        unfinished = True
                    elif ((reached[i] >> j) & 1) == 0:
                        print(f"{n} {i} {j}: {flips}")
            reached = new_reached
            if not unfinished:
                break

            # Update flood
            new_flood = []
            for matrix_code, b in enumerate(flood):
                matrix = decode(n, coeffs, matrix_code)
                for i_row in range(n):
                    for j_col in range(n):
                        bit = 1 << (SIZE * i_row + j_col)
                        flipped_matrix = matrix ^ bit
                        encode_code = encode(n, coeffs, flipped_matrix)
                        if encode_code >= len(flood):
                            print(f"Error: Encoded value {encode_code} out of bounds for flood")
                            sys.exit(1)
                        b |= flood[encode_code]
                new_flood.append(b)
            flood = new_flood
            flips += 1

if __name__ == "__main__":
    main()

Attempt This Online!

Rust, score ≈ 44

The main idea here is to take advantage of symmetry by avoiding redundant processing of matrices obtained by rearranging the rows of other processed matrices.

This uses a different output format because for each \$n\$ it computes the nontrivial values in ascending order by value rather than positionally (which is allowed by the rules):

2 1 0: 4
2 2 0: 3
2 2 1: 1
…

On my system, this completes \$n ≤ 5\$ in 0.2 seconds, prints 10 additional entries from \$n = 6\$ within the 1 minute time limit, and completes \$n ≤ 6\$ in 4 minutes. The complete values for \$n = 6\$, converted back to the sample output format, are

n = 6:

0
36 0
35 12 0
34 14 8 0
33 14 9 4 0
32 14 8 6 3 0
31 14 8 5 3 1 0

Build with cargo build --release and run with target/release/rank.

Cargo.toml

[package]
name = "rank"
version = "0.1.0"
edition = "2021"

[dependencies]
rayon = "1.10.0"

src/main.rs

use rayon::prelude::*;
use std::cmp::Reverse;

type Matrix = u64;
type Row = u8;
const SIZE: usize = 8;
const COLUMN_MASK: Matrix = Matrix::MAX / !(!0 << SIZE);

fn matrix_rows(matrix: Matrix) -> [Row; SIZE] {
    matrix.to_le_bytes()
}

fn rows_matrix(rows: [Row; SIZE]) -> Matrix {
    Matrix::from_le_bytes(rows)
}

fn make_coeffs(n: usize) -> (Vec<usize>, usize) {
    let mut coeffs = vec![0; n << n];
    for row in (0..!(!0 << n)).rev() {
        coeffs[(n - 1) << n | row] = !(!0 << n) - row;
        for i in (0..n - 1).rev() {
            coeffs[i << n | row] = coeffs[(i + 1) << n | row] + coeffs[i << n | (row + 1)];
        }
    }
    let num_matrices = (0..n).map(|i| coeffs[i << n as usize]).sum::<usize>() + 1;
    (coeffs, num_matrices)
}

fn encode(n: usize, coeffs: &[usize], matrix: Matrix) -> usize {
    let mut rows = matrix_rows(matrix);
    rows[0..n].sort();
    rows[0..n]
        .iter()
        .enumerate()
        .map(|(i, &row)| coeffs[i << n | row as usize])
        .sum()
}

fn decode(n: usize, coeffs: &[usize], mut matrix_code: usize) -> Matrix {
    let mut rows = [0; SIZE];
    for i in 0..n {
        let row = coeffs[i << n..(i + 1 << n) - 1]
            .binary_search_by_key(&Reverse(&matrix_code), Reverse)
            .unwrap_or_else(|coeff| coeff);
        rows[i] = row as Row;
        matrix_code -= coeffs[i << n | row];
    }
    return rows_matrix(rows);
}

fn main() {
    for n in 2..=6 {
        for i in 1..=n {
            // Trivial: matrix of all 1s except i - 1 diagonal 0s
            println!("{n} {i} 0: {}", n * n - i + 1);
        }

        let (coeffs, num_matrices) = make_coeffs(n);

        let mut ranks = vec![0u8; num_matrices];
        let mut split = num_matrices - 1;
        for i in (0..n).rev() {
            let new_split = split - coeffs[i << n];
            let (new_ranks, old_ranks) = ranks.split_at_mut(split);
            new_ranks[new_split..]
                .par_iter_mut()
                .zip(new_split..split)
                .for_each(|(square_rank, matrix_code)| {
                    let matrix = decode(n, &coeffs, matrix_code);
                    let row = matrix >> (SIZE * (n - 1));
                    let column = (matrix >> row.trailing_zeros()) & COLUMN_MASK;
                    *square_rank =
                        old_ranks[encode(n, &coeffs, matrix ^ (column * row)) - split] + 1;
                });
            split = new_split;
        }

        let mut flood: Vec<Row> = ranks
            .iter()
            .map(|&rank| if rank == n as u8 { 0 } else { 1 << rank })
            .collect();

        let mut reached = [0; SIZE + 1];
        let mut flips = 0;
        while {
            let mut new_reached = [!0; SIZE + 1];
            for (&b, &rank) in flood.iter().zip(&ranks) {
                new_reached[rank as usize] &= b;
            }
            let mut unfinished = false;
            for i in 0..=n {
                for j in 1..i {
                    if (new_reached[i] >> j) & 1 == 0 {
                        unfinished = true;
                    } else if (reached[i] >> j) & 1 == 0 {
                        println!("{n} {i} {j}: {flips}");
                    }
                }
                reached[i] = new_reached[i];
            }
            unfinished
        } {
            flood = flood
                .par_iter()
                .copied()
                .enumerate()
                .map(|(matrix_code, mut b)| {
                    let matrix = decode(n, &coeffs, matrix_code);
                    for i in 0..n {
                        for j in 0..n {
                            b |= flood[encode(n, &coeffs, matrix ^ (1 << (SIZE * i + j)))];
                        }
                    }
                    b
                })
                .collect();
            flips += 1;
        }
    }
}

C++ (clang)

Direct port of ovs's answer with some parallelization thrown in.

#include <iostream>
#include <vector>
#include <cstring>

void flips(int n) {
    int N = 1 << (n * n);
    std::vector<char> ranks(N), X(N), Y(N);

    // Compute ranks of all 2^(n*n) matrices
    ranks[0] = 0;
    int mask = (N - 1) / ((1 << n) - 1);
    for (int x = 1; x < N; ++x) {
        int matrix = x;
        int pivot = matrix % (1 << n);
        int lsb = pivot & -pivot;
        matrix >>= n;
        ranks[x] = lsb ? ranks[matrix ^ (matrix / lsb & mask) * pivot] + 1 : ranks[matrix];
    }

    // Result table
    std::vector<std::vector<int>> results(n + 1, std::vector<int>(n + 1, 0));
    for (int i = 1; i <= n; ++i) results[i][0] = n * n - i + 1; // First column

    // For each j, spread js until j is the maximum value.
    // The maximum number of flips needed for rank i -> j is equivalent
    // to the number of iterations where `X` contained i.
    for (int j = 1; j < n; ++j) {
        std::memcpy(Y.data(), ranks.data(), N);
        int w;
        do {
            w = 0;
            std::memcpy(X.data(), Y.data(), N);

            #pragma omp parallel for
            for (int x = 0; x < N; ++x) {
                if (X[x] == j) {
                    for (int b = 1; b < N; b *= 2) Y[x ^ b] = j;
                } else if (X[x] > j) {
                    w |= 1 << X[x];
                }
            }
            for (int i = j + 1; i <= n; ++i) results[i][j] += (w >> i) & 1;
        } while (w);
    }

    // Print results
    for (int i = 0; i <= n; ++i) {
        for (int j = 0; j <= i; ++j)
            std::cout << results[i][j] << ' ';
        std::cout << '\n';
    }
}

int main() {
    for (int n = 2; n <= 5; ++n) {
        std::cout << "n=" << n << '\n';
        flips(n);
    }
    return 0;
}

Try it online!

C (clang -O2)

Locally this takes 15 seconds for n=5. For computing further results, some of the ints have to be changed to long, but it won't be to close to completing n=6 within a minute.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


void flips(int n) {
  int N = 1 << n*n;
  char *ranks = malloc(sizeof *ranks * N),
       *X = malloc(sizeof *X * N),
       *Y = malloc(sizeof *Y * N);

  // compute ranks of all 2^(n*n) matrices
  ranks[0] = 0;
  int mask = (N - 1) / ((1<<n) - 1);
  for (int x=1; x<N; x++) {
    int matrix = x,
        pivot = matrix%(1<<n),
        lsb = pivot&-pivot;
    matrix >>= n;
    ranks[x] = lsb ? ranks[matrix ^ (matrix/lsb & mask) * pivot]+1 : ranks[matrix];
  }

  // result table
  int results[n+1][n+1];
  memset(results, 0, sizeof results);
  for(int i=1;i<=n;i++) results[i][0] = n*n-i+1; // first column

  // for each j, spread js until j is the maximum value.
  // the maximum number of flips needed for rank i -> j is equivalent
  // to the number of iterations where `X` contained i.
  for (int j=1; j<n; j++) {
    memcpy(Y, ranks, N);
    int w;
    do {
      w=0, memcpy(X, Y, N);
      for(int x=0; x<N; x++) {
        if (X[x]==j)
          for (int b=1;b<N;b*=2) Y[x^b] = j;
        else if (X[x]>j)
          w|=1<<X[x];
      }
      for(int i=j+1;i<=n;i++) results[i][j] += w>>i&1;
    } while (w);
  }

  // print results
  for (int i=0; i<=n; i++) {
    for (int j=0; j<=i; j++)
      printf("%d ", results[i][j]);
    printf("\n");
  }
  free(ranks), free(X), free(Y);
}

int main() {
  for(int n=2;n<=5;n++)
    printf("n=%d\n", n),
    flips(n);
  return 0;
}

Try it online!