g | x | w | all
Bytes Lang Time Link
nanRust250126T140436Z138 Aspe
1785Java 11200314T082140ZMiles
169Haskell200222T161043ZChristia
nanJava 8200119T233008ZMiles
nanJavaScript Node.js200118T152132ZArnauld

Rust, \$n=8\$ in about 36.211623557s

Rust port of @Arnauld's JavaScript answer.

Try it online!

use std::collections::HashSet;
use std::time::Instant;

fn build(n: usize) -> usize {
    let mut layer = vec![vec![0; n]; n];
    let mut cube = HashSet::new();
    let mut count = 0;

    for y in 0..n {
        for i in 1..=(n - y) {
            layer[y][0] = i as u8;
            fill(
                n - i,
                y == 0,
                &mut layer,
                &mut cube,
                &mut count,
                n,
            );
            layer[y][0] = 0;
        }
    }

    count
}

fn fill(
    k: usize,
    align_top: bool,
    layer: &mut Vec<Vec<u8>>,
    cube: &mut HashSet<String>,
    count: &mut usize,
    n: usize,
) {
    if k == 0 {
        let current_str = layer_to_string(layer);
        if !cube.contains(&current_str) {
            *count += 1;

            cube.insert(current_str.clone());

            let rotated90 = rotate(n, layer);
            cube.insert(layer_to_string(&rotated90));

            let rotated180 = rotate(n, &rotated90);
            cube.insert(layer_to_string(&rotated180));

            let rotated270 = rotate(n, &rotated180);
            cube.insert(layer_to_string(&rotated270));

            let mirrored = mirror(layer);
            cube.insert(layer_to_string(&mirrored));

            let mirrored_rot90 = rotate(n, &mirrored);
            cube.insert(layer_to_string(&mirrored_rot90));

            let mirrored_rot180 = rotate(n, &mirrored_rot90);
            cube.insert(layer_to_string(&mirrored_rot180));

            let mirrored_rot270 = rotate(n, &mirrored_rot180);
            cube.insert(layer_to_string(&mirrored_rot270));
        }
        return;
    }

    let y0 = layer.iter().position(|row| row.iter().any(|&v| v != 0)).unwrap_or(n);
    if y0 >= n {
        return;
    }

    let y_start = (y0 as isize - 1).max(0) as usize;

    for y in y_start..n {
        for x in 0..n {
            if layer[y][x] == 0 {
                let has_adjacent = (y > 0 && layer[y - 1][x] != 0)
                    || (y < n - 1 && layer[y + 1][x] != 0)
                    || (x > 0 && layer[y][x - 1] != 0)
                    || (x < n - 1 && layer[y][x + 1] != 0);

                if has_adjacent {
                    let max_i = if align_top {
                        k
                    } else {
                        k.saturating_sub(y0 + 1)
                    };

                    for i in 1..=max_i {
                        layer[y][x] = i as u8;
                        fill(
                            k - i,
                            align_top || (y == 0),
                            layer,
                            cube,
                            count,
                            n,
                        );
                        layer[y][x] = 0;
                    }
                }
            }
        }
    }
}

fn rotate(n: usize, layer: &Vec<Vec<u8>>) -> Vec<Vec<u8>> {
    let mut rotated = vec![vec![0; n]; n];
    for y in 0..n {
        for x in 0..n {
            rotated[y][x] = layer[n - x - 1][y];
        }
    }
    align(rotated)
}

fn mirror(layer: &Vec<Vec<u8>>) -> Vec<Vec<u8>> {
    let mut mirrored = layer.clone();
    mirrored.reverse();
    align(mirrored)
}

fn align(mut layer: Vec<Vec<u8>>) -> Vec<Vec<u8>> {
    let n = layer.len();
    if n == 0 {
        return layer;
    }

    while layer[0].iter().all(|&v| v == 0) {
        let first_row = layer.remove(0);
        layer.push(first_row);
    }

    while (0..n).all(|y| layer[y][0] == 0) {
        for row in &mut layer {
            row.remove(0);
            row.push(0);
        }
    }

    layer
}

fn layer_to_string(layer: &Vec<Vec<u8>>) -> String {
    layer
        .iter()
        .flat_map(|row| row.iter())
        .map(|v| v.to_string())
        .collect::<Vec<_>>()
        .join(",")
}

fn main() {
    for n in 1..=8 {
        let start = Instant::now();
        let result = build(n);
        let duration = start.elapsed();
        println!("N = {} --> {}", n, result);
        println!("time: {:?}", duration);
    }
}

Java 11, \$n=17\$ in about 8.5 minutes

Based on Haskell solution by Christian Sievers – upvote his!

This answer is the result of learning enough Haskell to be able to understand Christian's answer, translating it into Java, applying numerous micro-optimizations, and throwing multiple cores at it. The exact runtime varies significantly depending on the number of cores available; this timing result is from my own two-core machine. A 48-core EC2 c5.24xlarge is able to compute \$n=17\$ in 16 seconds, and \$n=20\$ in 18 minutes.

Parallelism can be disabled by adding the JVM argument -Djava.util.concurrent.ForkJoinPool.common.parallelism=0. Single-threaded performance is slightly better than double that of the Haskell solution.

Some of the optimizations include:

The bulk of the processing time is spent in Array.sort calls in normalizeInPlace. Finding a way to compare polyomino transformations without sorting could easily result in a further 4x speedup. The forking is also not done very intelligently which leads to unbalanced tasks and unused cores at higher levels of parallelism.

import java.util.Arrays;
import java.util.concurrent.RecursiveTask;
import java.util.function.IntPredicate;
import java.util.function.IntUnaryOperator;
import java.util.function.LongSupplier;
import java.util.function.ToLongFunction;

/**
 * Utility methods for working with an int that represents a pair of short values.
 */
class Point {
    static final int start = p(0, 0);
    static final int[] neighbors = new int[] {-0x10000, -0x1, 0x1, 0x10000};

    static int x(int p) {
        return (p >> 16) - 0x4000;
    }

    static int y(int p) {
        return (short)(p) - 0x4000;
    }

    static int p(int x, int y) {
        return ((x + 0x4000) << 16) | (y + 0x4000);
    }

    static int rot(int p) {
        return p(-y(p), x(p));
    }

    static int mirror(int p) {
        return p(-x(p), y(p));
    }
}

/**
 * Minimal primitive array-based collections.
 */
class IntArrays {
    /** Concatenates the end of the first array with the beginning of the second. */
    static int[] arrayConcat(int[] a, int aOffset, int[] b, int bLen) {
        int aLength = a.length - aOffset;
        int[] result = new int[aLength + bLen];
        System.arraycopy(a, aOffset, result, 0, aLength);
        System.arraycopy(b, 0, result, aLength, bLen);
        return result;
    }

    /** Adds a new value to a sorted set, returning the new result */
    static int[] setAdd(int[] set, int val) {
        int[] dst = new int[set.length + 1];
        int i = 0;
        for (; i < set.length && set[i] < val; i++) {
            dst[i] = set[i];
        }
        dst[i] = val;
        for (; i < set.length; i++) {
            dst[i + 1] = set[i];
        }
        return dst;
    }

    private static final int[] primes = new int[] {
            5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
            67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131
    };

    /**
     * Allocate an array large enough to hold a fixed-capacity hash table
     * that can contain "seen" points for generating polyominos of size n.
     */
    static int[] makeHashTable(int n) {
        return new int[primes[-(Arrays.binarySearch(primes, n * 3) + 1)]];
    }

    /** Inserts a new value to a hash table, in-place */
    static void hashInsert(int[] table, int val) {
        int pos = (val * 137) % table.length, startPos = pos;
        if (table[pos] != 0) {
            while ((table[pos = (pos + 1) % table.length]) != 0) {
                if (pos == startPos) {
                    throw new AssertionError("table full");
                }
            }
        }
        table[pos] = val;
    }

    /** Checks whether a hash table contains the specified value */
    static boolean hashContains(int[] table, int val) {
        int pos = (val * 137) % table.length, startPos = pos;
        while (true) {
            int curr = table[pos];
            if (curr == val) return true;
            if (curr == 0) return false;
            pos = (pos + 1) % table.length;
            if (pos == startPos) {
                throw new AssertionError("table full");
            }
        }
    }
}

/**
 * Recursively generates int arrays representing collections of Points,
 * applying a function to each array to compute a long, and returns the sum
 * of all such values.
 */
class PolyominoVisitor extends RecursiveTask<Long> {
    PolyominoVisitor(ToLongFunction<? super int[]> func, int n) {
        this(func, n, 0, 1, new int[0], IntArrays.makeHashTable(n), new int[]{Point.start});
    }

    private PolyominoVisitor(ToLongFunction<? super int[]> action, int n,
                             int i, int limit, int[] used, int[] seen, int[] untried) {
        this.func = action;
        this.n = n;
        this.start = () -> visit(i, limit, used, seen, untried);
    }

    private final boolean visitSmaller = true;
    private final ToLongFunction<? super int[]> func;
    private final int n;
    private final LongSupplier start;

    @Override
    protected Long compute() {
        return start.getAsLong();
    }

    private long visit(int i, int limit, int[] used, int[] seen, int[] untried) {
        long val = 0;
        if (used.length + 1 == n) {
            // reached the second to last level, so we can apply the function
            // directly to our children
            for (; i < limit; i++) {
                val += func.applyAsLong(IntArrays.setAdd(used, untried[i]));
            }
        } else if (used.length + 6 < n && limit - i >= 2) {
            // eligible to split
            PolyominoVisitor[] tasks = new PolyominoVisitor[limit - i];
            for (int j = 0; j < tasks.length; j++) {
                tasks[j] = new PolyominoVisitor(func, n,
                        i + j, i + j + 1, used, seen, untried);

            }
            invokeAll(tasks);
            for (PolyominoVisitor task : tasks) val += task.getRawResult();
            return val;
        } else {
            // recursively visit children
            int[] newReachable = new int[4];
            IntPredicate inSeen = p -> !IntArrays.hashContains(seen, p);
            for (; i < limit; i++) {
                int candidate = untried[i];
                int[] child = IntArrays.setAdd(used, candidate);
                int reachableCount = neighbors(candidate, inSeen, newReachable);
                int[] newSeen = seen.clone();
                for (int j = 0; j < reachableCount; j++) IntArrays.hashInsert(newSeen, newReachable[j]);
                int[] newUntried = IntArrays.arrayConcat(untried, i + 1, newReachable, reachableCount);
                val += visit(0, newUntried.length, child, newSeen, newUntried);
            }
        }
        if (visitSmaller && used.length > 0 && limit == untried.length) {
            val += func.applyAsLong(used);
        }
        return val;
    }

    /**
     * Write the greater-than-origin neighbors of the given point
     * that pass the provided predicate into the provided array,
     * returning the count written.
     */
    private static int neighbors(int p, IntPredicate pred, int[] dst) {
        int count = 0;
        for (int offset : Point.neighbors) {
            int n = p + offset;
            if (n > Point.start && pred.test(n)) {
                dst[count++] = n;
            }
        }
        return count;
    }
}

/**
 * Function that computes how many buildings are constructable on a given
 * polyomino base. Considers symmetry, returning 0 if the figure is not the
 * canonical version (i.e. has a smaller transformation).
 *
 * Adapted largely unchanged from Christian Sievers
 * https://codegolf.stackexchange.com/a/199919
 */
class BuildingCounter implements ToLongFunction<int[]> {
    private final int n;

    public BuildingCounter(int n) {
        this.n = n;
    }

    @Override
    public long applyAsLong(int[] fig) {
        return combinations(n - fig.length, fig);
    }

    private static int[] map(int[] fig, IntUnaryOperator func) {
        int[] result = new int[fig.length];
        for (int i = 0; i < fig.length; i++) {
            result[i] = func.applyAsInt(fig[i]);
        }
        return result;
    }

    private static int[] normalizeInPlace(int[] fig) {
        Arrays.sort(fig);
        int d = fig[0] - Point.start;
        for (int i = 0; i < fig.length; i++) {
            fig[i] -= d;
        }
        return fig;
    }

    private static int[] rot(int[] ps) {
        return normalizeInPlace(map(ps, Point::rot));
    }

    private static int[] mirror(int[] ps) {
        return normalizeInPlace(map(ps, Point::mirror));
    }

    private static int myf(int r, int sz, int[] fig) {
        int max = Integer.MIN_VALUE;
        for (int p : fig) {
            if (p > max) max = p;
        }
        int w = Point.x(max);
        if (w % 2 == 0) {
            int wh = w / 2;
            int myb = 0;
            for (int p : fig) {
                if (Point.x(p) == wh) myb++;
            }
            return c12(myb, (sz - myb)/2, r);
        } else {
            return c1h(sz, r);
        }
    }

    private static int mdf(int r, int sz, int[] fig) {
        int lo = Integer.MAX_VALUE;
        for (int p : fig) {
            int tmp = Point.y(p);
            if (tmp < lo) lo = tmp;
        }
        int mdb = 0;
        for (int p : fig) {
            if (Point.x(p) == Point.y(p) - lo) mdb++;
        }
        return c12(mdb, (sz-mdb)/2, r);
    }

    private static long combinations(int r, int[] fig) {
        int[][] alts = new int[7][];
        alts[0] = rot(fig);
        alts[1] = rot(alts[0]);
        alts[2] = rot(alts[1]);
        alts[3] = mirror(fig);
        alts[4] = mirror(alts[0]);
        alts[5] = mirror(alts[1]);
        alts[6] = mirror(alts[2]);
        int[] rfig = alts[0];
        int[] cmps = new int[7];
        for (int i = 0; i < 7; i++) {
            if ((cmps[i] = Arrays.compare(fig, alts[i])) > 0) {
                return 0;
            }
        }
        if (r == 0) {
            return 1;
        }
        int sz = fig.length;
        int qtfc = (sz % 2 == 0) ? c1q(sz, r) : sc1x(4, sz, r);
        int htfc = (sz % 2 == 0) ? c1h(sz, r) : sc1x(2, sz, r);
        int idfc = c1(sz, r);
        int[] fsc = new int[] {qtfc, htfc, qtfc,
                myf(r, sz, fig), mdf(r, sz, fig),
                myf(r, sz, rfig), mdf(r, sz, rfig)};
        int gs = 1;
        int allfc = idfc;
        for (int i = 0; i < fsc.length; i++) {
            if (cmps[i] == 0) {
                allfc += fsc[i];
                gs++;
            }
        }
        return allfc / gs;
    }

    private static int c1(int n, int t) {
        int v = 1;
        for (int x = 1; x <= t; x++) {
            v = v * (n+x-1) / x;
        }
        return v;
    }

    private static int c1h(int n, int t) {
        return c1d(n, t, 2);
    }

    private static int c1q(int n, int t) {
        return c1d(n, t, 4);
    }

    private static int c1d(int n, int t, int q) {
        if (t % q == 0) {
            return c1(n / q, t / q);
        } else {
            return 0;
        }
    }

    private static int sc1x(int m, int n, int t) {
        return c1(1 + n / m, t / m);
    }

    private static int c12(int s, int d, int t) {
        int sum = 0;
        for (int i = t/2; i >= 0; i--) {
            sum += c1(s, t-2*i) * c1(d, i);
        }
        return sum;
    }
}

public class Main {
    public static long count(int n) {
        return new PolyominoVisitor(new BuildingCounter(n), n).compute();
    }

    public static void main(String[] args) {
        if (args.length > 0) {
            System.out.println(args[0] + ": " + count(Integer.parseInt(args[0])));
        } else {
            for (int i = 1; i <= 99; i++) {
                System.out.println(i + ": " + count(i));
            }
        }
    }
}

Invocation

javac Main.java
java Main 17

Try it online!

Results

(when run without an argument)

...
16: 438030079
17: 2092403558
18: 10027947217
19: 48198234188
20: 232261124908
21: 1121853426115

Haskell, \$n=16\$ in about 9 minutes

As observed by @xnor in the comments, we can break the problem down into two parts: generate polyominoes (where I reused a lot from here, then count the ways to distribute the remaining cubes.

The symmetries are accounted for by using Burnside's lemma. So we need to know how many buildings of a given symmetric shape are fixed by a symmetry. Consider for example a shape has one mirror symmetry where the axis of reflection goes through \$s\$ squares of the shape and the reflection identifies \$d\$ pairs of further squares of the shape (so its size is \$s+2d\$). Then the buildings of this shape with \$r\$ additional cubes that have this symmetry correspond to the solutions of \$x_1+\dots+x_s+2y_1+\dots+2y_d=r\$ with nonnegative integers. The number of solutions is added to the total number of possibly equivalent buildings, and the sum divided by two. Note that a rotational symmetry always fixes zero or one square of a shape.

Compile the code with something like ghc -O2 -o buildings buildings.hs. The executable takes one optional parameter. If it is given, it will compute the number of buildings with that many cubes. Otherwise, it will compute all values.

{-# LANGUAGE BangPatterns #-}

import Data.List (sort)
import qualified Data.Set as S
import System.Environment (getArgs)

data P = P !Int !Int deriving (Eq, Ord)

start :: P
start = P 0 0

neighs :: P -> [P]
neighs (P x y) = [ p | p <- [P (x+1) y, P (x-1) y, P x (y+1), P x (y-1)],
                       p > start ]

count :: Int -> Int -> S.Set P -> S.Set P -> [P] -> Int
count 0 c _ _ _ = c
count _ c _ _ [] = c 
count n c used seen (p:possible) =
  let !c' = count n c used seen possible
      !n' = n-1
      next = S.insert p used
      !sz = S.size next
      !c'' = c' + combinations n' sz (S.toAscList next)
      new = [ n | n <- neighs p, n `S.notMember` seen ]
  in count n' c'' next (foldr S.insert seen new) (new++possible)

class Geom g where
  translate :: Int -> Int -> g -> g
  rot :: g -> g
  mirror :: g -> g

instance Geom P where
  translate !dx !dy (P x y) = P (x+dx) (y+dy)
  rot (P x y) = P (-y) x
  mirror (P x y) = P (-x) y

instance (Geom g, Ord g) => Geom [g] where
  translate !dx !dy = map $ translate dx dy
  rot = sort . map rot
  mirror = sort . map mirror

normalize :: [P] -> [P]
normalize fig = let (P x y) = head fig
                in translate (-x) (-y) fig

-- fixed points of horizontal mirror symmetry
myf :: Int -> Int -> [P] -> Int
myf r sz fig =
  let w = (maximum [ x | P x _ <- fig ])
      wh = w `div` 2
      myb = sum [ 1 | P x _ <- fig, x == wh ]
  in if even w -- odd width! 
     then c12 myb ((sz-myb) `div` 2) r
     else c1h sz r

-- fixed points of diagonal mirror symmetry
mdf :: Int -> Int -> [P] -> Int
mdf r sz fig =
  let lo = minimum [ y | P _ y <- fig ]
      mdb = sum [ 1 | P x y <- fig, x == y-lo ]
  in c12 mdb ((sz-mdb) `div` 2) r

combinations :: Int -> Int -> [P] -> Int
combinations r sz fig = 
  let rotated = take 4 $ iterate (normalize . rot) fig
      rfig = rotated !! 1
      mirrored = map (normalize . mirror) rotated
      alts = tail rotated ++ mirrored
      cmps = map (compare fig) alts
      -- All fixed points computations assume that the symmetry exists.
      -- fixed points of quarter turn:
      qtfc = if even sz then c1q sz r else sc1x 4 sz r 
      -- fixed points of half turn:
      htfc = if even sz then c1h sz r else sc1x 2 sz r
      -- fixed points of reflections:
      mirror_fc = [ fun r sz f |
                    f   <- [ fig, rfig ],
                    fun <- [ myf, mdf ]    ]
      -- all possibilities, i.e. fixed points of identity:
      idfc = c1 sz r
      fsc = [ qtfc, htfc, qtfc] ++ mirror_fc
      -- fixed points of symmetries that really exist:
      allfc = idfc : [ fc | (fc,EQ) <- zip fsc cmps ]
      -- group size of symmetry group:
      gs = length allfc
      res = if r==0 then 1 else sum allfc `div` gs
  in if any (GT ==) cmps
      -- only count if we have the smallest representative
      then 0 else res 

-- Number of ways to express t as sum of n nonnegative integers.
-- binomial(n+t-1, n-1)
c1 n t = foldl (\v x -> v * (n+x-1) `div` x) 1 [1..t]

-- Number of ways to express t as twice the sum of n/2 nn-integers
c1h n t | even t    = c1 (n `div` 2) (t `div` 2)
        | otherwise = 0

-- Number of ways to express t as four times the sum of n/4 nn-integers.
c1q n t | t `mod` 4 == 0 = c1 (n `div` 4) (t `div` 4)
        | otherwise      = 0

-- Number of ways to express t as an nn-integer plus m times the sum
-- of n/m nn-integers
sc1x m n t = c1 (1 + n `div` m) (t `div` m)

-- Number of ways to express t as the sum of s nn-integers
-- plus twice the sum of d nn-integers
c12 s d t = sum [ c1 s (t-2*t2) * c1 d t2 | t2 <- [ 0 .. t `div` 2 ] ]

count_buildings :: Int -> Int
count_buildings n = count n 0 S.empty S.empty [start]

output :: Int -> IO ()
output n = putStrLn $ show n ++ ": " ++ show (count_buildings n)

main = do args <- getArgs
          case args of
            []  -> mapM_ output [1..]
            [n] -> output (read n)

Try it online!

Results

15: 92038062
16: 438030079
17: 2092403558
18: 10027947217   (2 1/2 h)
19: 48198234188   (10 h)
20: 232261124908  (40 h)

Java 8, \$n=14\$ in 2m31s1

1. Using the AdoptOpenJDK8 distribution, on a 2-core Amber Lake Intel Core i5-based Mac. On an Amazon EC2 m5.xlarge, takes 1m16s.

This takes an inductive approach where, for each rank, it builds off all the buildings of the previous rank by placing cubes in all legal positions (on top of and next to existing cubes), and then removing buildings that are (possibly transformed) duplicates of other buildings. This means that to enumerate the buildings in a rank, all previous ranks must be also be computed. Both compute time and memory are constrained resources — this algorithm relies on keeping millions of Building objects in memory — so I've had a hard time computing beyond \$n=14\$ on my machine even without the 10 minute time limit.

This solution includes a parallel-Stream-based approach (which can be enabled with the parallel JVM system property) which is faster on a multi-core system but also more memory-hungry. This approach was used for the timing results above. The non-parallel approach takes almost twice as long to count \$n=14\$ but is able to do so using only a third of the memory.

Garbage collector settings and tuning can have a significant impact on the runtime and ability of the process to complete. I've also tested with OpenJDK 13, but for whatever reason have seen the best results on 8 so far.

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public final class Building {
    /**
     * A flattened two-dimensional matrix of heights (see toIndex).
     * Buildings are always assumed to be "aligned", such that they exactly
     * fit within their (width, height) bounding box.
     */
    private final byte[] stacks;
    private final int hashCode;
    private final byte width;
    private final byte height;

    public Building() {
        this(new byte[]{1}, 1);
    }

    private Building(byte[] stacks, int width) {
        assert stacks.length % width == 0;
        this.stacks = stacks;
        this.width = (byte) width;
        this.height = (byte) (stacks.length / width);
        this.hashCode = 31 * width + Arrays.hashCode(stacks);
    }

    /**
     * Return the building created by adding a cube at the specified coordinates.
     * The coordinates must be within the current building bounds or else
     * directly adjacent to one of the sides, but this is not validated.
     */
    Building add(int x, int y) {
        if (x < 0) {
            byte[] newStacks = widen(true);
            newStacks[y * (width + 1)]++;
            return new Building(newStacks, width + 1);
        } else if (x < width) {
            byte[] newStacks;
            if (y < 0) {
                newStacks = new byte[stacks.length + width];
                System.arraycopy(stacks, 0, newStacks, width, stacks.length);
                y = 0;
            } else if (y * width < stacks.length) {
                newStacks = Arrays.copyOf(stacks, stacks.length);
            } else {
                newStacks = Arrays.copyOf(stacks, stacks.length + width);
            }
            newStacks[toIndex(x, y)]++;
            return new Building(newStacks, width);
        } else {
            byte[] newStacks = widen(false);
            newStacks[x + y * (width + 1)]++;
            return new Building(newStacks, width + 1);
        }
    }

    byte[] widen(boolean shift) {
        byte[] newStacks = new byte[stacks.length + height];
        int writeIndex = shift ? 1 : 0;
        for (int i = 0; i < stacks.length; i++) {
            newStacks[writeIndex++] = stacks[i];
            if (i % width == width - 1) {
                writeIndex++;
            }
        }
        return newStacks;
    }
    int toIndex(int x, int y) {
        return x + y * width;
    }

    boolean inBounds(int x, int y) {
        return x >= 0 && y >= 0 && x < width && y < height;
    }

    /**
     * Return a stream of all legal buildings that can be created by adding a
     * cube to this building.
     */
    Stream<Building> grow() {
        int wider = width + 2;
        int max = (height + 2) * wider;

        return StreamSupport.stream(new Spliterators.AbstractSpliterator<Building>(max, 0) {
            int i = -1;

            @Override
            public boolean tryAdvance(Consumer<? super Building> action) {
                while ((++i) < max) {
                    // Try adding a cube to every position on the grid,
                    // as well as adjacent to it
                    int x = i % wider - 1;
                    int y = i / wider - 1;
                    int index = toIndex(x, y);
                    if (x < 0) {
                        if (y >= 0 && y < height) {
                            if (stacks[index + 1] > 0) {
                                action.accept(add(x, y));
                                return true;
                            }
                        }
                    } else if (x < width) {
                        if (y < 0) {
                            if (stacks[index + width] > 0) {
                                action.accept(add(x, y));
                                return true;
                            }
                        } else if (y < height) {
                            // it is on the existing grid
                            if (stacks[index] > 0) {
                                action.accept(add(x, y));
                                return true;
                            } else {
                                // is it adjacent to a stack?
                                for (Direction d : Direction.values()) {
                                    int x2 = x + d.x, y2 = y + d.y;
                                    if (inBounds(x2, y2) && stacks[toIndex(x2, y2)] > 0) {
                                        action.accept(add(x, y));
                                        return true;
                                    }
                                }
                            }
                        } else if (stacks[index - width] > 0) {
                            action.accept(add(x, y));
                            return true;
                        }
                    } else if (y >= 0 && y < height) {
                        if (stacks[index - 1] > 0) {
                            action.accept(add(x, y));
                            return true;
                        }
                    }
                }
                return false;
            }
        }, false);
    }

    Building reflect() {
        byte[] newStacks = new byte[stacks.length];
        for (int x = 0; x < width; x++) {
            for (int y = 0; y < height; y++) {
                newStacks[y + x * height] = stacks[toIndex(x, y)];
            }
        }
        return new Building(newStacks, height);
    }

    Building rotate() {
        byte[] newStacks = new byte[stacks.length];
        for (int x = 0; x < width; x++) {
            for (int y = 0, x2 = height - 1; y < height; y++, x2--) {
                newStacks[x2 + x * height] = stacks[toIndex(x, y)];
            }
        }
        return new Building(newStacks, height);
    }

    Collection<Building> transformations() {
        List<Building> bs = new ArrayList<>(7);
        Building b1 = this, b2 = this.reflect();
        bs.add(b2);
        for (int i = 0; i < 3; i++) {
            bs.add((b1 = b1.rotate()));
            bs.add((b2 = b2.rotate()));
        }
        return bs;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Building building = (Building) o;
        return width == building.width &&
                Arrays.equals(stacks, building.stacks);
    }

    @Override
    public int hashCode() {
        return hashCode;
    }

    private enum Direction {
        N(0, 1), E(1, 0), S(0, -1), W(-1, 0);

        final int x;
        final int y;
        Direction(int x, int y) {
            this.x = x;
            this.y = y;
        }
    }

    public static int count(int rank) {
        long start = System.nanoTime();
        Collection<Building> buildings = new HashSet<>();

        for (int i = 1; i <= rank; i++) {
            if (i == 1) {
                buildings = Arrays.asList(new Building());
            } else if (Boolean.getBoolean("parallel")) {
                // Using parallel streams is generally faster, but requires
                // more memory since more Buildings are retained before being
                // discarded
                ConcurrentMap<Building, Integer> map =
                        new ConcurrentHashMap<>(buildings.size() * 4);
                AtomicInteger atomicInt = new AtomicInteger();
                buildings.parallelStream()
                        .flatMap(Building::grow)
                        .forEach(b -> {
                            map.putIfAbsent(b, atomicInt.incrementAndGet());
                        });
                // Keep only the buildings that do not have a transformation
                // with a lower index
                buildings = map.entrySet().parallelStream()
                        .filter(entry -> {
                            int index = entry.getValue();
                            for (Building b2 : entry.getKey().transformations()) {
                                Integer index2 = map.get(b2);
                                if (index2 != null && index2 < index) {
                                    return false;
                                }
                            }
                            return true;
                        })
                        .map(Map.Entry::getKey)
                        .collect(Collectors.toList());
            } else {
                Set<Building> set = new HashSet<>(buildings.size() * 4);
                // Only add a building to the set if it doesn't already have a
                // transformation in it.
                buildings.stream()
                        .flatMap(Building::grow)
                        .forEach(b -> {
                            if (!set.contains(b)) {
                                for (Building t : b.transformations()) {
                                    if (set.contains(t)) return;
                                }
                                set.add(b);
                            }
                        });
                buildings = set;
            }
            System.err.println(i + " --> " + buildings.size());
            long now = System.nanoTime();
            double ms = ((double) now - start) / 1000000;
            System.err.println("time: " + (ms < 1000 ? ms + " ms" : ms / 1000 + " s"));
        }
        return buildings.size();
    }

    public static void main(String[] args) {
        System.out.println(Building.count(Integer.parseInt(args[0])));
    }
}

Try it online! (non-parallel, up to n=12)

Invocation

javac Building.java
java -XX:+UseParallelGC -Xms14g -Xmx14g -Dparallel=true Building 14

ParallelGC is the default on Java 8, but is included in case you are using a later version JDK where G1GC is the default.

Output

1 --> 1
time: 0.410181 ms
2 --> 2
time: 97.807367 ms
3 --> 4
time: 99.648279 ms
4 --> 12
time: 101.00362 ms
5 --> 35
time: 102.4856 ms
6 --> 129
time: 105.723149 ms
7 --> 495
time: 113.747058 ms
8 --> 2101
time: 130.012756 ms
9 --> 9154
time: 193.924776 ms
10 --> 41356
time: 436.551396 ms
11 --> 189466
time: 991.984875 ms
12 --> 880156
time: 3.899371721 s
13 --> 4120515
time: 18.794214388 s
14 --> 19425037
time: 151.782854829 s
19425037

(For reference, \$15 \rightarrow 92{,}038{,}062\$)

JavaScript (Node.js), \$N=10\$ in 4m02s1

1: on an Intel Code i7, 7th Gen

This only includes some trivial optimizations and is therefore quite inefficient. It does at least confirm the results listed in the challenge.

function build(n) {
  let layer = [],
      cube = new Set,
      count = 0,
      x, y;

  for(y = 0; y < n; y++) {
    for(layer[y] = [], x = 0; x < n; x++) {
      layer[y][x] = 0;
    }
  }

  function fill(k, alignTop) {
    let x, y;

    if(k == 0) {
      if(!cube.has(layer + '')) {
        let transf;

        count++;

        cube.add(layer + '');
        cube.add((transf = rotate(n, layer)) + '');
        cube.add((transf = rotate(n, transf)) + '');
        cube.add((transf = rotate(n, transf)) + '');

        cube.add((transf = mirror(layer)) + '');
        cube.add((transf = rotate(n, transf)) + '');
        cube.add((transf = rotate(n, transf)) + '');
        cube.add((transf = rotate(n, transf)) + '');
      }
      return;
    }

    let y0;

    for(y0 = 0; !layer[y0].some(v => v); y0++) {}

    for(y = Math.max(0, y0 - 1); y < n; y++) {
      for(x = 0; x < n; x++) {
        if(
          !layer[y][x] && (
            (y && layer[y - 1][x]) ||
            (y < n - 1 && layer[y + 1][x]) ||
            (x && layer[y][x - 1]) ||
            (x < n - 1 && layer[y][x + 1])
          )
        ) {
          for(let i = 1; i <= (alignTop ? k : k - y0 - 1); i++) {
            layer[y][x] = i;
            fill(k - i, alignTop || !y);
            layer[y][x] = 0;
          }
        }
      }
    }
  }

  for(y = 0; y < n; y++) {
    for(let i = 1; i <= n - y; i++) {
      layer[y][0] = i;
      fill(n - i, !y);
      layer[y][0] = 0;
    }
  }

  return count;
}

function rotate(n, layer) {
  let rot = [],
      x, y;

  for(y = 0; y < n; y++) {
    for(rot[y] = [], x = 0; x < n; x++) {
      rot[y][x] = layer[n - x - 1][y];
    }
  }
  return align(rot);
}

function mirror(layer) {
  return align([...layer].reverse());
}

function align(layer) {
  while(!layer[0].some(v => v)) {
    let s = layer.shift();
    layer = [...layer, s];
  }
  while(!layer[0].some((_, y) => layer[y][0])) {
    layer = layer.map(r => {
      return [...r.slice(1), 0];
    });
  }
  return layer;
}

Try it online! (up to \$N=8\$)

Output

N = 1 --> 1
time: 10.352ms
N = 2 --> 2
time: 0.935ms
N = 3 --> 4
time: 0.877ms
N = 4 --> 12
time: 2.530ms
N = 5 --> 35
time: 9.060ms
N = 6 --> 129
time: 33.333ms
N = 7 --> 495
time: 157.160ms
N = 8 --> 2101
time: 1559.707ms
N = 9 --> 9154
time: 18555.900ms
N = 10 --> 41356
time: 242855.989ms