g | x | w | all
Bytes Lang Time Link
197Python3250801T010932ZAjax1234
082Haskell190523T165825ZZgarb
146Perl 6190521T062836Zbb94
02005AB1E190520T165716ZGrimmy
193Perl 5180804T125954ZKjetil S
128JavaScript ES6180804T083349ZArnauld

Python3, 197 bytes

def f(s,m):
 q,r=[(m,0,[])],[]
 for m,M,c in q:
  if[]==m:r+=[M];continue
  u=m[0]
  q+=[(m[1:],M+(u not in c),C)for C in[c]+[c+[u]]*(len(c)<s and u not in c)+[[*{*c}-{u}]]*(u in c)]
 return min(r)

Try it online!

Python3, 356 bytes

Longer, but faster solution (on much larger test cases)

def f(s,m):
 d={}
 for i in m:d[V]={*d.get(V:=m.count(i),[]),i}
 t,F=[],1
 for j in[*{*d}][::-1]:t+=[*d[j]]*F;F=len(t)<s
 q,r=[(m,0,[])],[]
 for m,M,c in q:
  if r and M>=min(r):continue
  if[]==m:r+=[M];continue
  l,u=[c],m[0]
  if u in t:l+=[c+[u]]*(len(c)<s and u not in c)+[[*{*c}-{u}]]*(u in c)
  q+=[(m[1:],M+(u not in c),C)for C in l]
 return min(r)

Try it online!

Haskell, 82 bytes

f n|let(d:t)#c=1-sum[1|elem d c]+minimum[t#take n e|e<-scanr(:)(d:c)c];_#_=0=(#[])

Try it online!

Explanation

Works by brute force: all cache strategies are tried and the best result is returned.

f n            Define a function f on argument n (cache size) and a list (implicit).
 |let(d:t)#c=  Define binary helper function #.
               Arguments are list with head d (current data) and tail t (remaining data), and list c (cache).
 1-            It returns 1 minus
 sum[1|        1 if
 elem d c]+    d is in the cache, plus
 minimum[      minimum of
 t#            recursive calls to # with list t
 take n e|     and cache being the first n values of e, where
 e<-           e is drawn from
 scanr(:)  c]  the prefixes of c
 (d:c)         with d and c tacked to the end.
 ;_#_=0        If the first list is empty, return 0.
 =(#[])        f then calls # with the list argument and empty cache.

Perl 6, 146 bytes

->\a,\b {$_=set();$!=0;for b.kv ->\i,\v {$_{v}&&next;++$!;v∈b[i^..*]||next;$_∖=.keys.max({(grep $_,:k,b[i^..*])[0]//Inf})if $_>=a;$_∪=v};$!}

Try it online!

05AB1E, 20 bytes

vI∍Dyå≠i¼yª¹N.$sÃÙ]¾

Try it online!

Perl 5, 193 bytes

sub g{
  my($i,$m,$s,@a,%c)=(-1,0,@_);
  for(@a){
    $i++;
    next if $c{$_}++ || ++$m && keys%c <= $s;
    my($x,$d);
    for $k (sort keys %c){  #find which to delete, the one furtherst away
      my $n=0;
      ++$n && /^$k$/ && last for @a[$i+1..$#a];
      ($x,$d)=($n,$k) if $n>$x
    }
    delete $c{$d}
  }
  $m
}

Try it online!

print g(3,  5, 0, 1, 2, 0, 3, 1, 2, 5, 2),"\n";                     # 6
print g(2,  0, 1, 2, 0, 1, 0, 1),"\n";                              # 3
print g(3,  0, 1, 2, 1, 4, 3, 1, 0, 2, 3, 4, 5, 0, 2, 3, 4),"\n";   # 9

193 bytes without indentation, newlines, spaces, comments:

sub g{my($i,$m,$s,@a,%c)=(-1,0,@_);for(@a){$i++;next if$c{$_}++||++$m&&keys%c<=$s;my($x,$d);for$k(sort keys%c){my$n=0;++$n&&/^$k$/&&last for@a[$i+1..$#a];($x,$d)=($n,$k)if$n>$x}delete$c{$d}}$m}

JavaScript (ES6), 128 bytes

Takes input as (size)(list).

s=>a=>a.map((x,i)=>c.includes(x)?0:c[e++,[x,...c].map(m=(x,j)=>(k=[...a,x].indexOf(x,i+1))<m||(p=j,m=k)),i<s?i:p-1]=x,e=c=[])&&e

Try it online!

Commented

This is an implementation of Belady's algorithm.

s => a =>                      // s = cache size; a[] = token list
  a.map((x, i) =>              // for each token x at position i in a[]:
    c.includes(x) ?            //   if x is currently stored in the cache:
      0                        //     do nothing
    :                          //   else:
      c[                       //     update the cache:
        e++,                   //       increment the number of errors (cache misses)
        [x, ...c]              //       we want to find which value among x and all current
                               //       cache values will be needed for the longest time in
                               //       the future (or not needed anymore at all)
        .map(m =               //       initialize m to a non-numeric value
                 (x, j) =>     //       for each x at position j in this array:
          ( k = [...a, x]      //         k = position of x in the array made of all values
            .indexOf(x, i + 1) //         of a[] followed by x, starting at i + 1
          ) < m                //         if it's greater than or equal to m, or m is
          || (p = j, m = k)    //         still non-numeric: set p to j and m to k
        ),                     //       end of inner map()
        i < s ?                //       if i is less than the cache size:
          i                    //         just fill the cache by using the next cache slot
        :                      //       else:
          p - 1                //         use the slot that was found above
                               //         special case: if p = 0, x was the best candidate
                               //         and we're going to store it at c[-1], which is
                               //         simply ignored (it will not trigger c.includes(x))
      ] = x,                   //     store x at this position
      e = c = []               //     start with e = [] (coerced to 0) and c = []
  ) && e                       // end of outer map; return e