g | x | w | all

Bytes	Lang	Time	Link
nan	APLNARS	250726T101951Z	Rosario
nan	Rust	241130T135626Z	138 Aspe
000	C 30	240731T044900Z	qwr
nan		160402T002944Z	will.fis
nan		160611T135516Z	P_P
nan		150116T133549Z	AlexPnt
000	Nimrod ~38	140507T052351Z	Reimer B
000	Nimrod ~2	140509T165503Z	Reimer B
nan		140507T105421Z	Anna Jok
000	Java	140508T094941Z	Howard
nan	Matlab 1464 = 26355867/	140507T125113Z	Dennis J
001	φ2n = 2n −	140507T150121Z	Ry-

APL(NARS), score 3.8 (43000/11100)

phi←{×/(v-1)×(v←↑¨k)*¯1+≢¨k←⊂⍨π⍵}

r←f w;i
r←0⋄i←1
→0×⍳i≥w⋄r+←phi i⋄i+←1⋄→2

phi would be the totient function, f would be the function that calculate the value sum of totient values in the range pass in input...

10 second are 1..43000 consecutive numbers to calculate totient function.

The division range numbers in Apl for 10 seconds / range numbers the C code of the question in 10 seconds, here in this little pc is 43000/11100=3.8738

Rust, ~270(5400000/20000)

Port of @will.fiset's Python 2 code in Rust.

Run it on Rust Playground!

// cargo build --release && time target/release/my_totient


use std::collections::{VecDeque, HashSet};
use rand::Rng;

fn gcd(mut a: u64, mut b: u64) -> u64 {
    while b != 0 {
        let temp = a % b;
        a = b;
        b = temp;
    }
    a
}

fn mod_pow(mut base: u64, mut exponent: u64, modulus: u64) -> u64 {
    if modulus == 1 {
        return 0;
    }
    let mut result = 1u64;
    base %= modulus;
    while exponent > 0 {
        if exponent % 2 == 1 {
            result = result.wrapping_mul(base) % modulus;
        }
        exponent >>= 1;
        base = base.wrapping_mul(base) % modulus;
    }
    result
}

fn rabin_miller(p: u64) -> bool {
    if p < 2 {
        return false;
    }
    if p != 2 && p % 2 == 0 {
        return false;
    }
    let mut s = p - 1;
    while s % 2 == 0 {
        s >>= 1;
    }
    let mut rng = rand::thread_rng();
    for _ in 0..10 {
        let a = rng.gen_range(1..p);
        let mut temp = s;
        let mut modp = mod_pow(a, temp, p);
        while temp != p - 1 && modp != 1 && modp != p - 1 {
            modp = modp.wrapping_mul(modp) % p;
            temp <<= 1;
        }
        if modp != p - 1 && temp % 2 == 0 {
            return false;
        }
    }
    true
}

fn pollard_rho(n: u64) -> u64 {
    if n % 2 == 0 {
        return 2;
    }
    let mut rng = rand::thread_rng();
    let mut x: u64 = rng.gen_range(2..1000000);
    let c: u64 = rng.gen_range(1..1000000);
    let mut y = x;
    let mut d = 1u64;
    let f = |x: u64| -> u64 { (x.wrapping_mul(x).wrapping_add(c)) % n };
    while d == 1 {
        x = f(x);
        y = f(f(y));
        d = gcd((x as i64 - y as i64).abs() as u64, n);
        if d == n {
            break;
        }
    }
    d
}

fn prime_factorization(n: u64) -> Vec<u64> {
    if n <= 0 {
        panic!("Invalid input, n <= 0");
    } else if n == 1 {
        return vec![];
    }

    let mut queue = VecDeque::new();
    let mut factors = Vec::new();

    queue.push_back(n);
    while let Some(l) = queue.pop_front() {
        if rabin_miller(l) {
            factors.push(l);
            continue;
        }
        let d = pollard_rho(l);
        if d == l {
            queue.push_back(l);
        } else {
            queue.push_back(d);
            queue.push_back(l / d);
        }
    }
    factors
}

fn phi(n: u64) -> u64 {
    if rabin_miller(n) {
        return n - 1;
    }
    let mut phi = n;
    let factors = prime_factorization(n);
    let unique_factors: HashSet<u64> = factors.into_iter().collect();
    for p in unique_factors {
        phi -= phi / p;
    }
    phi
}

fn main() {
    let mut n = 1u64;
    let mut s = 0u64;

    while n < 5400000{
        n += 1;
        s += phi(n);
    }

    println!("{}", s);
}

C: 30,000 = 600,000,000 / 20,000 (basic sieve)

This is a decade-old question of mine I forgot I wrote until I found it on google again. Nevertheless, I wanted to post a simple solution that is reasonably competitive and about the same length as my naive code. This is a small modification of the Sieve of Eratosthenes to use Euler's product formula that also runs in \$O(n \log \log n)\$ time. The only other easy optimization I did was only sieve and store the odd index values, since it's easy to get the even values.

For reference, this is the original python/pseudocode algorithm in 5 lines of computation:

def totient_range(n):
    tots = list(range(n+1))
    for p in range(2, n+1):
        if p == tots[p]:
            for k in range(p, n+1, p):
                tots[k] -= tots[k] // p

    return tots

#include "stdio.h"

#define N 600000000       

unsigned t[(N+1)/2];    

unsigned tots(unsigned i)         
{
    unsigned ih = i / 2;
    if (i % 2) 
        return t[ih];
    else
        return (ih % 2) ? t[ih/2] : 2 * tots(ih);
}

int main(void) 
{
    for (unsigned i = 1; i <= N; i += 2)     
        t[i/2] = i;

    for (unsigned p = 3; p <= N; p += 2) {    
        if (p == t[p/2]) {
            for (unsigned k = p; k <= N; k += 2*p) 
                t[k/2] -= t[k/2] / p;
        }
    }

    unsigned long long s = 0;
    for (unsigned i = 1; i <= N; ++i)    
        s += tots(i);

    printf("%lld\n", s);
}

Here is my Python implementation that seems to be able to crank out ~60000 numbers in 10seconds. I am factorizing numbers using the pollard rho algorithm and using the Rabin miller primality test.

from Queue import Queue
import random

def gcd ( a , b ):
    while b != 0: a, b = b, a % b
    return a

def rabin_miller(p):
    if(p<2): return False
    if(p!=2 and p%2==0): return False
    s=p-1
    while(s%2==0): s>>=1
    for _ in xrange(10):
        a=random.randrange(p-1)+1
        temp=s
        mod=pow(a,temp,p)
        while(temp!=p-1 and mod!=1 and mod!=p-1):
            mod=(mod*mod)%p
            temp=temp*2
        if(mod!=p-1 and temp%2==0): return False
    return True

def pollard_rho(n):
    if(n%2==0): return 2;
    x=random.randrange(2,1000000)
    c=random.randrange(2,1000000)
    y=x
    d=1
    while(d==1):
        x=(x*x+c)%n
        y=(y*y+c)%n
        y=(y*y+c)%n
        d=gcd(x-y,n)
        if(d==n): break;
    return d;

def primeFactorization(n):
    if n <= 0: raise ValueError("Fucked up input, n <= 0")
    elif n == 1: return []
    queue = Queue()
    factors=[]
    queue.put(n)
    while(not queue.empty()):
        l=queue.get()
        if(rabin_miller(l)):
            factors.append(l)
            continue
        d=pollard_rho(l)
        if(d==l):queue.put(l)
        else:
            queue.put(d)
            queue.put(l/d)
    return factors

def phi(n):

    if rabin_miller(n): return n-1
    phi = n
    for p in set(primeFactorization(n)):
        phi -= (phi/p)
    return phi

if __name__ == '__main__':
  
  n = 1
  s = 0

  while n < 60000:
    n += 1
    s += phi(n)
  print(s)

C#: 49,000 (980,000,000 / 20,000)

https://codegolf.stackexchange.com/a/26800 "Howard's code".
But modified, phi values are computed for odd integers.

using System;
using sw = System.Diagnostics.Stopwatch;
class Program
{
    static void Main()
    {
        sw sw = sw.StartNew();
        Console.Write(sumPhi(980000000) + " " + sw.Elapsed);
        sw.Stop(); Console.Read();
    }

    static long sumPhi(int n)  // sum phi[i] , 1 <= i <= n
    {
        long s = 0; int[] phi;
        if (n < 1) return 0; phi = buildPhi(n + 1);
        for (int i = 1; i <= n; i++) s += getPhi(i, phi);
        return s;
    }

    static int getPhi(int i, int[] phi)
    {
        if ((i & 1) > 0) return phi[i >> 1];
        if ((i & 3) > 0) return phi[i >> 2];
        int z = ntz(i); return phi[i >> z >> 1] << z - 1;
    }

    static int[] buildPhi(int n)  // phi[i >> 1] , i odd , i < n
    {
        int i, j, y, x, q, r, f; int[] phi;
        if (n < 2) return new int[] { 0 };
        phi = new int[n / 2]; phi[0] = 1;
        for (j = 2, i = 3; i < n; i *= 3, j *= 3) phi[i >> 1] = j;
        for (x = 4, i = 5; i <= n >> 1; i += x ^= 6)
        {
            if (phi[i >> 1] > 0) continue; phi[i >> 1] = i ^ 1;
            for (j = 3, y = 3 * i; y < n; y += i << 1, j += 2)
            {
                if (phi[j >> 1] == 0) continue; q = j; f = i ^ 1;
                while ((r = q) == i * (q /= i)) f *= i;
                phi[y >> 1] = f * phi[r >> 1];
            }
        }
        for (; i < n; i += x ^= 6)  // primes > n / 2 
            if (phi[i >> 1] == 0)
                phi[i >> 1] = i ^ 1;
        return phi;
    }

    static int ntz(int i)  // number of trailing zeros
    {
        int z = 1;
        if ((i & 0xffff) == 0) { z += 16; i >>= 16; }
        if ((i & 0x00ff) == 0) { z += 08; i >>= 08; }
        if ((i & 0x000f) == 0) { z += 04; i >>= 04; }
        if ((i & 0x0003) == 0) { z += 02; i >>= 02; }
        return z - (i & 1);
    }
}

New score: 61,000 (1,220,000,000 / 20,000)
In "App.config" I had to add "gcAllowVeryLargeObjects enabled=true".

    static long sumPhi(int n)
    {
        int i1, i2, i3, i4, z; long s1, s2, s3, s4; int[] phi;
        if (n < 1) return 0; phi = buildPhi(n + 1); n -= 4; z = 2;
        i1 = 1; i2 = 2; i3 = 3; i4 = 4; s1 = s2 = s3 = s4 = 0;
        if (n > 0)
            for (; ; )
            {
                s1 += phi[i1 >> 1];
                s2 += phi[i2 >> 2];
                s3 += phi[i3 >> 1];
                s4 += phi[i4 >> z >> 1] << z - 1;
                i1 += 4; i2 += 4; i3 += 4; i4 += 4;
                n -= 4; if (n < 0) break;
                if (z == 2)
                {
                    z = 3; i4 >>= 3;
                    while ((i4 & 3) == 0) { i4 >>= 2; z += 2; }
                    z += i4 & 1 ^ 1;
                    i4 = i3 + 1;
                }
                else z = 2;
            }
        if (n > -4) s1 += phi[i1 >> 1];
        if (n > -3) s2 += phi[i2 >> 2];
        if (n > -2) s3 += phi[i3 >> 1];
        if (n > -1) s4 += phi[i4 >> z >> 1] << z - 1;
        return s1 + s2 + s3 + s4;
    }

    static int[] buildPhi(int n)
    {
        int i, j, y, x, q0, q1, f; int[] phi;
        if (n < 2) return new int[] { 0 };
        phi = new int[n / 2]; phi[0] = 1;
        for (uint u = 2, v = 3; v < n; v *= 3, u *= 3) phi[v >> 1] = (int)u;
        for (x = 4, i = 5; i <= n >> 1; i += x ^= 6)
        {
            if (phi[i >> 1] > 0) continue; phi[i >> 1] = i ^ 1;
            for (j = 3, y = 3 * i; y < n; y += i << 1, j += 2)
            {
                if (phi[j >> 1] == 0) continue; q0 = j; f = i ^ 1;
                while ((q1 = q0) == i * (q0 /= i)) f *= i;
                phi[y >> 1] = f * phi[q1 >> 1];
            }
        }
        for (; i < n; i += x ^= 6)
            if (phi[i >> 1] == 0)
                phi[i >> 1] = i ^ 1;
        return phi;
    }

Python 2.7: 10.999 (165975/15090)

Pypy 2.3.1: 28.496 (430000/15090)

Some interesting methods I use:

Rabin-Miller Strong Pseudoprime Test - A primality test that provides an efficient probabilistic algorithm for determining if a given number is prime

Euler's product formula - The product is over the distinct prime numbers dividing n

$Euler's product formula$

Code:

import math
import random

#perform a Modular exponentiation
def modular_pow(base, exponent, modulus):
    result=1
    while exponent>0:
        if exponent%2==1:
           result=(result * base)%modulus
        exponent=exponent>>1
        base=(base * base)%modulus
    return result

#Miller-Rabin primality test
def checkMillerRabin(n,k):
    if n==2: return True
    if n==1 or n%2==0: return False

    #find s and d, with d odd
    s=0
    d=n-1
    while(d%2==0):
        d/=2
        s+=1
    assert (2**s*d==n-1)

    #witness loop
    composite=1
    for i in xrange(k):
        a=random.randint(2,n-1)
        x=modular_pow(a,d,n)
        if x==1 or x==n-1: continue
        for j in xrange(s-1):
            composite=1
            x=modular_pow(x,2,n)
            if x==1: return False #is composite
            if x==n-1: 
                composite=0
                break
        if composite==1:
            return False        #is composite
    return True                 #is probably prime

def findPrimes(n):              #generate a list of primes, using the sieve of eratosthenes

    primes=(n+2)*[True]

    for i in range(2,int(math.sqrt(n))+1):
        if primes[i]==True:
            for j in range(i**2,n+1,i):
                primes[j]=False

    primes=[i for i in range(2,len(primes)-1) if primes[i]==True]
    return primes

def primeFactorization(n,primes):   #find the factors of a number

    factors=[]

    i=0
    while(n!=1):
        if(n%primes[i]==0):
            factors.append(primes[i])
            n/=primes[i]
        else:
            i+=1

    return factors

def phi(n,primes):
    #some useful properties
    if (checkMillerRabin(n,10)==True):      #fast prime check
        return n-1

    factors=primeFactorization(n,primes)    #prime factors
    distinctive_prime_factors=set(factors)  

    totient=n
    for f in distinctive_prime_factors:     #phi = n * sum (1 - 1/p), p is a distinctive prime factor
        totient*=(1-1.0/f);

    return totient

if __name__ == '__main__':


    s=0
    N=165975
    # N=430000
    primes=findPrimes(N)    #upper bound for the number of primes
    for i in xrange(1,N):
        s+=phi(i,primes)

    print "Sum =",s

Nimrod: ~38,667 (580,000,000/15,000)

This answer uses a pretty simple approach. The code employs a simple prime number sieve that stores the prime of the smallest prime power in each slot for composite numbers (zero for primes), then uses dynamic programming to construct the totient function over the same range, then sums the results. The program spends virtually all its time constructing the sieve, then calculates the totient function in a fraction of the time. It looks like it comes down to constructing an efficient sieve (with the slight twist that one also has to be able to extract a prime factor for composite numbers from the result and has to keep memory usage at a reasonable level).

Update: Improved performance by reducing memory footprint and improving cache behavior. It's possible to squeeze out 5%-10% more performance, but the increase in code complexity is not worth it. Ultimately, this algorithm primarily exercises a CPU's von Neumann bottleneck, and there are very few algorithmic tweaks that can get around that.

Also updated the performance divisor since the C++ code wasn't meant to be compiled with all optimizations on and nobody else did it. :)

Update 2: Optimized sieve operation for improved memory access. Now handling small primes in bulk via memcpy() (~5% speedup) and skipping multiples of 2, 3, and 5 when sieving bigger primes (~10% speedup).

C++ code: 9.9 seconds (with g++ 4.9)

Nimrod code: 9.9 seconds (with -d:release, gcc 4.9 backend)

proc handleSmallPrimes(sieve: var openarray[int32], m: int) =
  # Small primes are handled as a special case through what is ideally
  # the system's highly optimized memcpy() routine.
  let k = 2*3*5*7*11*13*17
  var sp = newSeq[int32](k div 2)
  for i in [3,5,7,11,13,17]:
    for j in countup(i, k, 2*i):
      sp[j div 2] = int32(i)
  for i in countup(0, sieve.high, len(sp)):
    if i + len(sp) <= len(sieve):
      copyMem(addr(sieve[i]), addr(sp[0]), sizeof(int32)*len(sp))
    else:
      copyMem(addr(sieve[i]), addr(sp[0]), sizeof(int32)*(len(sieve)-i))
  # Fixing up the numbers for values that are actually prime.
  for i in [3,5,7,11,13,17]:
    sieve[i div 2] = 0

proc constructSieve(m: int): seq[int32] =
  result = newSeq[int32](m div 2 + 1)
  handleSmallPrimes(result, m)
  var i = 19
  # Having handled small primes, we only consider candidates for
  # composite numbers that are relatively prime with 31. This cuts
  # their number almost in half.
  let steps = [ 1, 7, 11, 13, 17, 19, 23, 29, 31 ]
  var isteps: array[8, int]
  while i * i <= m:
    if result[i div 2] == 0:
      for j in 0..7: isteps[j] = i*(steps[j+1]-steps[j])
      var k = 1 # second entry in "steps mod 30" list.
      var j = 7*i
      while j <= m:
        result[j div 2] = int32(i)
        j += isteps[k]
        k = (k + 1) and 7 # "mod 30" list has eight elements.
    i += 2

proc calculateAndSumTotients(sieve: var openarray[int32], n: int): int =
  result = 1
  for i in 2'i32..int32(n):
    var tot: int32
    if (i and 1) == 0:
      var m = i div 2
      var pp: int32 = 2
      while (m and 1) == 0:
        pp *= 2
        m = m div 2
      if m == 1:
        tot = pp div 2
      else:
        tot = (pp div 2) * sieve[m div 2]
    elif sieve[i div 2] == 0: # prime?
      tot = i - 1
      sieve[i div 2] = tot
    else:
      # find and extract the first prime power pp.
      # It's relatively prime with i/pp.
      var p = sieve[i div 2]
      var m = i div p
      var pp = p
      while m mod p == 0 and m != p:
        pp *= p
        m = m div p
      if m == p: # is i a prime power?
        tot = pp*(p-1)
      else:
        tot = sieve[pp div 2] * sieve[m div 2]
      sieve[i div 2] = tot
    result += tot

proc main(n: int) =
  var sieve = constructSieve(n)
  let totSum = calculateAndSumTotients(sieve, n)
  echo totSum

main(580_000_000)

Nimrod: ~2,333,333 (42,000,000,000/18,000)

This uses an entirely different approach from my previous answer. See comments for details. The longint module can be found here.

import longint

const max = 500_000_000

var ts_mem: array[1..max, int]

# ts(n, d) is defined as the number of pairs (a,b)
# such that 1 <= a <= b <= n and gcd(a,b) = d.
#
# The following equations hold:
#
# ts(n, d) = ts(n div d, 1)
# sum for i in 1..n of ts(n, i) = n*(n+1)/2
#
# This leads to the recurrence:
# ts(n, 1) = n*(n+1)/2 - sum for i in 2..n of ts(n, i)
#
# or, where ts(n) = ts(n, 1):
# ts(n) = n*(n+1)/2 - sum for i in 2..n of ts(n div i)
#
# Note that the large numbers that we deal with can
# overflow 64-bit integers.

proc ts(n, gcd: int): int =
  if n == 0:
    result = 0
  elif n == 1 and gcd == 1:
    result = 1
  elif gcd == 1:
    result = n*(n+1) div 2
    for i in 2..n:
      result -= ts(n, i)
  else:
    result = ts(n div gcd, 1)

# Below is the optimized version of the same algorithm.

proc ts(n: int): int =
  if n == 0:
    result = 0
  elif n == 1:
    result = 1
  else:
    if n <= max and ts_mem[n] > 0:
      return ts_mem[n]
    result = n*(n+1) div 2
    var p = n
    var k = 2
    while k < n div k:
      let pold = p
      p = n div k
      k += 1
      let t = ts(n div pold)
      result -= t * (pold-p)
    while p >= 2:
      result -= ts(n div p)
      p -= 1
    if n <= max:
      ts_mem[n] = result

proc ts(n: int128): int128 =
  if n <= 2_000_000_000:
    result = ts(n.toInt)
  else:
    result = n*(n+1) div 2
    var p = n
    var k = 2
    while k < n div k:
      let pold = p
      p = n div k
      k += 1
      let t = ts(n div pold)
      result = result - t * (pold-p)
    while p >= 2:
      result = result - ts(n div p)
      p = p - 1

echo ts(42_000_000_000.toInt128)

Python 3: ~24000 (335,000,000 / 14,000)

My version is a Python port of Howard's algorithm. My original function was a modification of an algorithm introduced in this blogpost.

I'm using Numpy and Numba modules to speed up the execution time. Note that normally you don't need to declare the types of the local variables (when using Numba), but in this case I wanted to squeeze the execution time as much as possible.

Edit: combined constructsieve and summarum functions into a single function.

C++: 9.99s (n = 14,000); Python 3: 9.94s (n = 335,000,000)

import numba as nb
import numpy as np
import time

n = 335000000

@nb.njit("i8(i4[:])", locals=dict(
    n=nb.int32, s=nb.int64, i=nb.int32,
    j=nb.int32, q=nb.int32, f=nb.int32))

def summarum(phi):
    s = 0

    phi[1] = 1

    i = 2
    while i < n:
        if phi[i] == 0:
            phi[i] = i - 1

            j = 2

            while j * i < n:
                if phi[j] != 0:
                    q = j
                    f = i - 1

                    while q % i == 0:
                        f *= i
                        q //= i

                    phi[i * j] = f * phi[q]
                j += 1
        s += phi[i]
        i += 1
    return s

if __name__ == "__main__":
    s1 = time.time()
    a = summarum(np.zeros(n, np.int32))
    s2 = time.time()

    print(a)
    print("{}s".format(s2 - s1))

Java, score ~24,000 (360,000,000 / 15,000)

The java code below does calculation of the totient function and the prime sieve together. Note that depending on your machine you have to increase the initial/maximum heap size (on my rather slow laptop I had to go up to -Xmx3g -Xms3g).

public class Totient {

    final static int size = 360000000;
    final static int[] phi = new int[size];

    public static void main(String[] args) {
        long time = System.currentTimeMillis();
        long sum = 0;

        phi[1] = 1;
        for (int i = 2; i < size; i++) {
            if (phi[i] == 0) {
                phi[i] = i - 1;
                for (int j = 2; i * j < size; j++) {
                    if (phi[j] == 0)
                        continue;

                    int q = j;
                    int f = i - 1;
                    while (q % i == 0) {
                        f *= i;
                        q /= i;
                    }
                    phi[i * j] = f * phi[q];
                }
            }
            sum += phi[i];
        }
        System.out.println(System.currentTimeMillis() - time);
        System.out.println(sum);
    }
}

Matlab: 1464 = 26355867/ 18000

I can't test your code so I divided by 18000 as it represents the fastest computer of those who tested. I came to the score using this property:

if p is prime, phi(p) = p - 1 (for p < 10^20)

I mostly like that it is a one liner:

sum(primes(500000000)-1)

φ(2ⁿ) = 2^{n − 1}
Σ φ(2ⁱ) = 2ⁱ − 1 for i from 1 to n

First, something to find times:

import os
from time import perf_counter

SEARCH_LOWER = -1
SEARCH_HIGHER = 1

def integer_binary_search(start, lower=None, upper=None, big_jump=1):
    if lower is not None and lower == upper:
        raise StopIteration # ?

    result = yield start

    if result == SEARCH_LOWER:
        if lower is None:
            yield from integer_binary_search(
                start=start - big_jump,
                lower=None,
                upper=start - 1,
                big_jump=big_jump * 2)
        else:
            yield from integer_binary_search(
                start=(lower + start) // 2,
                lower=lower,
                upper=start - 1)
    elif result == SEARCH_HIGHER:
        if upper is None:
            yield from integer_binary_search(
                start=start + big_jump,
                lower=start + 1,
                upper=None,
                big_jump=big_jump * 2)
        else:
            yield from integer_binary_search(
                start=(start + upper) // 2,
                lower=start + 1,
                upper=upper)
    else:
        raise ValueError('Expected SEARCH_LOWER or SEARCH_HIGHER.')

search = integer_binary_search(start=1000, lower=1, upper=None, big_jump=2500)
n = search.send(None)

while True:
    print('Trying with %d iterations.' % (n,))

    os.spawnlp(
        os.P_WAIT,
        'g++', 'g++', '-Wall', '-Wextra', '-pedantic', '-O0', '-o', 'reference',
        '-DITERATIONS=%d' % (n,),
        'reference.cpp')

    start = perf_counter()
    os.spawnl(os.P_WAIT, './reference', './reference')
    end = perf_counter()
    t = end - start

    if t >= 10.1:
        n = search.send(SEARCH_LOWER)
    elif t <= 9.9:
        n = search.send(SEARCH_HIGHER)
    else:
        print('%d iterations in %f seconds!' % (n, t))
        break

For the reference code, for me, that’s:

…
Trying with 14593 iterations.
64724364
14593 iterations in 9.987747 seconds!

Now, Haskell:

import System.Environment (getArgs)

phiSum :: Integer -> Integer
phiSum n = 2 ^ n - 1

main :: IO ()
main = getArgs >>= print . phiSum . (2^) . read . head

It makes something with 2525224 digits in 0.718 seconds. And now I’m just noticing @Howard’s comment.