| Bytes | Lang | Time | Link |
|---|---|---|---|
| nan | APLNARS | 250726T101951Z | Rosario |
| nan | Rust | 241130T135626Z | 138 Aspe |
| 000 | C 30 | 240731T044900Z | qwr |
| nan | 160402T002944Z | will.fis | |
| nan | 160611T135516Z | P_P | |
| nan | 150116T133549Z | AlexPnt | |
| 000 | Nimrod ~38 | 140507T052351Z | Reimer B |
| 000 | Nimrod ~2 | 140509T165503Z | Reimer B |
| nan | 140507T105421Z | Anna Jok | |
| 000 | Java | 140508T094941Z | Howard |
| nan | Matlab 1464 = 26355867/ | 140507T125113Z | Dennis J |
| 001 | φ2n = 2n − | 140507T150121Z | Ry- |
APL(NARS), score 3.8 (43000/11100)
phi←{×/(v-1)×(v←↑¨k)*¯1+≢¨k←⊂⍨π⍵}
r←f w;i
r←0⋄i←1
→0×⍳i≥w⋄r+←phi i⋄i+←1⋄→2
phi would be the totient function, f would be the function that calculate the value sum of totient values in the range pass in input...
10 second are 1..43000 consecutive numbers to calculate totient function.
The division range numbers in Apl for 10 seconds / range numbers the C code of the question in 10 seconds, here in this little pc is 43000/11100=3.8738
Rust, ~270(5400000/20000)
Port of @will.fiset's Python 2 code in Rust.
Run it on Rust Playground!
// cargo build --release && time target/release/my_totient
use std::collections::{VecDeque, HashSet};
use rand::Rng;
fn gcd(mut a: u64, mut b: u64) -> u64 {
while b != 0 {
let temp = a % b;
a = b;
b = temp;
}
a
}
fn mod_pow(mut base: u64, mut exponent: u64, modulus: u64) -> u64 {
if modulus == 1 {
return 0;
}
let mut result = 1u64;
base %= modulus;
while exponent > 0 {
if exponent % 2 == 1 {
result = result.wrapping_mul(base) % modulus;
}
exponent >>= 1;
base = base.wrapping_mul(base) % modulus;
}
result
}
fn rabin_miller(p: u64) -> bool {
if p < 2 {
return false;
}
if p != 2 && p % 2 == 0 {
return false;
}
let mut s = p - 1;
while s % 2 == 0 {
s >>= 1;
}
let mut rng = rand::thread_rng();
for _ in 0..10 {
let a = rng.gen_range(1..p);
let mut temp = s;
let mut modp = mod_pow(a, temp, p);
while temp != p - 1 && modp != 1 && modp != p - 1 {
modp = modp.wrapping_mul(modp) % p;
temp <<= 1;
}
if modp != p - 1 && temp % 2 == 0 {
return false;
}
}
true
}
fn pollard_rho(n: u64) -> u64 {
if n % 2 == 0 {
return 2;
}
let mut rng = rand::thread_rng();
let mut x: u64 = rng.gen_range(2..1000000);
let c: u64 = rng.gen_range(1..1000000);
let mut y = x;
let mut d = 1u64;
let f = |x: u64| -> u64 { (x.wrapping_mul(x).wrapping_add(c)) % n };
while d == 1 {
x = f(x);
y = f(f(y));
d = gcd((x as i64 - y as i64).abs() as u64, n);
if d == n {
break;
}
}
d
}
fn prime_factorization(n: u64) -> Vec<u64> {
if n <= 0 {
panic!("Invalid input, n <= 0");
} else if n == 1 {
return vec![];
}
let mut queue = VecDeque::new();
let mut factors = Vec::new();
queue.push_back(n);
while let Some(l) = queue.pop_front() {
if rabin_miller(l) {
factors.push(l);
continue;
}
let d = pollard_rho(l);
if d == l {
queue.push_back(l);
} else {
queue.push_back(d);
queue.push_back(l / d);
}
}
factors
}
fn phi(n: u64) -> u64 {
if rabin_miller(n) {
return n - 1;
}
let mut phi = n;
let factors = prime_factorization(n);
let unique_factors: HashSet<u64> = factors.into_iter().collect();
for p in unique_factors {
phi -= phi / p;
}
phi
}
fn main() {
let mut n = 1u64;
let mut s = 0u64;
while n < 5400000{
n += 1;
s += phi(n);
}
println!("{}", s);
}
C: 30,000 = 600,000,000 / 20,000 (basic sieve)
This is a decade-old question of mine I forgot I wrote until I found it on google again. Nevertheless, I wanted to post a simple solution that is reasonably competitive and about the same length as my naive code. This is a small modification of the Sieve of Eratosthenes to use Euler's product formula that also runs in \$O(n \log \log n)\$ time. The only other easy optimization I did was only sieve and store the odd index values, since it's easy to get the even values.
For reference, this is the original python/pseudocode algorithm in 5 lines of computation:
def totient_range(n):
tots = list(range(n+1))
for p in range(2, n+1):
if p == tots[p]:
for k in range(p, n+1, p):
tots[k] -= tots[k] // p
return tots
#include "stdio.h"
#define N 600000000
unsigned t[(N+1)/2];
unsigned tots(unsigned i)
{
unsigned ih = i / 2;
if (i % 2)
return t[ih];
else
return (ih % 2) ? t[ih/2] : 2 * tots(ih);
}
int main(void)
{
for (unsigned i = 1; i <= N; i += 2)
t[i/2] = i;
for (unsigned p = 3; p <= N; p += 2) {
if (p == t[p/2]) {
for (unsigned k = p; k <= N; k += 2*p)
t[k/2] -= t[k/2] / p;
}
}
unsigned long long s = 0;
for (unsigned i = 1; i <= N; ++i)
s += tots(i);
printf("%lld\n", s);
}
Here is my Python implementation that seems to be able to crank out ~60000 numbers in 10seconds. I am factorizing numbers using the pollard rho algorithm and using the Rabin miller primality test.
from Queue import Queue
import random
def gcd ( a , b ):
while b != 0: a, b = b, a % b
return a
def rabin_miller(p):
if(p<2): return False
if(p!=2 and p%2==0): return False
s=p-1
while(s%2==0): s>>=1
for _ in xrange(10):
a=random.randrange(p-1)+1
temp=s
mod=pow(a,temp,p)
while(temp!=p-1 and mod!=1 and mod!=p-1):
mod=(mod*mod)%p
temp=temp*2
if(mod!=p-1 and temp%2==0): return False
return True
def pollard_rho(n):
if(n%2==0): return 2;
x=random.randrange(2,1000000)
c=random.randrange(2,1000000)
y=x
d=1
while(d==1):
x=(x*x+c)%n
y=(y*y+c)%n
y=(y*y+c)%n
d=gcd(x-y,n)
if(d==n): break;
return d;
def primeFactorization(n):
if n <= 0: raise ValueError("Fucked up input, n <= 0")
elif n == 1: return []
queue = Queue()
factors=[]
queue.put(n)
while(not queue.empty()):
l=queue.get()
if(rabin_miller(l)):
factors.append(l)
continue
d=pollard_rho(l)
if(d==l):queue.put(l)
else:
queue.put(d)
queue.put(l/d)
return factors
def phi(n):
if rabin_miller(n): return n-1
phi = n
for p in set(primeFactorization(n)):
phi -= (phi/p)
return phi
if __name__ == '__main__':
n = 1
s = 0
while n < 60000:
n += 1
s += phi(n)
print(s)
C#: 49,000 (980,000,000 / 20,000)
https://codegolf.stackexchange.com/a/26800 "Howard's code".
But modified, phi values are computed for odd integers.
using System;
using sw = System.Diagnostics.Stopwatch;
class Program
{
static void Main()
{
sw sw = sw.StartNew();
Console.Write(sumPhi(980000000) + " " + sw.Elapsed);
sw.Stop(); Console.Read();
}
static long sumPhi(int n) // sum phi[i] , 1 <= i <= n
{
long s = 0; int[] phi;
if (n < 1) return 0; phi = buildPhi(n + 1);
for (int i = 1; i <= n; i++) s += getPhi(i, phi);
return s;
}
static int getPhi(int i, int[] phi)
{
if ((i & 1) > 0) return phi[i >> 1];
if ((i & 3) > 0) return phi[i >> 2];
int z = ntz(i); return phi[i >> z >> 1] << z - 1;
}
static int[] buildPhi(int n) // phi[i >> 1] , i odd , i < n
{
int i, j, y, x, q, r, f; int[] phi;
if (n < 2) return new int[] { 0 };
phi = new int[n / 2]; phi[0] = 1;
for (j = 2, i = 3; i < n; i *= 3, j *= 3) phi[i >> 1] = j;
for (x = 4, i = 5; i <= n >> 1; i += x ^= 6)
{
if (phi[i >> 1] > 0) continue; phi[i >> 1] = i ^ 1;
for (j = 3, y = 3 * i; y < n; y += i << 1, j += 2)
{
if (phi[j >> 1] == 0) continue; q = j; f = i ^ 1;
while ((r = q) == i * (q /= i)) f *= i;
phi[y >> 1] = f * phi[r >> 1];
}
}
for (; i < n; i += x ^= 6) // primes > n / 2
if (phi[i >> 1] == 0)
phi[i >> 1] = i ^ 1;
return phi;
}
static int ntz(int i) // number of trailing zeros
{
int z = 1;
if ((i & 0xffff) == 0) { z += 16; i >>= 16; }
if ((i & 0x00ff) == 0) { z += 08; i >>= 08; }
if ((i & 0x000f) == 0) { z += 04; i >>= 04; }
if ((i & 0x0003) == 0) { z += 02; i >>= 02; }
return z - (i & 1);
}
}
New score: 61,000 (1,220,000,000 / 20,000)
In "App.config" I had to add "gcAllowVeryLargeObjects enabled=true".
static long sumPhi(int n)
{
int i1, i2, i3, i4, z; long s1, s2, s3, s4; int[] phi;
if (n < 1) return 0; phi = buildPhi(n + 1); n -= 4; z = 2;
i1 = 1; i2 = 2; i3 = 3; i4 = 4; s1 = s2 = s3 = s4 = 0;
if (n > 0)
for (; ; )
{
s1 += phi[i1 >> 1];
s2 += phi[i2 >> 2];
s3 += phi[i3 >> 1];
s4 += phi[i4 >> z >> 1] << z - 1;
i1 += 4; i2 += 4; i3 += 4; i4 += 4;
n -= 4; if (n < 0) break;
if (z == 2)
{
z = 3; i4 >>= 3;
while ((i4 & 3) == 0) { i4 >>= 2; z += 2; }
z += i4 & 1 ^ 1;
i4 = i3 + 1;
}
else z = 2;
}
if (n > -4) s1 += phi[i1 >> 1];
if (n > -3) s2 += phi[i2 >> 2];
if (n > -2) s3 += phi[i3 >> 1];
if (n > -1) s4 += phi[i4 >> z >> 1] << z - 1;
return s1 + s2 + s3 + s4;
}
static int[] buildPhi(int n)
{
int i, j, y, x, q0, q1, f; int[] phi;
if (n < 2) return new int[] { 0 };
phi = new int[n / 2]; phi[0] = 1;
for (uint u = 2, v = 3; v < n; v *= 3, u *= 3) phi[v >> 1] = (int)u;
for (x = 4, i = 5; i <= n >> 1; i += x ^= 6)
{
if (phi[i >> 1] > 0) continue; phi[i >> 1] = i ^ 1;
for (j = 3, y = 3 * i; y < n; y += i << 1, j += 2)
{
if (phi[j >> 1] == 0) continue; q0 = j; f = i ^ 1;
while ((q1 = q0) == i * (q0 /= i)) f *= i;
phi[y >> 1] = f * phi[q1 >> 1];
}
}
for (; i < n; i += x ^= 6)
if (phi[i >> 1] == 0)
phi[i >> 1] = i ^ 1;
return phi;
}
Python 2.7: 10.999 (165975/15090)
Pypy 2.3.1: 28.496 (430000/15090)
Some interesting methods I use:
Rabin-Miller Strong Pseudoprime Test - A primality test that provides an efficient probabilistic algorithm for determining if a given number is prime
Euler's product formula - The product is over the distinct prime numbers dividing n

Code:
import math
import random
#perform a Modular exponentiation
def modular_pow(base, exponent, modulus):
result=1
while exponent>0:
if exponent%2==1:
result=(result * base)%modulus
exponent=exponent>>1
base=(base * base)%modulus
return result
#Miller-Rabin primality test
def checkMillerRabin(n,k):
if n==2: return True
if n==1 or n%2==0: return False
#find s and d, with d odd
s=0
d=n-1
while(d%2==0):
d/=2
s+=1
assert (2**s*d==n-1)
#witness loop
composite=1
for i in xrange(k):
a=random.randint(2,n-1)
x=modular_pow(a,d,n)
if x==1 or x==n-1: continue
for j in xrange(s-1):
composite=1
x=modular_pow(x,2,n)
if x==1: return False #is composite
if x==n-1:
composite=0
break
if composite==1:
return False #is composite
return True #is probably prime
def findPrimes(n): #generate a list of primes, using the sieve of eratosthenes
primes=(n+2)*[True]
for i in range(2,int(math.sqrt(n))+1):
if primes[i]==True:
for j in range(i**2,n+1,i):
primes[j]=False
primes=[i for i in range(2,len(primes)-1) if primes[i]==True]
return primes
def primeFactorization(n,primes): #find the factors of a number
factors=[]
i=0
while(n!=1):
if(n%primes[i]==0):
factors.append(primes[i])
n/=primes[i]
else:
i+=1
return factors
def phi(n,primes):
#some useful properties
if (checkMillerRabin(n,10)==True): #fast prime check
return n-1
factors=primeFactorization(n,primes) #prime factors
distinctive_prime_factors=set(factors)
totient=n
for f in distinctive_prime_factors: #phi = n * sum (1 - 1/p), p is a distinctive prime factor
totient*=(1-1.0/f);
return totient
if __name__ == '__main__':
s=0
N=165975
# N=430000
primes=findPrimes(N) #upper bound for the number of primes
for i in xrange(1,N):
s+=phi(i,primes)
print "Sum =",s
Nimrod: ~38,667 (580,000,000/15,000)
This answer uses a pretty simple approach. The code employs a simple prime number sieve that stores the prime of the smallest prime power in each slot for composite numbers (zero for primes), then uses dynamic programming to construct the totient function over the same range, then sums the results. The program spends virtually all its time constructing the sieve, then calculates the totient function in a fraction of the time. It looks like it comes down to constructing an efficient sieve (with the slight twist that one also has to be able to extract a prime factor for composite numbers from the result and has to keep memory usage at a reasonable level).
Update: Improved performance by reducing memory footprint and improving cache behavior. It's possible to squeeze out 5%-10% more performance, but the increase in code complexity is not worth it. Ultimately, this algorithm primarily exercises a CPU's von Neumann bottleneck, and there are very few algorithmic tweaks that can get around that.
Also updated the performance divisor since the C++ code wasn't meant to be compiled with all optimizations on and nobody else did it. :)
Update 2: Optimized sieve operation for improved memory access. Now handling small primes in bulk via memcpy() (~5% speedup) and skipping multiples of 2, 3, and 5 when sieving bigger primes (~10% speedup).
C++ code: 9.9 seconds (with g++ 4.9)
Nimrod code: 9.9 seconds (with -d:release, gcc 4.9 backend)
proc handleSmallPrimes(sieve: var openarray[int32], m: int) =
# Small primes are handled as a special case through what is ideally
# the system's highly optimized memcpy() routine.
let k = 2*3*5*7*11*13*17
var sp = newSeq[int32](k div 2)
for i in [3,5,7,11,13,17]:
for j in countup(i, k, 2*i):
sp[j div 2] = int32(i)
for i in countup(0, sieve.high, len(sp)):
if i + len(sp) <= len(sieve):
copyMem(addr(sieve[i]), addr(sp[0]), sizeof(int32)*len(sp))
else:
copyMem(addr(sieve[i]), addr(sp[0]), sizeof(int32)*(len(sieve)-i))
# Fixing up the numbers for values that are actually prime.
for i in [3,5,7,11,13,17]:
sieve[i div 2] = 0
proc constructSieve(m: int): seq[int32] =
result = newSeq[int32](m div 2 + 1)
handleSmallPrimes(result, m)
var i = 19
# Having handled small primes, we only consider candidates for
# composite numbers that are relatively prime with 31. This cuts
# their number almost in half.
let steps = [ 1, 7, 11, 13, 17, 19, 23, 29, 31 ]
var isteps: array[8, int]
while i * i <= m:
if result[i div 2] == 0:
for j in 0..7: isteps[j] = i*(steps[j+1]-steps[j])
var k = 1 # second entry in "steps mod 30" list.
var j = 7*i
while j <= m:
result[j div 2] = int32(i)
j += isteps[k]
k = (k + 1) and 7 # "mod 30" list has eight elements.
i += 2
proc calculateAndSumTotients(sieve: var openarray[int32], n: int): int =
result = 1
for i in 2'i32..int32(n):
var tot: int32
if (i and 1) == 0:
var m = i div 2
var pp: int32 = 2
while (m and 1) == 0:
pp *= 2
m = m div 2
if m == 1:
tot = pp div 2
else:
tot = (pp div 2) * sieve[m div 2]
elif sieve[i div 2] == 0: # prime?
tot = i - 1
sieve[i div 2] = tot
else:
# find and extract the first prime power pp.
# It's relatively prime with i/pp.
var p = sieve[i div 2]
var m = i div p
var pp = p
while m mod p == 0 and m != p:
pp *= p
m = m div p
if m == p: # is i a prime power?
tot = pp*(p-1)
else:
tot = sieve[pp div 2] * sieve[m div 2]
sieve[i div 2] = tot
result += tot
proc main(n: int) =
var sieve = constructSieve(n)
let totSum = calculateAndSumTotients(sieve, n)
echo totSum
main(580_000_000)
Nimrod: ~2,333,333 (42,000,000,000/18,000)
This uses an entirely different approach from my previous answer. See comments for details. The longint module can be found here.
import longint
const max = 500_000_000
var ts_mem: array[1..max, int]
# ts(n, d) is defined as the number of pairs (a,b)
# such that 1 <= a <= b <= n and gcd(a,b) = d.
#
# The following equations hold:
#
# ts(n, d) = ts(n div d, 1)
# sum for i in 1..n of ts(n, i) = n*(n+1)/2
#
# This leads to the recurrence:
# ts(n, 1) = n*(n+1)/2 - sum for i in 2..n of ts(n, i)
#
# or, where ts(n) = ts(n, 1):
# ts(n) = n*(n+1)/2 - sum for i in 2..n of ts(n div i)
#
# Note that the large numbers that we deal with can
# overflow 64-bit integers.
proc ts(n, gcd: int): int =
if n == 0:
result = 0
elif n == 1 and gcd == 1:
result = 1
elif gcd == 1:
result = n*(n+1) div 2
for i in 2..n:
result -= ts(n, i)
else:
result = ts(n div gcd, 1)
# Below is the optimized version of the same algorithm.
proc ts(n: int): int =
if n == 0:
result = 0
elif n == 1:
result = 1
else:
if n <= max and ts_mem[n] > 0:
return ts_mem[n]
result = n*(n+1) div 2
var p = n
var k = 2
while k < n div k:
let pold = p
p = n div k
k += 1
let t = ts(n div pold)
result -= t * (pold-p)
while p >= 2:
result -= ts(n div p)
p -= 1
if n <= max:
ts_mem[n] = result
proc ts(n: int128): int128 =
if n <= 2_000_000_000:
result = ts(n.toInt)
else:
result = n*(n+1) div 2
var p = n
var k = 2
while k < n div k:
let pold = p
p = n div k
k += 1
let t = ts(n div pold)
result = result - t * (pold-p)
while p >= 2:
result = result - ts(n div p)
p = p - 1
echo ts(42_000_000_000.toInt128)
Python 3: ~24000 (335,000,000 / 14,000)
My version is a Python port of Howard's algorithm. My original function was a modification of an algorithm introduced in this blogpost.
I'm using Numpy and Numba modules to speed up the execution time. Note that normally you don't need to declare the types of the local variables (when using Numba), but in this case I wanted to squeeze the execution time as much as possible.
Edit: combined constructsieve and summarum functions into a single function.
C++: 9.99s (n = 14,000); Python 3: 9.94s (n = 335,000,000)
import numba as nb
import numpy as np
import time
n = 335000000
@nb.njit("i8(i4[:])", locals=dict(
n=nb.int32, s=nb.int64, i=nb.int32,
j=nb.int32, q=nb.int32, f=nb.int32))
def summarum(phi):
s = 0
phi[1] = 1
i = 2
while i < n:
if phi[i] == 0:
phi[i] = i - 1
j = 2
while j * i < n:
if phi[j] != 0:
q = j
f = i - 1
while q % i == 0:
f *= i
q //= i
phi[i * j] = f * phi[q]
j += 1
s += phi[i]
i += 1
return s
if __name__ == "__main__":
s1 = time.time()
a = summarum(np.zeros(n, np.int32))
s2 = time.time()
print(a)
print("{}s".format(s2 - s1))
Java, score ~24,000 (360,000,000 / 15,000)
The java code below does calculation of the totient function and the prime sieve together. Note that depending on your machine you have to increase the initial/maximum heap size (on my rather slow laptop I had to go up to -Xmx3g -Xms3g).
public class Totient {
final static int size = 360000000;
final static int[] phi = new int[size];
public static void main(String[] args) {
long time = System.currentTimeMillis();
long sum = 0;
phi[1] = 1;
for (int i = 2; i < size; i++) {
if (phi[i] == 0) {
phi[i] = i - 1;
for (int j = 2; i * j < size; j++) {
if (phi[j] == 0)
continue;
int q = j;
int f = i - 1;
while (q % i == 0) {
f *= i;
q /= i;
}
phi[i * j] = f * phi[q];
}
}
sum += phi[i];
}
System.out.println(System.currentTimeMillis() - time);
System.out.println(sum);
}
}
Matlab: 1464 = 26355867/ 18000
I can't test your code so I divided by 18000 as it represents the fastest computer of those who tested. I came to the score using this property:
- if p is prime, phi(p) = p - 1 (for p < 10^20)
I mostly like that it is a one liner:
sum(primes(500000000)-1)
φ(2n) = 2n − 1
Σ φ(2i) = 2i − 1 for i from 1 to n
First, something to find times:
import os
from time import perf_counter
SEARCH_LOWER = -1
SEARCH_HIGHER = 1
def integer_binary_search(start, lower=None, upper=None, big_jump=1):
if lower is not None and lower == upper:
raise StopIteration # ?
result = yield start
if result == SEARCH_LOWER:
if lower is None:
yield from integer_binary_search(
start=start - big_jump,
lower=None,
upper=start - 1,
big_jump=big_jump * 2)
else:
yield from integer_binary_search(
start=(lower + start) // 2,
lower=lower,
upper=start - 1)
elif result == SEARCH_HIGHER:
if upper is None:
yield from integer_binary_search(
start=start + big_jump,
lower=start + 1,
upper=None,
big_jump=big_jump * 2)
else:
yield from integer_binary_search(
start=(start + upper) // 2,
lower=start + 1,
upper=upper)
else:
raise ValueError('Expected SEARCH_LOWER or SEARCH_HIGHER.')
search = integer_binary_search(start=1000, lower=1, upper=None, big_jump=2500)
n = search.send(None)
while True:
print('Trying with %d iterations.' % (n,))
os.spawnlp(
os.P_WAIT,
'g++', 'g++', '-Wall', '-Wextra', '-pedantic', '-O0', '-o', 'reference',
'-DITERATIONS=%d' % (n,),
'reference.cpp')
start = perf_counter()
os.spawnl(os.P_WAIT, './reference', './reference')
end = perf_counter()
t = end - start
if t >= 10.1:
n = search.send(SEARCH_LOWER)
elif t <= 9.9:
n = search.send(SEARCH_HIGHER)
else:
print('%d iterations in %f seconds!' % (n, t))
break
For the reference code, for me, that’s:
…
Trying with 14593 iterations.
64724364
14593 iterations in 9.987747 seconds!
Now, Haskell:
import System.Environment (getArgs)
phiSum :: Integer -> Integer
phiSum n = 2 ^ n - 1
main :: IO ()
main = getArgs >>= print . phiSum . (2^) . read . head
It makes something with 2525224 digits in 0.718 seconds. And now I’m just noticing @Howard’s comment.