Simon Danisch / Dec 05 2018
with Philippa Markovics
What Do Numbers Look Like?
What Do Numbers Look Like?
This article reproduces the code from johnhw’s essay What Do Numbers Look Like.
pip install umap-learn
16.9s
Language:Python
### JHW 2018 import numpy as np import umap # This code from the excellent module at: # https://stackoverflow.com/questions/4643647/fast-prime-factorization-module import random _known_factors = {} totients = {} def primesbelow(N): # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 #""" Input N>=6, Returns a list of primes, 2 <= p < N """ correction = N % 6 > 1 N = {0:N, 1:N-1, 2:N+4, 3:N+3, 4:N+2, 5:N+1}[N%6] sieve = [True] * (N // 3) sieve[0] = False for i in range(int(N ** .5) // 3 + 1): if sieve[i]: k = (3 * i + 1) | 1 sieve[k*k // 3::2*k] = [False] * ((N//6 - (k*k)//6 - 1)//k + 1) sieve[(k*k + 4*k - 2*k*(i%2)) // 3::2*k] = [False] * ((N // 6 - (k*k + 4*k - 2*k*(i%2))//6 - 1) // k + 1) return [2, 3] + [(3 * i + 1) | 1 for i in range(1, N//3 - correction) if sieve[i]] N = 1000000 smallprimeset = set(primesbelow(N)) _smallprimeset = N smallprimes = primesbelow(10000000) prime_ix = {p:i for i,p in enumerate(smallprimes)} def isprime(n, precision=7): # http://en.wikipedia.org/wiki/Miller-Rabin_primality_test#Algorithm_and_running_time if n < 1: raise ValueError("Out of bounds, first argument must be > 0") elif n <= 3: return n >= 2 elif n % 2 == 0: return False elif n < _smallprimeset: return n in smallprimeset d = n - 1 s = 0 while d % 2 == 0: d //= 2 s += 1 for repeat in range(precision): a = random.randrange(2, n - 2) x = pow(a, d, n) if x == 1 or x == n - 1: continue for r in range(s - 1): x = pow(x, 2, n) if x == 1: return False if x == n - 1: break else: return False return True # https://comeoncodeon.wordpress.com/2010/09/18/pollard-rho-brent-integer-factorization/ def pollard_brent(n): if n % 2 == 0: return 2 if n % 3 == 0: return 3 y, c, m = random.randint(1, n-1), random.randint(1, n-1), random.randint(1, n-1) g, r, q = 1, 1, 1 while g == 1: x = y for i in range(r): y = (pow(y, 2, n) + c) % n k = 0 while k < r and g==1: ys = y for i in range(min(m, r-k)): y = (pow(y, 2, n) + c) % n q = q * abs(x-y) % n g = gcd(q, n) k += m r *= 2 if g == n: while True: ys = (pow(ys, 2, n) + c) % n g = gcd(abs(x - ys), n) if g > 1: break return g def _primefactors(n, sort=False): factors = [] for checker in smallprimes: while n % checker == 0: factors.append(checker) n //= checker # early exit memoization if n in _known_factors: return factors + _known_factors[n] if checker > n: break if n < 2: return factors while n > 1: if isprime(n): factors.append(n) break factor = pollard_brent(n) # trial division did not fully factor, switch to pollard-brent factors.extend(primefactors(factor)) # recurse to factor the not necessarily prime factor returned by pollard-brent n //= factor if sort: factors.sort() return factors def primefactors(n, sort=False): if n in _known_factors: return _known_factors[n] result = _primefactors(n) _known_factors[n] = result return result from collections import defaultdict def factorization(n): factors = defaultdict(int) for p1 in primefactors(n): factors[p1] += 1 return factors def unique_factorise(n): return set(primefactors(n)) def totient(n): if n == 0: return 1 try: return totients[n] except KeyError: pass tot = 1 for p, exp in factorization(n).items(): tot *= (p - 1) * p ** (exp - 1) totients[n] = tot return tot def gcd(a, b): if a == b: return a while b > 0: a, b = b, a % b return a def lcm(a, b): return abs((a // gcd(a, b)) * b) ### end ## Create sparse binary factor vectors for any number, and assemble into a matrix ## One column for each unique prime factor ## One row for each number, 0=does not have this factor, 1=does have this factor (might be repeated) from scipy.special import expi import scipy.sparse def factor_vector_lil(n): ## approximate prime counting function (upper bound for the values we are interested in) ## gives us the number of rows (dimension of our space) d = int(np.ceil(expi(np.log(n)))) x = scipy.sparse.lil_matrix((n,d)) for i in range(2,n): for k,v in factorization(i).items(): x[i,prime_ix[k]] = 1 if i%100000==0: # just check it is still alive... print(i) return x ### Generate the matrix for 1 million integers n = 100000 X = factor_vector_lil(n) # embed with UMAP embedding = umap.UMAP(metric='cosine', n_epochs=500).fit_transform(X) # save for later np.savez('/results/1e6_pts.npz', embedding=embedding);
# and save the image from matplotlib import pyplot as plt fig = plt.figure(figsize=(8,8)) fig.patch.set_facecolor('black') plt.scatter( embedding[:,0], embedding[:,1], marker='o', s=1, edgecolor='', c=np.arange(n), cmap="magma" ) plt.axis("off") plt.savefig("results/primes_umap_1e6_16k_smaller_pts.png", dpi=100, facecolor='black')