{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "<div style=\"text-align: right\">Peter Norvig, 20 Jan 2017</div>\n",
    "\n",
    "# Python Utilities for Project Euler\n",
    "\n",
    "After showing my utilities for [Advent of Code](http://adventofcode.org), I got some feedback:\n",
    "\n",
    " 1. Some of these are recipes in the `itertools` module (with different names).\n",
    " 2. What about  utilities for [Project Euler](https://projecteuler.net/about)?\n",
    " \n",
    "My answers: \n",
    "\n",
    " 1. I agree! I have renamed some of my utilities to be consistent with the `itertools` recipes.\n",
    " 2. Here you go."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports\n",
    "\n",
    "In Project Euler I am writing short programs for my own consumption, so brevity is important, and I use `\"from\"` imports more often than I normally would:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict, deque, Counter, namedtuple, abc\n",
    "from fractions   import Fraction\n",
    "from functools   import lru_cache, wraps\n",
    "from itertools   import chain, cycle, islice, combinations, permutations, repeat, takewhile, zip_longest\n",
    "from itertools   import product as crossproduct, count as count_from\n",
    "from math        import ceil, floor, factorial, gcd, log, sqrt, inf\n",
    "import random\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Utilities\n",
    "\n",
    "Here are the general utility functions (and data objects) I define:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "million  = 10 ** 6      # 1,000,000\n",
    "Ø        = frozenset()  # Empty set\n",
    "distinct = set          # Function to return the distinct elements of a collection of hashables\n",
    "identity = lambda x: x  # The function that returns the argument\n",
    "cat      = ''.join      # Concatenate strings\n",
    "\n",
    "def first(iterable, default=False):\n",
    "    \"Return the first element of an iterable, or default if it is empty.\"\n",
    "    return next(iter(iterable), default)\n",
    "\n",
    "def first_true(iterable, pred=None, default=None):\n",
    "    \"\"\"Returns the first true value in the iterable.\n",
    "    If no true value is found, returns *default*\n",
    "    If *pred* is not None, returns the first item\n",
    "    for which pred(item) is true.\"\"\"\n",
    "    # first_true([a,b,c], default=x) --> a or b or c or x\n",
    "    # first_true([a,b], fn, x) --> a if fn(a) else b if fn(b) else x\n",
    "    return next(filter(pred, iterable), default)\n",
    "\n",
    "def upto(iterable, maxval):\n",
    "    \"From a monotonically increasing iterable, generate all the values <= maxval.\"\n",
    "    # Why <= maxval rather than < maxval? In part because that's how Ruby's upto does it.\n",
    "    return takewhile(lambda x: x <= maxval, iterable)\n",
    "\n",
    "def multiply(numbers):\n",
    "    \"Multiply all the numbers together.\"\n",
    "    result = 1\n",
    "    for n in numbers:\n",
    "        result *= n\n",
    "    return result\n",
    "\n",
    "def transpose(matrix): return tuple(zip(*matrix))\n",
    "\n",
    "def isqrt(n):\n",
    "    \"Integer square root (rounds down).\"\n",
    "    return int(n ** 0.5)\n",
    "\n",
    "def ints(start, end):\n",
    "    \"The integers from start to end, inclusive. Equivalent to range(start, end+1)\"\n",
    "    return range(start, end+1)\n",
    "\n",
    "def groupby(iterable, key=identity):\n",
    "    \"Return a dict of {key(item): [items...]} grouping all items in iterable by keys.\"\n",
    "    groups = defaultdict(list)\n",
    "    for item in iterable:\n",
    "        groups[key(item)].append(item)\n",
    "    return groups\n",
    "\n",
    "def grouper(iterable, n, fillvalue=None):\n",
    "    \"\"\"Collect data into fixed-length chunks:\n",
    "    grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx\"\"\"\n",
    "    args = [iter(iterable)] * n\n",
    "    return zip_longest(*args, fillvalue=fillvalue)\n",
    "\n",
    "def overlapping(iterable, n):\n",
    "    \"\"\"Generate all (overlapping) n-element subsequences of iterable.\n",
    "    overlapping('ABCDEFG', 3) --> ABC BCD CDE DEF EFG\"\"\"\n",
    "    if isinstance(iterable, abc.Sequence):\n",
    "        yield from (iterable[i:i+n] for i in range(len(iterable) + 1 - n))\n",
    "    else:\n",
    "        result = deque(maxlen=n)\n",
    "        for x in iterable:\n",
    "            result.append(x)\n",
    "            if len(result) == n:\n",
    "                yield tuple(result)\n",
    "                \n",
    "def pairwise(iterable):\n",
    "    \"s -> (s0,s1), (s1,s2), (s2, s3), ...\"\n",
    "    return overlapping(iterable, 2)\n",
    "                \n",
    "def sequence(iterable, type=tuple):\n",
    "    \"Coerce iterable to sequence: leave it alone if it is already a sequence, else make it of type.\"\n",
    "    return iterable if isinstance(iterable, abc.Sequence) else type(iterable)\n",
    "\n",
    "def join(iterable, sep=''):\n",
    "    \"Join the itemsin iterable, converting each to a string first.\"\n",
    "    return sep.join(map(str, iterable))\n",
    "\n",
    "def grep(pattern, lines):\n",
    "    \"Print lines that match pattern.\"\n",
    "    for line in lines:\n",
    "        if re.search(pattern, line):\n",
    "            print(line)\n",
    "            \n",
    "def nth(iterable, n, default=None):\n",
    "    \"Returns the nth item (or a default value).\"\n",
    "    return next(islice(iterable, n, None), default)\n",
    "\n",
    "def ilen(iterable):\n",
    "    \"Length of any iterable (consumes generators).\"\n",
    "    return sum(1 for _ in iterable)\n",
    "\n",
    "def quantify(iterable, pred=bool):\n",
    "    \"Count how many times the predicate is true.\"\n",
    "    return sum(map(pred, iterable))\n",
    "\n",
    "def powerset(iterable):\n",
    "    \"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)\"\n",
    "    seq = sequence(iterable)\n",
    "    return flatten(combinations(seq, r) for r in range(len(seq) + 1))\n",
    "\n",
    "def shuffled(iterable):\n",
    "    \"Create a new list out of iterable, and shuffle it.\"\n",
    "    new = list(iterable)\n",
    "    random.shuffle(new)\n",
    "    return new\n",
    "    \n",
    "flatten = chain.from_iterable\n",
    "\n",
    "def int_cache(f):\n",
    "    \"\"\"Like lru_cache, but designed for functions that take a non-negative integer as argument;\n",
    "    when you ask for f(n), this caches all lower values of n first. That way, even if f(n) \n",
    "    recursively calls f(n-1), you will never run into recursionlimit problems.\"\"\"\n",
    "    cache = [] # cache[i] holds the result of f(i)\n",
    "    @wraps(f)\n",
    "    def memof(n):\n",
    "        for i in ints(len(cache), n):\n",
    "            cache.append(f(i))\n",
    "        return cache[n]\n",
    "    memof._cache = cache\n",
    "    return memof"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Primes\n",
    "\n",
    "My class `Primes` does what I need for the many Project Euler problems that involve primes:\n",
    "\n",
    "* Iterate through the primes up to 2 million.\n",
    "* Instantly check whether an integer up to 2 million is a prime.\n",
    "* With a bit more computation, check if, say, a 12-digit integer is prime.\n",
    "\n",
    "I precompute the primes up to 2 million, using \n",
    "a [Sieve of Eratosthenes](https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes), and then cache\n",
    "the primes as both a list (to iterate through) and a set (to check membership). If there are `n`\n",
    "primes currently cached and you ask for `primes[n+1]` (either directly, or indirectly by iterating over `primes`), \n",
    "then the cache will be automatically doubled in size. But if you just ask if, say, \"`123456789011 in primes`\",\n",
    "then I use repeted trial division without extending the cache."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 144 ms, sys: 18.7 ms, total: 163 ms\n",
      "Wall time: 190 ms\n"
     ]
    }
   ],
   "source": [
    "class Primes:\n",
    "    \"\"\"Given `primes = Primes(2 * million)`, we can do the following:\n",
    "    * for p in primes:                  # iterate over infinite sequence of primes\n",
    "    * 37 in primes => True              # primality test\n",
    "    * primes[0] => 2, primes[1] => 3    # nth prime\n",
    "    * primes[:5] => [2, 3, 5, 7, 11]    # first 5 primes\n",
    "    * primes[5:9] => [13, 17, 19, 23]   # slice of primes \n",
    "    * primes.upto(10) => 2, 3, 5, 7     # generate primes less than or equal to given value\"\"\"\n",
    "\n",
    "    def __init__(self, n):\n",
    "        \"Create an iterable generator of primes, with initial cache of all primes <= n.\"\n",
    "        # sieve keeps track of odd numbers: sieve[i] is True iff (2*i + 1) has no factors (yet) \n",
    "        N = n // 2 # length of sieve\n",
    "        sieve = [True] * N\n",
    "        for i in range(3, isqrt(n) + 1, 2):\n",
    "            if sieve[i // 2]: # i is prime\n",
    "                # Mark start, start + i, start + 2i, ... as non-prime\n",
    "                start = i ** 2 // 2\n",
    "                sieve[start::i] = repeat(False, len(range(start, N, i)))\n",
    "        self._list = [2] + [2*i+1 for i in range(1, N) if sieve[i]]\n",
    "        self._set  = set(self._list)\n",
    "        self.maxn  = n # We have tested for all primes < self.maxn\n",
    "\n",
    "    def __contains__(self, n):\n",
    "        \"Is n a prime?\"\n",
    "        # If n is small, look in _set; otherwise try prime factors up to sqrt(n)\n",
    "        if n <= self.maxn:\n",
    "            return n in self._set\n",
    "        else:\n",
    "            return not any(n % p == 0 for p in self.upto(n ** 0.5))\n",
    "\n",
    "    def __getitem__(self, index):\n",
    "        \"Return the ith prime, or a slice: primes[0] = 2; primes[1] = 3; primes[1:4] = [3, 5, 7].\"\n",
    "        stop = (index.stop if isinstance(index, slice) else index)\n",
    "        if stop is None or stop < 0:\n",
    "            raise IndexError('Number of primes is infinite: https://en.wikipedia.org/wiki/Euclid%27s_theorem')\n",
    "        while len(self._list) <= stop:\n",
    "            # If asked for the ith prime and we don't have it yet, we will expand the cache.\n",
    "            self.__init__(2 * self.maxn)\n",
    "        return self._list[index]\n",
    "        \n",
    "    def upto(self, n):\n",
    "        \"Yield all primes <= n.\"\n",
    "        if self.maxn < n:\n",
    "            self.__init__(max(n, 2 * self.maxn))\n",
    "        return upto(self._list, n)\n",
    "        \n",
    "%time primes  = Primes(2 * million)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are 148,933 primes under 2 million, which is a small enough number that I'm not concerned with the memory consumed by `._list` and `._set`. If I needed to store 100 million primes,  I would make different tradeoffs. For example, instead of a list and a set, I would probably just keep `sieve`, and make it be an `array('B')`. This would take less space (but for \"small\" sizes like 2 million, the current implementation is both faster and simpler).\n",
    "\n",
    "\n",
    "# Factors\n",
    "\n",
    "Project Euler also has probems about prime factors, and divisors. I need to:\n",
    "\n",
    "* Quickly find the prime factors of any integer up to a million.\n",
    "* With a bit more computation, find the prime factors of a 12-digit integer.\n",
    "* Find the complete factorization of a number.\n",
    "* Compute Euler's totient function.\n",
    "\n",
    "I will cache the factors of all the integers up to a million.  To be more precise, I don't actually keep a list of all the factors of each integer; I only keep the largest prime factor. From that, I can easily compute all the other factors by repeated division. If asked for the factors of a number greater than a million, I do trial division until I get it under a million. In addition, `Factors` provides `totient(n)` for computing [Euler's totient function](https://en.wikipedia.org/wiki/Euler's_totient_function), or Φ(n), and `ndivisors(n)` for the total [number of divisors](http://primes.utm.edu/glossary/xpage/tau.html) of `n`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class Factors:\n",
    "    \"\"\"Given `factors = Factors(million)`, we can do the following:\n",
    "    * factors(360) => [5, 3, 3, 2, 2, 2]  # prime factorization\n",
    "    * factors.largest[360] => 5           # largest prime factor\n",
    "    * distinct(factors(360)) => {2, 3, 5} # distinct prime factors\n",
    "    * factors.ndivisors(28) => 6          # How many positive integers divide n?\n",
    "    * factors.totient(36) => 12           # How many integers below n are relatively prime to n?\"\"\"\n",
    "    def __init__(self, maxn):\n",
    "        \"Initialize largest[n] to be the largest prime factor of n, for n < maxn.\"\n",
    "        self.largest = [1] * maxn\n",
    "        for p in primes.upto(maxn):\n",
    "            self.largest[p::p] = repeat(p, len(range(p, maxn, p)))\n",
    "            \n",
    "    def ndivisors(self, n):\n",
    "        \"The number of divisors of n.\"\n",
    "        # If n = a**x * b**y * ..., then ndivisors(n) = (x+1) * (y+1) * ...\n",
    "        exponents = Counter(self(n)).values()\n",
    "        return multiply(x + 1 for x in exponents)\n",
    "        \n",
    "    def totient(self, n):\n",
    "        \"Euler's Totient function, Φ(n): number of integers < n that are relatively prime to n.\"\n",
    "        # totient(n) = n∏(1 - 1/p) for p ∈ distinct(factors(n))\n",
    "        return int(n * multiply(1 - Fraction(1, p) for p in distinct(self(n))))\n",
    "      \n",
    "    def __call__(self, n):\n",
    "        \"Return a list of the numbers in the prime factorization of n.\"\n",
    "        result = []\n",
    "        # Need to make n small enough so that it is in the self.largest table\n",
    "        if n >= len(self.largest):\n",
    "            for p in primes:\n",
    "                while n % p == 0:\n",
    "                    result.append(p)\n",
    "                    n = n // p\n",
    "                if n < len(self.largest):\n",
    "                    break\n",
    "        # Now n is in the self.largest table; divide by largest[n] repeatedly:\n",
    "        while n > 1:\n",
    "            p = self.largest[n]\n",
    "            result.append(p)\n",
    "            n = n // p\n",
    "        return result\n",
    "    \n",
    "factors = Factors(million)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "148933"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(primes._list)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Tests\n",
    "\n",
    "Here are some unit tests (which also serve as usage examples):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pass'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def tests():\n",
    "    global primes, factors\n",
    "    primes = Primes(2 * million)\n",
    "    factors = Factors(million)\n",
    "    \n",
    "    assert first('abc') == first(['a', 'b', 'c']) == 'a'\n",
    "    assert first(primes) == 2\n",
    "    assert cat(upto('abcdef', 'd')) == 'abcd'\n",
    "    assert multiply([1, 2, 3, 4]) == 24\n",
    "    assert transpose(((1, 2, 3), (4, 5, 6))) == ((1, 4), (2, 5), (3, 6))\n",
    "    assert isqrt(9) == 3 == isqrt(10)\n",
    "    assert ints(1, 100) == range(1, 101)\n",
    "    assert identity('anything') == 'anything'\n",
    "    assert groupby([-3, -2, -1, 1, 2], abs) == {1: [-1, 1], 2: [-2, 2], 3: [-3]}\n",
    "    assert sequence('seq') == 'seq'\n",
    "    assert sequence((i**2 for i in range(5))) == (0, 1, 4, 9, 16)\n",
    "    assert join(range(5)) == '01234'\n",
    "    assert join(range(5), ', ') == '0, 1, 2, 3, 4'\n",
    "    assert cat(['do', 'g']) == 'dog'\n",
    "    assert nth('abc', 1) == nth(iter('abc'), 1) == 'b'\n",
    "    assert quantify(['testing', 1, 2, 3, int, len], callable) == 2 # int and len are callable\n",
    "    assert quantify([0, False, None, '', [], (), {}, 42]) == 1  # Only 42 is truish\n",
    "    assert set(powerset({1, 2, 3})) == {(), (1,), (1, 2), (1, 2, 3), (1, 3), (2,), (2, 3), (3,)}\n",
    "    assert first_true([0, None, False, {}, 42, 43]) == 42\n",
    "    assert list(grouper(range(8), 3)) == [(0, 1, 2), (3, 4, 5), (6, 7, None)]\n",
    "    assert list(pairwise((0, 1, 2, 3, 4))) == [(0, 1), (1, 2), (2, 3), (3, 4)]\n",
    "    assert list(overlapping((0, 1, 2, 3, 4), 3)) == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]\n",
    "    assert list(overlapping('abcdefg', 4)) == ['abcd', 'bcde', 'cdef', 'defg']    \n",
    "    @int_cache\n",
    "    def fib(n): return (n if n <= 1 else fib(n - 1) + fib(n - 2))\n",
    "    f = str(fib(10000))\n",
    "    assert len(f) == 2090 and f.startswith('33644') and f.endswith('66875')\n",
    "\n",
    "    assert 37 in primes\n",
    "    assert primes[0] == 2 and primes[1] == 3 and primes[10] == 31\n",
    "    assert primes[:5] == [2, 3, 5, 7, 11]\n",
    "    assert primes[5:9] == [13, 17, 19, 23]\n",
    "    assert 42 not in primes\n",
    "    assert 1299721 in primes\n",
    "    assert million not in primes\n",
    "    assert (2 ** 13 - 1) in primes\n",
    "    assert (2 ** 31 - 1) in primes\n",
    "    assert list(primes.upto(33)) == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]\n",
    "    assert primes.maxn == 2 * million # Make sure we didn't extend cache\n",
    "    assert len(primes._set) == len(primes._list) == 148933\n",
    "    \n",
    "    assert factors(720) == [5, 3, 3, 2, 2, 2, 2]\n",
    "    assert distinct(factors(720)) == {2, 3, 5}\n",
    "    assert factors(37) == [37]\n",
    "    assert distinct(factors(72990720)) == {2, 3, 5, 11}\n",
    "    assert factors.ndivisors(6) == 4\n",
    "    assert factors.ndivisors(28) == 6\n",
    "    assert factors.ndivisors(720) == 30\n",
    "    assert factors.largest[720] == 5\n",
    "    assert factors.totient(36) == 12\n",
    "    assert factors.totient(43) == 42\n",
    "    for n in (28, 36, 37, 99, 101): \n",
    "        assert list(primes.upto(n)) == list(upto(primes, n))\n",
    "        assert factors.totient(n) == quantify(gcd(n, d) == 1 for d in ints(1, n))\n",
    "        assert n == sum(factors.totient(d) for d in ints(1, n) if n % d == 0)\n",
    "\n",
    "\n",
    "\n",
    "    return 'pass'\n",
    "\n",
    "tests()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Timing\n",
    "\n",
    "My implementation is fast enough to solve Project Euler problems, as you can see from the timing numbers below:\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 130 ms, sys: 14.7 ms, total: 144 ms\n",
      "Wall time: 151 ms\n",
      "CPU times: user 166 ms, sys: 10.4 ms, total: 177 ms\n",
      "Wall time: 178 ms\n"
     ]
    }
   ],
   "source": [
    "# Instantiate both primes and factors\n",
    "%time primes = Primes(2 * million)\n",
    "%time factors = Factors(million)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs\n",
      "Wall time: 9.06 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check primality for numbers in cache\n",
    "%time 1000003 in primes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5 µs, sys: 0 ns, total: 5 µs\n",
      "Wall time: 8.11 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time 1000001 in primes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 100 µs, sys: 1 µs, total: 101 µs\n",
      "Wall time: 103 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check primality for numbers beyond the cache\n",
    "%time 2000003 in primes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 10 µs, sys: 1e+03 ns, total: 11 µs\n",
      "Wall time: 16 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[19753, 5]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Factor numbers in cache\n",
    "%time factors(98765)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 9 µs, sys: 1 µs, total: 10 µs\n",
      "Wall time: 11.9 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[5, 5, 5, 5, 3, 3, 3, 3, 2, 2, 2, 2]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time factors(810000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 8.38 ms, sys: 247 µs, total: 8.63 ms\n",
      "Wall time: 11.3 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[74843, 74843]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Factor numbers beyond the cache\n",
    "%time factors(74843 ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "31999727999744007303991249934683126567194480546211\n",
      "CPU times: user 152 ms, sys: 2.27 ms, total: 155 ms\n",
      "Wall time: 168 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[1000003, 1000003, 1000003, 1999993, 1999993, 1999993, 1999993, 1999993]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = 1000003 ** 3 * 1999993 ** 5\n",
    "print(x)\n",
    "%time factors(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 15.5 ms, sys: 810 µs, total: 16.3 ms\n",
      "Wall time: 19.1 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "37550402023"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time sum(primes.upto(million))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.67 ms, sys: 152 µs, total: 2.82 ms\n",
      "Wall time: 3.99 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "62260698721"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sum of the first 100,000 primes\n",
    "%time sum(primes[:100000])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 62.9 ms, sys: 1.78 ms, total: 64.6 ms\n",
      "Wall time: 70.9 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "1000003"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First prime greater than a million\n",
    "%time first(p for p in primes if p > million)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 23 ms, sys: 941 µs, total: 23.9 ms\n",
      "Wall time: 26.7 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "19186879"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sum of the integers up to 10,000 that have exactly 3 distinct factors\n",
    "%time sum(n for n in range(1, 10000) if len(distinct(factors(n))) == 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 88.2 ms, sys: 2.42 ms, total: 90.6 ms\n",
      "Wall time: 97.2 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "65796"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sum of the integers up to 10,000 that have exactly 3 divisors\n",
    "%time sum(n for n in range(1, 10000) if factors.ndivisors(n) == 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 326 ms, sys: 3.1 ms, total: 329 ms\n",
      "Wall time: 337 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "30393486"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The sum of the totient function of the integers up to 1000 \n",
    "%time sum(map(factors.totient, range(1, 10000)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Project Euler Regression Testing\n",
    "\n",
    "My strategy for managing solutions to problems, and doing regression tests on them:\n",
    "* My solution to problem 1 is the function `problem_1()`, which returns the solution when called (and so on for other problems).\n",
    "* Once I have verified the answer to a problem (checking it on the Project Euler site), I store it in a dict called `solutions`.\n",
    "* Running `verify()` checks that all `problem_`*n* functions  return the correct solution. \n",
    "\n",
    "Project Euler asks participants not to publish solutions to problems, so I will comply, and instead show the solution to  three fake problems:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def problem_1(N=100):\n",
    "    \"Sum of integers: Find the sum of all the integers from 1 to 100 inclusive.\"\n",
    "    return sum(ints(1, N))\n",
    "\n",
    "def problem_2(): \n",
    "    \"Two plus two: how much is 2 + 2?\"\n",
    "    return int('2' + '2')\n",
    "\n",
    "def problem_42():\n",
    "    \"What is life?\"\n",
    "    return 6 * 7\n",
    "\n",
    "solutions = {1: 5050, 2: 4} "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Num       Time Status Answer           Problem Description   Expected\n",
      "=== ========== ====== ================ ===================== ========\n",
      "  1   0.00 sec     ok 5050             Sum of integers       \n",
      "  2   0.00 sec WRONG! 22               Two plus two          4\n",
      " 42   0.00 sec   NEW! 42               What is life?         \n"
     ]
    }
   ],
   "source": [
    "def verify(problem_numbers=range(1, 600)):\n",
    "    \"\"\"Main test harness function to verify problems. Pass in a collection of ints (problem numbers).\n",
    "    Prints a message giving execution time, and whether answer was expected.\"\"\"\n",
    "    print('Num       Time Status Answer           Problem Description   Expected')\n",
    "    print('=== ========== ====== ================ ===================== ========')\n",
    "    for p in problem_numbers:\n",
    "        name = 'problem_{}'.format(p)\n",
    "        if name in globals():\n",
    "            fn     = globals()[name]\n",
    "            t0     = time.time()\n",
    "            answer = fn()\n",
    "            t      = time.time() - t0\n",
    "            desc   = (fn.__doc__ or '??:').split(':')[0]\n",
    "            status = ('NEW!'   if p not in solutions else \n",
    "                      'WRONG!' if answer != solutions[p] else\n",
    "                      'SLOW!'  if t > 60 else\n",
    "                      'ok')\n",
    "            expected = (solutions[p] if status == 'WRONG!' else '')\n",
    "            print('{:3d} {:6.2f} sec {:>6} {:<16} {:<21} {}'\n",
    "                  .format(p, t, status, answer, desc, expected))\n",
    "\n",
    "verify()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}