#!/usr/bin/env python # coding: utf-8 #

Peter Norvig 2014

# # # Cryptarithmetic (Alphametic) Problems # # On April 28, Gary Antonik had another [Numberplay column](http://wordplay.blogs.nytimes.com/2014/04/28/num/) that quotes my friend Bill Gosper. (Gosper often presents more advanced puzzles in the [math-fun](http://mailman.xmission.com/cgi-bin/mailman/listinfo/math-fun) mailing list.) This puzzle was: # # > For the expression `N U M + B E R = P L A Y`, # > - Which distinct numerals (each different) can be substituted for letters to make a valid expression? # > - How many solutions are there? # # I [tackled](https://www.udacity.com/wiki/cs212/unit-2#rethinking-eval) this type of problem (known as a [cryptarithmetic](http://mathworld.wolfram.com/Cryptarithmetic.html) or [alphametic](http://mathworld.wolfram.com/Alphametic.html) problem) in my Udacity class [CS 212](https://www.udacity.com/wiki/cs212/unit-2#cryptarithmetic). # # My initial approach was simple: [when in doubt, use brute force](https://www.brainyquote.com/quotes/ken_thompson_185574). I try all permutations of digits replacing letters (that should be quick and easy—there can be at most 10 factorial or 3.6 million permutations), then for each one, I use Python's `eval` function to see if the resulting string is a valid expression. The basic idea is simple, but there are a few complications to worry about: # # 1. Math uses `=` and Python uses `==` for equality; we'll allow either one in formulas. # 2. A number with a leading zero (like `012`) is illegal (but `0` by itself is ok). # 4. If we try to eval an expression like 1/0, we'll have to handle the divide-by-zero error. # 5. Only uppercase letters are replaced by digits. So in `2 * BE or not 2`, the `or` and `not` are Python keywords. # 6. Should I find one solution (faster) or all solutions (more complete)? I'll handle both use cases by # implementing my function `solve` to return an iterator, which yields solutions one at a time; you can get the first one with `next` or all of them with `set`. # # # The solution: `solve` # # Below we see that `solve` works by generating every way to replace letters in the formula with numbers, # and then filtering them to keep only valid strings (ones that evaluate to true and have no leading zero). # The `str.translate` method is used to do the replacements. # In[1]: import itertools import re def solve(formula): """Given a formula like 'NUM + BER = PLAY', fill in digits to solve it. Generate all valid digit-filled-in strings.""" return filter(valid, letter_replacements(formula)) def letter_replacements(formula): """All possible replacements of letters with digits in formula.""" formula = formula.replace(' = ', ' == ') # Allow = or == letters = cat(set(re.findall('[A-Z]', formula))) for digits in itertools.permutations('1234567890', len(letters)): yield formula.translate(str.maketrans(letters, cat(digits))) def valid(exp): """Expression is valid iff it has no leading zero, and evaluates to true.""" try: return not leading_zero(exp) and eval(exp) is True except ArithmeticError: return False cat = ''.join # Function to concatenate strings leading_zero = re.compile(r'\b0[0-9]').search # Function to check for illegal number # In[2]: next(solve('NUM + BER = PLAY')) # In[3]: get_ipython().run_line_magic('time', "len(set(solve('NUM + BER = PLAY')))") # # A faster solution: `faster_solve` # # Depending on your computer, that probably took 15 or 20 seconds. Can we make it faster? To answer the question, I start by profiling to see where the time is spent. I can use the magic function `%prun` to profile: # In[4]: get_ipython().run_line_magic('prun', "next(solve('NUM + BER = PLAY'))") # We see that about 2/3 of the time is spent in `eval`. So let's eliminate the calls to `eval`. That should be doable, because the expression we are evaluating is basically the same each time, but with different permutations of digits filled in. We could save a lot of work if we convert the expression into a Python function, compile that function just once, and then call the function for each of the 3.6 million permutations of digits. We want to take an expression such as: # # "NUM + BER == PLAY" # # and transform it into the Python function: # # (lambda A,B,E,L,M,N,P,R,U,Y: # (100*N+10*U+M) + (100*B+10*E+R) == (1000*P+100*L+10*A+Y)) # # Actually that's not quite right. The rules say that "N", "B", and "P" cannot be zero. So the function should be: # # (lambda A,B,E,L,M,N,P,R,U,Y: # B and N and P and ((100*N+10*U+M) + (100*B+10*E+R) == (1000*P+100*L+10*A+Y))) # # Here is the code to compile a formula into a Python function: # In[5]: def compile_formula(formula, verbose=False): """Compile formula into a function. Also return letters found, as a str, in same order as parms of function. For example, 'YOU == ME**2' returns (lambda E,M,O,U,Y: M and Y and ((100*Y+10*O+U) == (10*M+E)**2), 'YMEUO'""" formula = formula.replace(' = ', ' == ') letters = cat(sorted(set(re.findall('[A-Z]', formula)))) firstletters = sorted(set(re.findall(r'\b([A-Z])[A-Z]', formula))) body = re.sub('[A-Z]+', compile_word, formula) body = ' and '.join(firstletters + [body]) fn = 'lambda {}: {}'.format(','.join(letters), body) if verbose: print(fn) assert len(letters) <= 10 return eval(fn), letters def compile_word(matchobj): "Compile the word 'YOU' as '(100*Y + 10*O + U)'." word = matchobj.group() terms = reversed([mult(10**i, L) for (i, L) in enumerate(reversed(word))]) return '(' + '+'.join(terms) + ')' def mult(factor, var): return var if factor == 1 else str(factor) + '*' + var # In[6]: compile_formula("YOU == ME**2", verbose=True) # In[7]: compile_formula("NUM + BER = PLAY", verbose=True) # Now we're ready for the faster version of `solve`: # In[8]: def faster_solve(formula): """Given a formula like 'NUM + BER == PLAY', fill in digits to solve it. This version precompiles the formula and generates all digit-filled-in strings.""" fn, letters = compile_formula(formula) for digits in itertools.permutations((1,2,3,4,5,6,7,8,9,0), len(letters)): try: if fn(*digits): yield formula.translate(str.maketrans(letters, cat(map(str, digits)))) except ArithmeticError: pass # In[9]: next(faster_solve('NUM + BER = PLAY')) # In[10]: get_ipython().run_line_magic('time', "len(list(faster_solve('NUM + BER = PLAY')))") # We get the same answer, but `faster_solve` is 15 times faster than `solve`. # # More Examples # In[11]: examples = """ NUM + BER = PLAY YOU = ME ** 2 SEND + MORE = MONEY PI * R**2 = AREA WRONG + WRONG = RIGHT WRIGHT + WRIGHT = TO * FLY + FLIGHT TWO + TWENTY = TWELVE + TEN A**2 + B**2 = C**2 AYH**2 + BEE**2 = SEE**2 RAMN = R**3 + RM**3 = N**3 + RX**3 MON-EY = EVIL**(1/2) ALPHABET + LETTERS = SCRABBLE SIN**2 + COS**2 = UNITY POTATO + TOMATO = PUMPKIN ODD * ODD = FREAKY DOUBLE + DOUBLE + TOIL = TROUBLE WASH + YOUR = HANDS SPEED + LIMIT = FIFTY TERRIBLE + NUMBER = THIRTEEN SEVEN + SEVEN + SIX = TWENTY EIGHT + EIGHT + TWO + ONE + ONE = TWENTY ELEVEN + NINE + FIVE + FIVE = THIRTY NINE + SEVEN + SEVEN + SEVEN = THIRTY FOURTEEN + TEN + TEN + SEVEN = FORTYONE HAWAII + IDAHO + IOWA + OHIO = STATES VIOLIN * 2 + VIOLA = TRIO + SONATA SEND + A + TAD + MORE = MONEY ZEROES + ONES = BINARY DCLIZ + DLXVI = MCCXXV COUPLE + COUPLE = QUARTET FISH + N + CHIPS = SUPPER SATURN + URANUS + NEPTUNE + PLUTO = PLANETS PLUTO not in {PLANETS} EARTH + AIR + FIRE + WATER = NATURE TWO * TWO = SQUARE HIP * HIP = HURRAY NORTH / SOUTH = EAST / WEST NAUGHT ** 2 = ZERO ** 3 I + THINK + IT + BE + THINE = INDEED DO + YOU + FEEL = LUCKY WELL - DO + YOU = PUNK NOW + WE + KNOW + THE = TRUTH SORRY + TO + BE + A + PARTY = POOPER SORRY + TO + BUST + YOUR = BUBBLE STEEL + BELTED = RADIALS ABRA + CADABRA + ABRA + CADABRA = HOUDINI I + GUESS + THE + TRUTH = HURTS LETS + CUT + TO + THE = CHASE THATS + THE + THEORY = ANYWAY TWO + TWO = FOUR X / X = X A**N + B**N = C**N and N > 1 ATOM**0.5 = A + TO + M GLITTERS is not GOLD ONE < TWO and FOUR < FIVE ONE < TWO < THREE < TWO * TWO (2 * BE or not 2 * BE) = THE + QUES-TION sum(range(AA)) = BB sum(range(POP)) = BOBO ODD + ODD = EVEN ROMANS+ALSO+MORE+OR+LESS+ADDED = LETTERS FOUR+ONE==FIVE and ONE+ONE+ONE+ONE+ONE==FIVE TEN+SEVEN+SEVEN+SEVEN+FOUR+FOUR+ONE = FORTY NINETEEN+THIRTEEN+THREE+2*TWO+3*ONE = FORTYTWO IN + ARCTIC + TERRAIN + AN + ANCIENT + EERIE + ICE + TRACT + I + ENTER + A + TRANCE = FLATIANA ONE < TWO < THREE < SEVEN - THREE < THREE + TWO < THREE + THREE < SEVEN < SEVEN + ONE < THREE * THREE AN + ACCELERATING + INFERENTIAL + ENGINEERING + TALE + ELITE + GRANT + FEE + ET + CETERA = ARTIFICIAL + INTELLIGENCE """.strip().splitlines() def run(examples): for e in examples: print('{:45}| {}'.format(e, next(faster_solve(e)))) return len(examples) get_ipython().run_line_magic('time', 'run(examples)')