#!/usr/bin/env python
# coding: utf-8
#
Peter Norvig
March 2019
#
# # Pairing Socks
#
# [Bram Cohen](https://en.wikipedia.org/wiki/Bram_Cohen) posed a problem that I'll restate thusly:
#
# > *You have N pairs of socks, all different, in the dryer.
# You pull random socks out one-by-one, placing each sock in one of C possible places on the countertop.
# If the latest sock pairs with another on the countertop, you put the pair away in a drawer. We can ask:*
# 1. *What's the probability that you will pair up all 2N socks before you run out of the C places?*
# 2. *For a given N, what is the minimum C to have a 50% chance?*
# 3. *A 100% chance?*
#
# The third question is easy to solve analytically: to be 100% certain of pairing up N pairs, we would need N + 1 countertop spaces, because it is possible that the first N socks are all different, but the next one will always pair with one of the others.
# How about the other questions?
# - Can I come up with a mathematical formula? **No.**
# - Could I calculate an exact answer? **Only for small N.** Enumerating all permutations of socks would take *O*((2N)!), but since I don't care *which* sock is next, only whether it is a match/non-match, I could get that down to *O*(2(2N)), but that's still bad.
# - Could I do a simulation? **Yes.** And since we're mostly interested in events with probability around 50%, a simulation with, say, 1000 trials is likely to give very good approximations. If we were investigating events with probability 0.0001, we'd need many more trials.
#
# # Sock Pairing Simulation
#
#
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import random
from statistics import mean
from functools import lru_cache
# In[2]:
def pair_socks(C:int, socks:list) -> bool:
"""Can we pair up this sock ordering using a countertop of size C?"""
countertop = []
while socks and len(countertop) < C:
s = socks.pop()
countertop.remove(s) if s in countertop else countertop.append(s)
return not socks
def random_socks(N):
"""A random ordering of N pairs of socks."""
socks = 2 * list(range(N))
random.shuffle(socks)
return socks
@lru_cache(None)
def P(N, C, trials=1000) -> float:
"""Estimated probability of pairing up all N pairs on a countertop of size C."""
return mean(pair_socks(C, random_socks(N)) for _ in range(trials))
def minimum_C(N, p) -> int:
"""Minimum countertop size needed to have probability `p` of pairing up all socks."""
return next(C for C in range(N // 2, 3*N) if P(N, C) > p)
# First, we can use `minimum_C` to find the countertop size necessary to give a 50% chance of pairing up all the socks, for various values of *N*. We see that the relation is very nearly linear, and the slope is a little more than 1/2:
# In[3]:
def fig(xlabel, ylabel):
"""Set up a figure."""
plt.figure(figsize=(16,8)); plt.xlabel(xlabel); plt.ylabel(ylabel)
plt.minorticks_on()
plt.grid(b=True, which='major')
plt.grid(b=True, which='minor', alpha=0.2)
def plotC(Ns=range(10, 201, 10)):
fig('Number of pairs of socks', 'Size of countertop needed for > 50% paired up')
X = list(Ns)
Y = [minimum_C(N, 0.5) for N in Ns]
plt.plot(X, Y, 'o-')
plt.plot([X[0], X[-1]], [Y[0], Y[-1]])
print('Slope =', round(extent(Y) / extent(X), 2))
def extent(X): return max(X) - min(X)
plotC()
# Now, for various numbers of pairs of socks ***N***, plot the probability `P(N, C)` of being able to pair up all the socks:
# In[4]:
def plotP(Ns=range(10, 101, 10)):
fig('Size of countertop, C', 'Probability of pairing up all socks, P(N, C)')
for N in Ns:
X = range(max(2, N // 2 - 8), min(N + 2, N // 2 + 18))
plt.plot(X, [P(N, C) for C in X], 'o-', label='N = {}'.format(N))
plt.legend()
plotP()
# # Exact Probabilities
#
# I said that I could calculate the probability exactly for small values of ***N***. Let's do that. `Pexact` will call `pair_socks` once for each posssible permutation of the socks, and will use `fractions.Fraction` to get an exact rational number as the probability. This gives us a precise answer, and serves as a check on the inexact function `P`.
# In[5]:
from fractions import Fraction
from itertools import permutations
@lru_cache(None)
def Pexact(N, C) -> float:
"""Exact probability of pairing up all N pairs on a countertop of size C."""
return mean(Fraction(pair_socks(C, list(socks)))
for socks in permutations(list(range(N)) * 2))
# In[6]:
N, C = 5, 4
{'1000 trials': P(N, C, trials=1000),
'10,0000 trials': P(N, C, trials=10000),
'exact': Pexact(N, C),
'exact as decimal': float(Pexact(N, C))}
# We see that the `P` estimate with 1000 trials is not too bad, and with 10,000 trials it is much better.