#!/usr/bin/env python # coding: utf-8 # # Symbolic Math: Differentiation Program # # This notebook describes how to write a program that will manipulate mathematical expressions, with an emphasis on differentiation. The program can compute $\frac{d}{dx} x^2 + \sin(x)$ to get the result $2x + \cos(x)$. Usually in a programming language, when we say `sin(x)` we are computing with *numbers*, not *symbolic expressions*. That is, programming languages have built-in functions like `sin` and numbers like 0 so that we can ask for the value of `sin(0)` and get back 0. But now we want to do something different: treat $\sin(x)$ as a *symbolic expression* that represents any value of $x$. That facility is not built-in to most programming languages, so we need a way to represent and manipulate these symbolic expressions. Python has a large system called `sympy` to do just that, but we will develop a smaller, simpler system from scratch. # # # Representing Symbolic Expressions as Tuples # # Here is one simple way to represent symbolic expressions: We will represent numbers as themselves, because they are already built-in to Python. We will represent math variable (such as $x$) and constants (such as $\pi$) as Python strings (such as `'x'` and `'pi'`). And we will represent compound expressions (such as $x + 1$) as Python tuples, such as `('x', '+', 1)`. # # We will use the notion of [arity](http://en.wikipedia.org/wiki/Arity) of an expression. An expression can be: # # * Binary (arity = 2): $(x + y)$ is a binary expression because it has two sub-expressions, $x$ and $y$. # * Unary (arity = 1): $-x$ and $\sin(x)$ are both unary expressions, because they each have one sub-expression, $x$. # * Nullary (arity = 0): $x$ and $3$ are both nullary, or *atomic* expressions, because they have no sub-expressions, # # We can define this as a Python function: # In[31]: def arity(expr): "The number of sub-expressions in this expression." if isinstance(expr, tuple): return len(expr) - 1 else: return 0 # For example: # In[2]: exp = ('x', '+', 1) arity(exp) # This table summarizes the kinds of expressions and how we will represent them: # # #
Math
Expression
Math TypeArityPython
Representation
Python
Type #
$3$Integer03int #
$3.14$Number03.14float #
$x$Variable0'x'str #
$\pi$Constant0'pi'str #
$-x$Unary op1('-', 'x')tuple #
$\sin(x)$Unary func1('sin', 'x')tuple #
$x+1$Binary op2('x', '+', 1)tuple #
$3x - y/2$Nested
Binary op
2((3, '*', 'x'), '-',
 ('y', '/', 2))
tuple # #
# # Differentiation # In Calculus, *differentiation* is the process of computing the *derivative* of a function: the rate of change of the function with respect to a variable. We denote the dirivative of $y$ with respect to the variable $x$ as $\frac{dy}{dx}$. # # We can compute the derrivative by consulting a [table of differentiation rules](http://myhandbook.info/form_diff.html). Here are the rules for variables, constants, unary negation, and the four basic binary arithmetic operations: # $\frac{d}{dx} (x) = 1$ # # $\frac{d}{dx} (c) = 0$ # # $\frac{d}{dx} (-u) = - \frac{du}{dx}$ # # $\frac{d}{dx} (u + v) = \frac{du}{dx} + \frac{dv}{dx}$ # # $\frac{d}{dx} (u - v) = \frac{du}{dx} - \frac{dv}{dx}$ # # $\frac{d}{dx} (u \times v) = v \frac{du}{dx} + u \frac{dv}{dx}$ # # $\frac{d}{dx} (u \; / \; v) = (v \frac{du}{dx} - u \frac{dv}{dx}) \; / \; v^2$ # # Defining `D(y, x)` # We will define the Python function `D(y, x)` to compute the symbolic derivative of the expression `y` with respect to the variable `x`. We can handle each of the seven rules, one by one. If the input is of a form that does not match one of the rules, we will raise a `ValueError`. # In[3]: def D(y, x='x'): """Return the symbolic derivative, dy/dx. Handles binary operators +, -, *, /, and unary -, over variables and constants.""" if y == x: return 1 # d/dx (x) = 1 if arity(y) == 0: return 0 # d/dx (c) = 0 if arity(y) == 1: (op, u) = y # y is a compund expression of the form (op, u) if op == '-': return ('-', D(u, x)) # d/dx (-u) = - du/dx if arity(y) == 2: (u, op, v) = y # y is a compound expression of the form (u, op, v) if op == '+': return D(u, x), '+', D(v, x) if op == '-': return D(u, x), '-', D(v, x) if op == '*': return (v, '*', D(u, x)), '+', (u, '*', D(v, x)) if op == '/': return ((v, '*', D(u, x)), '-', (u, '*', D(v, x))), '/', (v, '*', v) raise ValueError("D can't handle this: " + str(y)) # Let's see what `D` can do: # In[4]: D(('x', '+', 1), 'x') # In[5]: D(((3, '*', 'x'), '+', 'c'), 'x') # In[6]: D(('y', '*', 'y'), 'y') # These answers are all mathematically *correct*, but they are not *simplified*—better answers would be `1`, `3`, and `(2, '*', 'y')`, respectively. So we have solved the basic problem, but there are enhancements we could make in several directions: # # * *Simplify*: Return `1` instead of `(1, '+', 0)`. # * *Diversify*: Handle more types of expressions, like $x^2$, and $\sin(x)$. # * *Refactor*: Maker the code cleaner by introducing a better representation for expressions. # # I choose to refactor first (because that will make the other two tasks easier). # # # Representing Symbolic Expressions as Class Objects # I will refactor the representation of expressions, replacing tuples with a user-defined class, `Expression`. Why do that? Three reasons: # # * **Hygiene**: The code is cleaner with a separate type. A tuple can be used for a lot of things; when I have an expression I want to know for sure it is an expression. # * **Ease of Coding**: I will be able to form a new expression with the simple code `x + 1` rather than the more verbose `(x, '+', 1)`. # * **Prettier Output**: We can see the prettier `(x + 1)` form on output, rather than the uglier `('x', '+', 1)`. # # Here's what I have in mind: # # # #
Math
Expression
Math
Type
ArityPython
Representation
Python
Type #
$3$Integer03int #
$3.14$Number03.14float #
$x$Variable0Expression('x')Expression #
$\pi$Constant0Expression('pi')Expression #
$-x$Unary op1Expression('-', 'x')Expression #
$\sin(x)$Unary func1Expression('sin', 'x')Expression #
$x+1$Binary op2Expression('+', x, 1) (given def for `x`)Expression #
$3x - y/2$Nested
Binary op
23 * x - y / 2   (given defs for `x`, `y`)Expression #
# # Note that not every math expression is an instance of the new class `Expression`; we still use good old Python `int` and `float` for numbers. But variables, constants (like $\pi$ or $c$), and unary and binary expressions are all instances of `Expression`. Note also that I considered having separate subclasses of `Expression` for each of these, but I decided the complication was not worth it. # # To understand the implementation of `Expression`, you need to understand Python's [special method names](https://docs.python.org/3/reference/datamodel.html#special-method-names), the method names that begin and end with double underscores, like `__init__` and `__add__`. If you're not familiar with the concept, glance at [the documentation](https://docs.python.org/3/reference/datamodel.html#special-method-names), but here is a quick summary: # # * `__init__(self, ...)` *The constructor that defines what happens when a new object is created.* # * `__add__(self, other)` *The method called when the code asks for `self + other`. Similar for `__sub__`, etc.* # * `__radd__(self, other)` *'r' for reverse. Called when the code asks for, say, `1 + self`, where `1` is not an `Expression`. Similar for `__rsub__`, etc.* # * `__neg__(self)` *The method called when the code says `(- self)`. Similar for `__pos__`.* # * `__hash__(self)` *Computes a hash value, an integer, used as a key into a hash table (`dict`).* # * `__eq__(self, other)` *Called to check whether `self == other`.* # In[33]: from __future__ import division class Expression(object): "A mathematical expression (other than a number)." def __init__(self, op, *args): self.op, self.args = op, args def __add__(self, other): return Expression('+', self, other) def __sub__(self, other): return Expression('-', self, other) def __mul__(self, other): return Expression('*', self, other) def __truediv__(self, other): return Expression('/', self, other) def __pow__(self, other): return Expression('**', self, other) def __radd__(self, other): return Expression('+', other, self) def __rsub__(self, other): return Expression('-', other, self) def __rmul__(self, other): return Expression('*', other, self) def __rtruediv__(self, other): return Expression('/', other, self) def __rpow__(self, other): return Expression('**', other, self) def __neg__(self): return Expression('-', self) def __pos__(self): return self def __hash__(self): return hash(self.op) ^ hash(self.args) def __ne__(self, other): return not (self == other) def __eq__(self, other): return (isinstance(other, Expression) and self.op == other.op and self.args == other.args) def __repr__(self): "A string representation of the Expression." op, args = self.op, self.args n = arity(self) if n == 0: return op if n == 2: return '({} {} {})'.format(args[0], op, args[1]) if n == 1: arg = str(args[0]) if arg.startswith('(') or op in {'+', '-'}: return '{}{}'.format(op, arg) else: return '{}({})'.format(op, arg) def arity(expr): "The number of sub-expressions in this expression." if isinstance(expr, Expression): return len(expr.args) else: return 0 # Let's first define some symbols: # In[34]: a, b, c, d, u, v, w, x, y, z, pi , e = map(Expression, [ 'a', 'b', 'c', 'd', 'u', 'v', 'w', 'x', 'y', 'z', 'pi', 'e']) # So, for example, `a` is now an object of type `Expression` such that `a.op` is the string `'a'` and `a.args` is the empty tuple, and `arity(a)` is 0. The Python expression `a + 1` is equivalent to `a.__add__(1)`, which invokes the `Expression.__add__` method to create a new object of type `Expression`: # In[35]: a + 1 # The Python expression `1 + a` is equivalent to `1.__add__(a)`. But since the integer `1` doesn't know how to add a `Symbol`, Python instead calls `a.__radd__(1)`, where `radd` stands for "reversed add": # In[36]: 1 + a # Let's try something more interesting: # In[37]: (-b + (b**2 - 4*a*c)) / (2 * a) # Rewriting `D(y, x)` with the Expression Class # === # # We can now rewrite `D`; notice that the code is a bit neater, since we don't have to create tuples, we can just use operators like `+`. # In[51]: def D(y, x=x): """Return the symbolic derivative, dy/dx. Handles binary operators +, -, *, /, and unary -, over variables and constants.""" if y == x: return 1 # d/dx (x) = 1 if arity(y) == 0: return 0 # d/dx (c) = 0 op = y.op if arity(y) == 1: u = y.args[0] if op == '-': return -D(u, x) # d/dx (-u) = - du/dx if arity(y) == 2: (u, v) = y.args if op == '+': return D(u, x) + D(v, x) if op == '-': return D(u, x) - D(v, x) if op == '*': return D(u, x) * v + D(v, x) * u if op == '/': return (v * D(u, x) - u * D(v, x)) / v ** 2 raise ValueError("D can't handle this: " + str(y)) # In[40]: D(x + 2, x) # In[41]: D(3 * x + c, x) # In[42]: D(y * y, y) # In[43]: D(- c * x, x) # Representing Functions # === # # Let's represent expressions like $\sin(x)$. We'll need to be clear of the difference between the function `sin` and the functional expression `sin(x)`. I choose to do that by making `sin` be an object of type `Function`, which is a subtype of `Expression`, and making `sin(x)` be an `Expression` whose `op` is `sin`: # # #
ExpressionTypeArityopargs #
`sin``Function`0`'sin'``()` #
`sin(x)``Expression`1`sin``(x,)` #
# # # What operations does `Function` need to support? We need to be able to create a new one, with an op and no args; it can inherit the `__init__` method from `Expression` to do this. Once we create a new one, it needs to be able to create a functional expression, like `sin(x)`. We can do that with the `__call__` method of `Function`: # In[44]: class Function(Expression): "A mathematical function of one argument, like sin or ln." def __call__(self, x): return Expression(self, x) # In[45]: sin, cos, tan, cot, sec, csc, ln, sqrt = map(Function, [ 'sin', 'cos', 'tan', 'cot', 'sec', 'csc', 'ln', 'sqrt']) # In[46]: sin # In[47]: vars(sin) # In[48]: sin(x) # In[49]: vars(sin(x)) # In[52]: (-b + sqrt(b**2 - 4*a*c)) / (2 * a) # In[26]: sin(x)**2 + cos(x)**2 # # # Extending `D(y, x)` to handle Exponentiation and Functions # ==== # # In this section, we extend `D(y, x)` to handle: # # * The binary exponentiation operator, $u^v$, denoted with `u ** v`. # * The six trigonometric functions sin, cos, tan, cot, csc, sec, and the natural logarithm functions, ln. # * The chain rule for nested functions. # # Here are the rules: # $\frac{d}{dx} (x^n) = n x^{(n-1)}$ *if n is an integer* # # $\frac{d}{dx} (u^v) = v u^{(v-1)} * \frac{du}{dx} + u^v * \ln(u) * \frac{dv}{dx}$ # # $\frac{d}{dx} \sin(x) = \cos(x)$ # # $\frac{d}{dx} \cos(x) = -\sin(x)$ # # $\frac{d}{dx} \tan(x) = \sec(x)^2$ # # $\frac{d}{dx} \cot(x) = -\csc(x)^2$ # # $\frac{d}{dx} \sec(x) = \sec(x) \tan(x)$ # # $\frac{d}{dx} \csc(x) = - \csc(x) \cot(x)$ # # $\frac{d}{dx} \ln(x) = 1 / x$ # # $\frac{dy}{dx} = \frac{dy}{du} \frac{du}{dx}$ # # We can add these rules to the ones we had before, giving us: # In[54]: def D(y, x=x): """Return the symbolic derivative, dy/dx. Handles binary operators +, -, *, /, and unary -, as well as transcendental trig and log functions over variables and constants.""" if y == x: return 1 # d/dx (x) = 1 if arity(y) == 0: return 0 # d/dx (c) = 0 if arity(y) == 1: op, (u,) = y.op, y.args if op == '+': return D(u, x) if op == '-': return -D(u, x) if u != x: return D(y, u) * D(u, x) ## CHAIN RULE if op == sin: return cos(x) if op == cos: return -sin(x) if op == tan: return sec(x) ** 2 if op == cot: return -csc(x) ** 2 if op == sec: return sec(x) * tan(x) if op == csc: return -csc(x) * cot(x) if op == ln: return 1 / x if arity(y) == 2: op, (u, v) = y.op, y.args if op == '+': return D(u, x) + D(v, x) if op == '-': return D(u, x) - D(v, x) if op == '*': return D(u, x) * v + D(v, x) * u if op == '/': return (v * D(u, x) - u * D(v, x)) / v ** 2 if op == '**': return (v * u**(v-1) if u == x and isinstance(v, int) else v * u**(v-1) * D(u, x) + u**v * ln(u) * D(v, x)) raise ValueError("D can't handle this: " + str(y)) # Chain Rule # --- # # The trickiest part is the chain rule. Consider $\frac{d}{dx} \sin(\ln(x))$. Let: # # $y = \sin(\ln(x))$ # # $u = \ln(x)$ # # According to the chain rule, # # $\frac{dy}{dx} = \frac{dy}{du} \frac{du}{dx}$. # # So we have: # # $\frac{dy}{du} = \frac{d}{du} \sin(u) = \cos(u) = \cos(\ln(x))$ # # $\frac{du}{dx} = \frac{d}{dx} \ln(x) = \frac{1}{x}$ # # Finally, we can multiply them together to get: # # $\frac{dy}{dx} = \frac{dy}{du} \;\frac{du}{dx} = \cos(\ln(x)) \; \frac{1}{x} = \cos(\ln(x)) / x$. # # # Now let's do the same thing in Python. Let: # In[58]: Y = sin(ln(x)) U = ln(x) # So let's go on with the computation of the derivative, `D(y, x) = D(y, u) * D(u, x)`. We'll look at the two terms on the right: # In[59]: D(Y, U) # In[60]: D(U, x) # And multiply the two terms together: # In[61]: D(Y, U) * D(U, x) # We can do the same computation all in one step: # In[62]: D(Y, x) # More Examples of Differentiation # === # # First an example where all is well: # In[63]: D(x ** 3, x) # But most of the time, `D` gives an answer that is technically correct, but not in simplified form. The example below should yield `2*a*x + b`: # In[64]: D(a*x**2 + b*x + c, x) # The next example should give `50 * (5*x - 2)**9`: # In[65]: D((5*x - 2)**10, x) # And for the next example, we should get $\frac{d}{dx} \sin(\ln(x^2)) = 2 \cos(\ln(x^2)) / x$. # In[66]: D(sin(ln(x**2)), x) # In each case, the answer is correct, but not simplified. # Simplification: `simp(y)` # ------ # # # Now let's work on simplification. We'll define `simp(y)` to return an expression that is equivalent to the expression $y$, but simpler. We should say that the notion of *simpler* is not at all precise. Which is simpler, $x^2 -x -12$ or $(x + 3)(x - 4)$? It depends on what you want to do next. But we can all agree that $x$ is simpler than $(2 - 1)x + 0$. Our version of `simp` will just do the easiest of simplifications. There's lots we could add to make it better. # # Another difficult issue is dealing with indeterminate results like dividing by 0. We would like to be able to simplify $x/x$ to 1. But that is only true if $x$ is not 0. We'll ignore that problem and go ahead and simplify $x/x$ to 1, but we'll feel a little uneasy about it. We'll also simplify 0/0 to `undefined` and 1/0 to `infinity`. # In[67]: def simp(y): "Simplify an expression." if arity(y) == 0: return y y = Expression(y.op, *map(simp, y.args)) ## Simplify the sub-expressions first op = y.op if arity(y) == 1: (u,) = y.args if y in simp_table: return simp_table[y] if op == '+': return u # + u = u if op == '-': if arity(u) == 1 and u.op == '-': return u.args[0] # - - w = w if arity(y) == 2: (u, v) = y.args if evaluable(u, op, v): # for example, (3 + 4) simplifies to 7 return eval(str(u) + op + str(v), {}) if op == '+': if u == 0: return v # 0 + v = v if v == 0: return u # u + 0 = u if u == v: return 2 * u # u + u = 2 * u if op == '-': if u == v: return 0 # u - v = 0 if v == 0: return u # u - 0 = u if u == 0: return -v # 0 - v = -v if op == '*': if u == 0 or v == 0: return 0 # 0 * v = u * 0 = 0 if u == 1: return v # 1 * v = v if v == 1: return u # u * 1 = u if u == v: return u ** 2 # u * u = u^2 if op == '/': if u == 0 and v == 0: return undefined # 0 / 0 = undefined if u == v: return 1 # u / u = 1 if v == 1: return u # u / 1 = u if u == 0: return 0 # 0 / v = 0 if v == 0: return infinity # u / 0 = infinity if op == '**': if v == 1: return u # u ** 1 = u if u == v == 0: return undefined # 0 ** 0 = undefined if v == 0: return 1 # u ** 0 = 1 # If no rules apply, return y unchanged. return y from numbers import Number # Deal with infinity and with undefined numbers (like infinity minus infinity) infinity = float('inf') undefined = nan = (infinity - infinity) # Table of known exact values for certain functions. # Use this, for example, to simplify sin(pi) to 0 or ln(e) to 1. simp_table = { sin(0): 0, sin(pi): 0, sin(pi/2): 1, sin(2*pi): 0, cos(0): 1, cos(pi): -1, cos(pi/2): 1, cos(2*pi): 1, tan(0): 0, tan(pi): 0, tan(pi/2): infinity, tan(2*pi): 0, tan(pi/4): 1, ln(1): 0, ln(e): 1, ln(0): -infinity} def evaluable(u, op, v): "Can we evaluate (u op v) to a number? True if u and v are numbers, and not a special case like 0^0." return (isinstance(u, Number) and isinstance(v, Number) and not (op == '/' and (v == 0 or u % v != 0)) and not (op == '^' and (u == v == 0))) # Let's try it out. First an easy one; $1x + 0$ should be $x$: Simplifying # In[68]: simp(x*(y/y) + 0) # Similarly, $(2 - 1)x^{\cos(y-y)} + \tan(\pi)$ should be just $x$: # In[69]: simp((2 - 1) * x ** cos(y-y) + tan(pi)) # Here are two examples where `simp` helps, but does not do all the simplifications it could: # In[70]: simp(D(sin(ln(x**2)), x)) # In[71]: simp(D((5 * x - 2)**10, x)) # In[72]: simp(cos(pi - pi)) # In[73]: simp(D(3 * x + c, x))