#Pattern Derivatives v 0.1
#Author and Copyright Casey S. Schroeder
#
#Released under the MIT License
#
#Compendium of initial efforts to code fomulas relevant to solving the problem of efficiently counting
#embedded sequences and as further relates to the matter of pricing Pattern Derivatives
#
#Mindful of the differences here. Compared with the paper a = L, n = k, i = t, and m = n. This is similar to some
#vocabulary on pattern matching in the computer science literature that i used when initially investigating the problem.
#(simultaneously, note that n has a different meaning)
import math
#********************************************************************************************************
#The following is the exact basic recursive probability formula used in the basic pattern derivative case,
#and it's summation, as used in the European Options case.
#
#Mindful of the differences here. Compared with the paper a = L, n = k, i = t, and m = n. This is similar to some
#vocabulary on pattern matching in the computer science literature that i used when initially investigating the problem.
#epsilon here just provides a set of indicies
#********************************************************************************************************
def epsilon_p(p, q):
'''finds the self-overlap index (1-based) set for the given pattern (string); needs modification with
decorators so that a hash of it is made and is not recomputed each time'''
n = len(p)
l2 = len(q)
if n != l2: raise ValueError("length of patterns in set do not match")
ind = []
for i in range(0,n-1):
flag = True
for j in range(0, i+1):
if q[j]!=p[n-i-1+j]:
flag = False
break
# print p[j],p[n-i-1+j],flag
if flag == True:
ind.append(i+1)
# print i
return ind
def epsilon(p, q, wc='*'):
'''finds the self-overlap index (1-based) set for the given pattern (string); needs modification with
decorators so that a hash of it is made and is not recomputed each time.
does not assume p and q have same length, but will not match at any index greater than length min(p, q)
i.e. will not provide an index if p is in q or q in p, unless q is at end of p or p at beginning of q
it is used where it is assumed that one pattern is not embedded in another, except if p is a prefix to an mseries
'''
n = len(p)
l2 = len(q)
ind = []
for i in range(0,n):
if i>=l2-1: break
flag = True
for j in range(0, i+1):
if q[j]!=p[n-i-1+j] and wc!=p[n-i-1+j] and q[j]!=wc:
flag = False
break
if flag == True:
ind.append(i+1)
return ind
def epsilon_m(p, q):
'''finds the self-overlap index (1-based) set for the given pattern (string); needs modification with
decorators so that a hash of it is made and is not recomputed each time.
does not assume p and q have same length, but will not match at any index greater than length min(p, q)
i.e. will not provide an index if p is in q or q in p, unless q is at end of p or p at beginning of q
it is used where it is assumed that one pattern is not embedded in another, except if p is a prefix to an mseries
'''
n = len(p)
l2 = len(q)
ind = []
for i in range(0,n):
if i>=l2-1: break
flag = True
for j in range(0, i+1):
if q[j]!=p[n-i-1+j]:
flag = False
break
if flag == True:
ind.append(i+1)
return ind
def pr_first_occ_at_index(n, i, a, epsilon):
'''basic recursive function indicated in the text, used subsequently in all valuation formulas
accurately shows that the exponent in the last term should be n-i+j and not n-i-j. Mindful of
the differences here. Compared with the paper a = L, n = k, i = t, and epsilon is the e function, but
here it is just a list'''
if i < n: return 0
nooversum = 0
if i >= 2*n:
for j in range(n, i-n+1):
nooversum -= pr_first_occ_at_index(n, j, a, epsilon)
oversum = 0
if n > 1:
for j in range(i-n+1, i-1+1):
if (n-i+j) in epsilon:
oversum -= pr_first_occ_at_index(n, j, a, epsilon)*pow(a,n-i+j)
'''slightly improved version of oversum calc
if n > 1:
for e in epsilon:
j = (e + 1) - n + i
oversum -= pr_first_occ_at_index(n, j, a, epsilon)*pow(a,n-i+j)'''
return (1+nooversum+oversum)*pow(a,-n)
def pr_sum_recursive(m, p, a):
'''uses the exact method indicated in the paper for the base case of one-pattern European Options
called recursive here, because it uses the basic recursive function indicated in the text'''
total = 0
n = len(p)
eps = epsilon(p, p)
for i in range(n,m+1):
total += pr_first_occ_at_index(n,i,a, eps)
return total,eps
#********************************************************************************************************
# fundamental recursive function indicated in the text for sets of patterns, indicating probability that any pattern
# from the set occurs at a given time. It is useful for constructing the disjunctive (V) multi-pattern derivative.
#
#Mindful of the differences here. Compared with the paper a = L, n = k, i = t, and m = n. This is similar to some
#vocabulary on pattern matching in the computer science literature that i used when initially investigating the problem.
#Epsilon is here a function returning indicies.
#
#********************************************************************************************************
def pr_first_occ_at_index_nset(p, i, nset, a):
'''basic recursive function indicated in the text for sets of patterns. It is useful for constructing the
disjunctive (V) multi-pattern derivative. Again, compared with the paper a = L, n = k, i = t, and epsilon
is the e function, but here it is just a list'''
n = len(p)
if i < n: return 0
nooversum = 0
if i >= 2*n:
for q in nset:
for j in range(n, i-n+1):
nooversum -= pr_first_occ_at_index_nset(q, j, nset, a)
oversum = 0
'''see single pattern version for possible optimization to the following'''
if n > 1:
for q in nset:
for j in range(i-n+1, i-1+1):
if (n-i+j) in epsilon(q,p):
oversum -= pr_first_occ_at_index_nset(q, j, nset, a)*pow(a,n-i+j)
return (1+nooversum+oversum)*pow(a,-n)
def pr_sum_recursive_nset(m, nset, a):
'''uses the exact method indicated in the paper for the base case of n-pattern European Options
called recursive here, because it uses the basic recursive function indicated in the text'''
total = 0
n = len(nset[0])
for p in nset:
for i in range(n,m+1):
total += pr_first_occ_at_index_nset(p,i,nset,a)
return total
#********************************************************************************************************
# *Market Tradable Case*
# recursive function indicated in the text for sets of patterns in the *market tradable case*,
# indicating probability that a pattern from the set occurs at a given time, including times prior to time m = n,
# given a prior set of events.
#Mindful of the differences here. Compared with the paper a = L, n = k, i = t, and m = n. This is similar to some
#vocabulary on pattern matching in the computer science literature that i used when initially investigating the problem.
#Epsilon is here a function returning indicies.
#
#********************************************************************************************************
def pr_first_occ_at_index_nset_leading_values(p, i, nset, a, w):
'''same as pr_first_occ_at_index_nset but uses w as the prefix, indicating an initial series of values which
have already occurred. '''
n = len(p)
leadingsum = 0
nooversum = 0
oversum = 0
if i <= 0 or n == 0:
return 0
elif i <= n:
if n-i in epsilon(w,p) or i==n:
leadingsum = pow(a,-i)
for q in nset:
for j in epsilon(q,p):
#if the following condition holds, then the existance of p at i *forces* the existance of a prior q
#in which case this cannot be the first occurrance from nset
if len(q)+n - (i + j) in epsilon(w, q): leadingsum = 0
#if j <= i-1:
#leadingsum -= pr_first_occ_at_index_nset_leading_values(q, i-(n-j), nset, a, w)*pow(a,-i+(n-j))
return leadingsum
#elif i < 2*n:
else:
if i > n:
for q in nset:
for j in range(1, i-n+1):
nooversum -= pr_first_occ_at_index_nset_leading_values(q, j, nset, a, w)
'''see single pattern version for possible optimization to the following'''
if n > 1:
for q in nset:
for j in range(i-n+1, i-1+1):
if (n-i+j) in epsilon(q,p):
oversum -= pr_first_occ_at_index_nset_leading_values(q, j, nset, a, w)*pow(a,n-i+j)
return (1+nooversum+oversum)*pow(a,-n)
return 0
val_dict = dict()
def pr_first_occ_at_index_nset_leading_values_wc(p, i, nset, a, w, wc='*'):
'''same as pr_first_occ_at_index_nset but uses w as the prefix, indicating an initial series of values which
have already occurred. '''
vals = (p, i, tuple(nset), a, w, wc)
if val_dict.has_key(vals):
return val_dict[vals]
n = len(p)
leadingsum = 0
nooversum = 0
oversum = 0
if i <= 0 or n == 0:
val_dict[vals] = 0
return 0
elif i <= n:
if n-i in epsilon(w,p) or i==n:
leadingsum = 1
for q in nset:
for j in epsilon(q,p):
#if the following condition holds, then the existance of p at i *forces* the existance of a prior q
#in which case this cannot be the first occurrance from nset
if len(q)+n - (i + j) in epsilon(w, q): return 0
#if j <= i-1:
#leadingsum -= pr_first_occ_at_index_nset_leading_values(q, i-(n-j), nset, a, w)*pow(a,-i+(n-j))
val = pow(a, -i+p[n-i:n].count(wc))*leadingsum
val_dict[vals] = val
return val
#elif i < 2*n:
else:
if i > n:
for q in nset:
for j in range(1, i-n+1):
nooversum -= pr_first_occ_at_index_nset_leading_values_wc(q, j, nset, a, w, wc)
'''see single pattern version for possible optimization to the following'''
if n > 1:
for q in nset:
for j in range(i-n+1, i-1+1):
if (n-i+j) in epsilon(q,p):
#needs testing for a != 2 and modification in the event of multiple wildcards... in all *much* work but appears to work for one wc in one string
oversum -= pr_first_occ_at_index_nset_leading_values_wc(q, j, nset, a, w, wc)*pow(a,n-i+j - q[len(q)-(n-i+j):len(q)].count(wc) - p[0:len(p)-(n-i+j)].count(wc))
val = (1+nooversum+oversum)*pow(a,-(n- p.count(wc)))
val_dict[vals] = val
return val
return 0
def pr_sum_recursive_nset_with_leading_values(m, nset, a, w):
'''m is length of series in which a member of nset is to be found. nset is the set of patterns, all of the same
size. a is the size of the alphabet. w is a prefix to m, which must be the same size as the patterns in nset.
for instance, if we are dealing with a derivative with 20 periods to expiry, with a contract period of 25+ days
and patterns of length 5 (and it has yet to be struck) then insert m = 20, the set of patterns = nset, the size
of the alphabet (a = 2 for {'u','d'}) and w equal to a string of the five previous values, from t = 2 to t=6, e.g.
"udduud". If you are at a t < 5, say t=3, you can use "xxxud", where 'x' must not occur in the alphabet.
'''
total = 0
for p in nset:
for i in range(1,m+1):
total += pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
return total
def pr_sum_recursive_nset_with_leading_values_american_mixed(m, q_i, q_e, a, w):
'''
used in valuation of mixed american options. Same as standard, but sum only over q_i among q_i U q_e
'''
total = 0
union = list(q_i)
union.extend(q_e)
for p in q_i:
for i in range(1,m+1):
total += pr_first_occ_at_index_nset_leading_values_wc(p,i,union,a, w)
return total
def pr_sum_recursive_nset_with_leading_values_euro_mixed(m, q_i, q_e, a, w):
'''
used in valuation of mixed european options. Same as standard, but sum only over q_i among q_i U q_e
'''
total = 0
union = list(q_i)
union.extend(q_e)
for p in q_i:
summa = 0
for i in range(1,m+1):
val = pr_first_occ_at_index_nset_leading_values_wc(p,i,union,a, w)
subval = 0
for q in q_e:
for j in range(1, m-i+1):
subval += pr_first_occ_at_index_nset_leading_values_wc(q,j,q_e,a, p)
summa += (val * (1 - subval))
total += summa
return total
#********************************************************************************************************
# *Valuation formulas*
# The following formulas are for use in the standard form of the non-mixed pattern option case;
# as it is non-mixed, these formulas only take one pattern clause, nset (Q, in the paper).
# mixed versions are easily provided by adding the payment component of these formulas to the logic
# in the mixed probability formuals provided in the previous section (i.e. functions ending with "_mixed")
#********************************************************************************************************
def full_inclusive_american(m, nset, a, w, payoff, rate, period=365):
import math
total = 0
totprob = 0
for p in nset:
for i in range(1,m+1):
prob = pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
total += prob*math.exp(-rate*i/period)
return total*payoff
def full_inclusive_european(m, nset, a, w, payoff, rate, period=365):
import math
total = 0
for p in nset:
for i in range(1,m+1):
total += pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
return total*payoff*math.exp(-rate*m/period)
def full_exclusive_european(m, nset, a, w, payoff, rate, period=365):
import math
total = 0
for p in nset:
for i in range(1,m+1):
total += pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
return (1-total)*payoff*math.exp(-rate*m/period)
net_dict = dict()
def iterative_inclusive_american_no_prefix_no_wait(m, nset, a, payoff, rate, period=365):
#in this version, if there is a match, then there is a payout equal to "payoff" plus a new derivative with the same
#specifications, which starts immediately following the match and runs to the end of the term, but does not include
#the matching pattern as a prefix.
import math
if m<1 : return 0
vals = (m, tuple(nset), a, payoff, rate, period)
if net_dict.has_key(vals): return net_dict[vals]
total = 0
totprob = 0
for p in nset:
for i in range(1,m+1):
adj_payoff = payoff + iterative_inclusive_american(m-i-len(p), nset, a, payoff, rate, period)
prob = pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, '')
total += adj_payoff*prob*math.exp(-rate*i/period)
return total
net_dict2 = dict()
def iterative_inclusive_american_prefix_no_wait(m, nset, a, w, payoff, rate, period=365):
#in this version, if there is a match, then there is a payout equal to "payoff" plus a new derivative with the same
#specifications, which starts immediately following the match and runs to the end of the term, and includes the
#matching pattern as a prefix.
import math
if m<1 : return 0
vals = (m, tuple(nset), a, payoff, rate, period)
if net_dict2.has_key(vals): return net_dict2[vals]
total = 0
totprob = 0
for p in nset:
for i in range(1,m+1):
adj_payoff = payoff + iterative_inclusive_american(m-i-len(p), nset, a, p, payoff, rate, period)
prob = pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
total += adj_payoff*prob*math.exp(-rate*i/period)
return total
net_dict3 = dict()
def iterative_inclusive_american_no_prefix_wait(m, nset, a, payoff, rate, wait, period=365):
#in this version, if there is a match, then there is a payout equal to "payoff" plus a new derivative with the same
#specifications, which starts some period "wait" following the match and runs to the end of the term, and does not
#include the matching pattern as a prefix.
import math
if m<1 : return 0
vals = (m, tuple(nset), a, payoff, rate, period)
if net_dict3.has_key(vals): return net_dict3[vals]
total = 0
totprob = 0
for p in nset:
for i in range(1,m+1):
adj_payoff = payoff + iterative_inclusive_american(m-i-len(p)-wait, nset, a, payoff, rate, wait, period)
prob = pr_first_occ_at_index_nset_leading_values_wc(p,i,nset,a, w)
total += adj_payoff*prob*math.exp(-rate*i/period)
return total
m=24
alpha = 2
arr = list()
vals = list()
baseline=pr_sum_recursive(m,"01111111",alpha)
baseline=baseline[0]
vals.append(pr_sum_recursive(m,"11111111111111",alpha))
vals.append(pr_sum_recursive(m,"10000001000000",alpha))
vals.append(pr_sum_recursive(m,"00000011000000",alpha))
#vals.append(pr_sum_recursive(m,"101101",alpha))
#vals.append(pr_sum_recursive(m,"011101",alpha))
#vals.append(pr_sum_recursive(m,"001100",alpha))
#vals.append(pr_sum_recursive(m,"011110",alpha))
arr.append(((vals[0][0]-baseline)*(pow(2,24))))
arr.append(((vals[1][0]-baseline)*(pow(2,24))))
arr.append(((vals[2][0]-baseline)*(pow(2,24))))
#arr.append(((vals[3][0]-baseline))/sum(vals[3][1]))
#arr.append(((vals[4][0]-baseline))/sum(vals[4][1]))
#arr.append(((vals[5][0]-baseline))/sum(vals[5][1]))
#the case of a wait and a prefix together is slightly more difficult. You must make the loop for time variable.
'''
#validation script. Runs brute function, assumed accurate but slow, against the recursive function for
#probability. Probability embedded in the brute force function return value text, easy to see, but modify return
#values for automated checking.
from BrutePatterns import *
#print pr_sum_recursive_nset_with_leading_values(30, ["aaaaa"], 2, "xxxxx")
#print brute_nset_m_series_with_prefix_count_greaterthan_or_exactly_c_copies(10, ['a','b'], ["aba"], 1, "bba")
tot = [["x", "d"],["xx", "du"],["xxx", "duu"],["xxxx", "duuu"],["xxxxx", "duuuu", ],["xxxxxx", "duuuuu"]]
#f = file("C:\Users\Do It\Desktop\print_out.txt",'w')
#for s in tot:
# for x in s[1:]:
# for i in range(1,16):
# f.writelines(x+": "+ str(i) + ": ")
#f.writelines(str(pr_sum_recursive_nset_with_leading_values(i, [x], 2, s[0]))+": ")
# f.writelines(str(brute_nset_m_series_with_prefix_count_greaterthan_or_exactly_c_copies_onecounts(i, ['u','d'], [x], 1, s[0]))+'\n')
#f.close()
#... and so on. This validates the most general case.
def patterns(n, k, a):
if k > n or n < 1 or k < 1: return 0.0
if k == 1: return 1.0-(pow(a-1.0, n)/pow(a,n))
if k == n: return pow((1.0/a),n)
l = patterns(n-1, k, a) # chance that the k pattern occurs in the prior n-1
v = patterns(n-1, k-1, a) # chance that the k-1 preceding pattern occurs in the prior n-1
c = patterns(n-2, k-1, a) # chance that the k-1 pattern occurs in the prior n-2
print l, v, c
#chance in prior n-1, + chance that did not occur in prior n-1 * chance the preceding k-1 first occurs at n-1
# * the probability of getting the correct letter
return l + (1-l)*(v*(1 - c))*(1.0/a)
'''