How to propagate `\n` to sympy.latex() - matplotlib

The Goal is to format a polynomial with more than 6 parameters into a plot title.
Here is my polynomial parameter to string expression function, inspired by this answer, followed by sym.latex():
def func(p_list):
str_expr = ""
for i in range(len(p_list)-1,-1,-1):
if (i%2 == 0 and i !=len(p_list)):
str_expr =str_expr + " \n "
if p_list[i]>0:
sign = " +"
else:
sign = ""
if i > 1:
str_expr = str_expr+" + %s*x**%s"%(p_list[i],i)
if i == 1:
str_expr = str_expr+" + %s*x"%(p_list[i])
if i == 0:
str_expr = str_expr+sign+" %s"%(p_list[i])
print("str_expr",str_expr)
return sym.sympify(str_expr)
popt = [-2,1,1] # some toy data
tex = sym.latex(func(popt))
print("tex",tex)
Outputs:
str_expr
+ -1*x**2 + 1*x
-2
tex - x^{2} + x - 2
in str_expr the line breaks from \n are visible, yet in the sympy.latex output the are gone.
How to propagate this linebreak?
Edit: I took # wsdookadr answer and modified it, so that plt.title takes the result of the function as text the argument
def tex_multiline_poly(e, chunk_size=2, separator="\n"):
tex = ""
# split into monomials
print("reversed(e.args)",reversed(e.args))
mono = list(e.args)
print("mono",mono)
mono.reverse()
print("mono",mono)
# we're going split the list of monomials into chunks of chunk_size
# serialize each chunk, and insert separators between the chunks
for i in range(0,len(mono),chunk_size):
chunk = mono[i:i + chunk_size]
print("sum(chunk)",sum(chunk))
print("sym.latex(sum(chunk))",sym.latex(sum(chunk)))
if i == 0:
tex += r'$f(x)= %s$'%(sym.latex(sum(chunk)))+separator
else:
tex += '$%s$'%(sym.latex(sum(chunk))) + separator
return tex
popt = est.params
x = sym.symbols('x')
p = sym.Poly.from_list(reversed(popt),gens=x)
tex = tex_multiline_poly(p.as_expr(),chunk_size=2)
plt.title(text=tex)

In your code, you're inserting a linebreak for every even-power monomial, except for the last one.
if (i%2 == 0 and i !=len(p_list)):
str_expr =str_expr + " \n "
Since you are just building a polynomial from a list of coefficients, your code can be simplified.
Generally what we want is to build/transform/handle things symbolically, and only at the end serialize them and print the result in some specific format
import sympy as sym
x = sym.symbols('x')
def func(p_list):
expr = 0
for i in range(len(p_list)-1,-1,-1):
expr += p_list[i] * (x ** i)
return sym.sympify(expr)
popt = [-2,1,1]
p = func(popt)
p_tex = sym.latex(p)
p_str = str(p)
print("str:", p_str)
print("tex:", p_tex)
Output:
str: x**2 + x - 2
tex: x^{2} + x - 2
We could simplify this even further by using SymPy's built-in functions to build the poly from a list of coefficients:
import sympy as sym
from sympy import symbols
popt = [-2,1,1]
x = symbols('x')
p = sym.Poly.from_list(reversed(popt),gens=x)
p_tex = sym.latex(p.as_expr())
p_str = str(p.as_expr())
print("str:", p_str)
print("tex:", p_tex)
Output:
str: x**2 + x - 2
tex: x^{2} + x - 2
Does the output look like what you would expect?
UPDATE:
After learning more about the use-case, here's a version that inserts separators every N=2 monomials in the latex form of your expression.
import sympy as sym
from sympy import symbols
popt = [-2,1,1]
x = symbols('x')
p = sym.Poly.from_list(reversed(popt),gens=x)
def tex_multiline_poly(e, chunk_size=2, separator="\n"):
tex = ""
# split into monomials
mono = list(reversed(e.args))
# we're going split the list of monomials into chunks of chunk_size
# serialize each chunk, and insert separators between the chunks
for i in range(0,len(mono),chunk_size):
chunk = mono[i:i + chunk_size]
tex += sym.latex(sum(chunk)) + separator
return tex
p_tex = tex_multiline_poly(p.as_expr(),chunk_size=2)
p_str = str(p.as_expr())
print("str:",p_str)
print("tex:",p_tex)
Output:
str: x**2 + x - 2
tex: x^{2} + x
-2
Edit: wrong edit

Related

RGB to HSV in numpy

I'm trying to implement RGB to HSV conversion from opencv in pure numpy using formula from here:
def rgb2hsv_opencv(img_rgb):
img_hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
return img_hsv
def rgb2hsv_np(img_rgb):
assert img_rgb.dtype == np.float32
height, width, c = img_rgb.shape
r, g, b = img_rgb[:,:,0], img_rgb[:,:,1], img_rgb[:,:,2]
t = np.min(img_rgb, axis=-1)
v = np.max(img_rgb, axis=-1)
s = (v - t) / (v + 1e-6)
s[v==0] = 0
# v==r
hr = 60 * (g - b) / (v - t + 1e-6)
# v==g
hg = 120 + 60 * (b - r) / (v - t + 1e-6)
# v==b
hb = 240 + 60 * (r - g) / (v - t + 1e-6)
h = np.zeros((height, width), np.float32)
h = h.flatten()
hr = hr.flatten()
hg = hg.flatten()
hb = hb.flatten()
h[(v==r).flatten()] = hr[(v==r).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
h[(v==b).flatten()] = hb[(v==b).flatten()]
h[h<0] += 360
h = h.reshape((height, width))
img_hsv = np.stack([h, s, v], axis=-1)
return img_hsv
img_bgr = cv2.imread('00000.png')
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
img_rgb = img_rgb / 255.0
img_rgb = img_rgb.astype(np.float32)
img_hsv1 = rgb2hsv_np(img_rgb)
img_hsv2 = rgb2hsv_opencv(img_rgb)
print('max diff:', np.max(np.fabs(img_hsv1 - img_hsv2)))
print('min diff:', np.min(np.fabs(img_hsv1 - img_hsv2)))
print('mean diff:', np.mean(np.fabs(img_hsv1 - img_hsv2)))
But I get big diff:
max diff: 240.0
min diff: 0.0
mean diff: 0.18085355
Do I missing something?
Also maybe it's possible to write numpy code more efficient, for example without flatten?
Also I have hard time finding original C++ code for cvtColor function, as I understand it should be actually function cvCvtColor from C code, but I can't find actual source code with formula.
From the fact that the max difference is exactly 240, I'm pretty sure that what's happening is in the case when both or either of v==r, v==g are simultaneously true alongside v==b, which gets executed last.
If you change the order from:
h[(v==r).flatten()] = hr[(v==r).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
h[(v==b).flatten()] = hb[(v==b).flatten()]
To:
h[(v==r).flatten()] = hr[(v==r).flatten()]
h[(v==b).flatten()] = hb[(v==b).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
The max difference may start showing up as 120, because of that added 120 in that equation. So ideally, you would want to execute these three lines in the order b->g->r. The difference should be negligible then (still noticing a max difference of 0.01~, chalking it up to some round off somewhere).
h[(v==b).flatten()] = hb[(v==b).flatten()]
h[(v==g).flatten()] = hg[(v==g).flatten()]
h[(v==r).flatten()] = hr[(v==r).flatten()]

moving average difference between numpy and mathdotnet.com

First, a picture:
Column A is my source data, 50 points.
Column C and D are the SMA calculated with numpy and mathdotnet.com, respectively, with a window of 15.
Column F is the delta.
As we can see, about halfway, the data becomes identical, but the first half is not. I do not understand why, and, more importantly, do not know what to trust.
So I got from SO an optimized version of the SMA and ran the data through it.
The code is here:
private static NDArray SMA(this NDArray Data, int Period)
{
var Length = Data.len;
// calculate the moving average
var Buffer = new double[Period];
var Output = new double[Length];
var CurrentIndex = 0;
for (var i = 0; i < Length; i++)
{
Buffer[CurrentIndex] = Data.GetDouble(i) / Period;
var MA = 0.0;
for (var j = 0; j < Period; j++)
{
MA += Buffer[j];
}
Output[i] = MA;
CurrentIndex = (CurrentIndex + 1) % Period;
}
var R = new ArraySegment<double>(Output, Period - 1, Length - Period + 1);
return new NDArray(R.ToArray());
}
It is using NumSharp, the .net port of numpy, to hold the source array.
While it is all different code, the C# code and python numpy output the same results (differences happen after the 12th decimal point, so we can consider them identical).
This points out to mathdotnet.com being different; so I guess I can trust the numpy / C# versions more.
Are there different variations of the SMA that could cause this? or something obvious I don't see?
I have put all the data here: https://pastebin.com/WgYJUUJF
Edit:
Here is the numpy code:
import numpy as np
def calcSma(data, smaPeriod):
j = next(i for i, x in enumerate(data) if x is not None)
our_range = range(len(data))[j + smaPeriod - 1:]
empty_list = [None] * (j + smaPeriod - 1)
sub_result = [np.mean(data[i - smaPeriod + 1: i + 1]) for i in our_range]
return np.array(empty_list + sub_result)
def calcSma2(data_set, periods=3):
weights = np.ones(periods) / periods
return np.convolve(data_set, weights, mode='valid')
a = np.array([1.1282553063375, 1.13157696082132, 1.13275406120136, 1.1332879715733, 1.12761933580452, 1.12621836040801, 1.12282485875706, 1.12265572041877, 1.13094386506532, 1.12320520490577, 1.12427293064877, 1.1328332027022, 1.13099445663901, 1.12843355605048, 1.13002750724853, 1.12843355605048, 1.13099445663901, 1.12709476494142, 1.12684879712348, 1.12672349888807, 1.12600933402474, 1.13112070248549, 1.12985951088976, 1.12822416032659, 1.12471789559362, 1.12651004224413, 1.12442669033881, 1.12334638977164, 1.12714333124378, 1.1312233808195, 1.12713229372575, 1.128255040952, 1.12585669781931, 1.12763457442902, 1.12470631424376, 1.12223443223443, 1.12506842815956, 1.12691187181355, 1.12385654130971, 1.13026344596074, 1.12237927400894, 1.1245915922457, 1.13088395780284, 1.13211944646759, 1.12590649028825, 1.12829127560895, 1.11876736364966, 1.12222667492441, 1.12169543369019, 1.12199031071285])
b = calcSma(a, 15)
c = calcSma2(a, 15)
print b
print "----------------------------------"
print c
and here is the mathdotnet one:
var data = Vector<double>.Build.Dense(new[] { 1.1282553063375, 1.13157696082132, 1.13275406120136, 1.1332879715733, 1.12761933580452, 1.12621836040801, 1.12282485875706, 1.12265572041877, 1.13094386506532, 1.12320520490577, 1.12427293064877, 1.1328332027022, 1.13099445663901, 1.12843355605048, 1.13002750724853, 1.12843355605048, 1.13099445663901, 1.12709476494142, 1.12684879712348, 1.12672349888807, 1.12600933402474, 1.13112070248549, 1.12985951088976, 1.12822416032659, 1.12471789559362, 1.12651004224413, 1.12442669033881, 1.12334638977164, 1.12714333124378, 1.1312233808195, 1.12713229372575, 1.128255040952, 1.12585669781931, 1.12763457442902, 1.12470631424376, 1.12223443223443, 1.12506842815956, 1.12691187181355, 1.12385654130971, 1.13026344596074, 1.12237927400894, 1.1245915922457, 1.13088395780284, 1.13211944646759, 1.12590649028825, 1.12829127560895, 1.11876736364966, 1.12222667492441, 1.12169543369019, 1.12199031071285 });
var sma = Vector<double>.Build.Dense(data.MovingAverage(15).Skip(14).ToArray());
var s = sma.Aggregate(string.Empty, (Current, v) => Current + $"{v}, ");
Console.WriteLine(s);

Vpython greyscreen crash

I have found many times a solution for my problems from here, but this time I am totally baffled. I don't know what's wrong at my code.
I made a code to create a box with charged particles inside with Vpython. As I launch the program, I get only a grey screen and the program crash. No error message, nothing.
from visual import *
from random import *
def electronizer(num):
list = []
electron_charge = -1.60217662e-19
electron_mass = 9.10938356e-31
for i in range(num):
another_list = []
e = sphere(pos=(random(), random(),random()), radius=2.818e-15,
color=color.cyan)
e.v = vector(random(), random(), random())
another_list.append(e)
another_list.append(e.v)
another_list.append(electron_charge)
another_list.append(electron_mass)
list.append(another_list)
return list
def protonizer(num):
list = []
proton_charge = 1.60217662e-19
proton_mass = 1.6726219e-27
for i in range(num):
another_list = []
p = sphere(pos=(random(), random(),random()), radius=0.8408739e-15, color=color.red)
p.v = vector(random(), random(), random())
another_list.append(p)
another_list.append(p.v)
another_list.append(proton_charge)
another_list.append(proton_mass)
list.append(another_list)
return list
def cross(a, b):
c = vector(a[1]*b[2] - a[2]*b[1],
a[2]*b[0] - a[0]*b[2],
a[0]*b[1] - a[1]*b[0])
return c
def positioner(work_list):
k = 8.9875517873681764e3 #Nm2/C2
G = 6.674e-11 # Nm2/kg2
vac_perm = 1.2566370614e-6 # H/m
pi = 3.14159265
dt = 0.1e-3
constant = 1
force = vector(0,0,0)
for i in range(len(work_list)):
for j in range(len(work_list)):
if i != j:
r = work_list[i][0].pos - work_list[j][0].pos
r_mag = mag(r)
r_norm = norm(r)
F = k * ((work_list[i][2] * work_list[j][2]) / (r_mag**2)) * r_norm
force += F
B = constant*(vac_perm / 4*pi) * (cross(work_list[j][2] * work_list[j][1], norm(r)))/r_mag**2
F = cross(work_list[i][2] * work_list[i][1], B)
force += F
F = -(G * work_list[i][3] * work_list[j][3]) / r_mag**2 * r_norm
force += F
acceleration = force / work_list[i][3]
difference_in_velocity = acceleration * dt
work_list[i][1] += difference_in_velocity
difference_in_position = work_list[i][1] * dt
work_list[i][0].pos += difference_in_position
if abs(work_list[i][0].pos[0]) > 2.5e-6:
work_list[i][1][0] = -work_list[i][1][0]
elif abs(work_list[i][0][1]) > 2.5e-6:
work_list[i][1][1] = -work_list[i][1][1]
elif abs(work_list[i][0][2]) > 2.5e-6:
work_list[i][1][2] = -work_list[i][1][2]
return work_list
box = box(pos=(0, 0, 0), length = 5e-6, width = 5e-6, height = 5e-6, opacity = 0.5)
protons_num = raw_input("number of protons: ")
electrons_num = raw_input("number of electrons: ")
list_of_electrons = electronizer(int(electrons_num))
list_of_protons = protonizer(int(protons_num))
work_list = list_of_electrons + list_of_protons
while True:
work_list = positioner(work_list)
You should ask your question on the VPython.org forum where the VPython experts hang out and will be able to answer your question. You should mention which operating system you are using and which version of python you are using. From your code I see that you are using classic VPython. There is a newer version of VPython 7 that just came out but the VPython syntax has changed.

How to declare constraints with variable as array index in Z3Py?

Suppose x,y,z are int variables and A is a matrix, I want to express a constraint like:
z == A[x][y]
However this leads to an error:
TypeError: object cannot be interpreted as an index
What would be the correct way to do this?
=======================
A specific example:
I want to select 2 items with the best combination score,
where the score is given by the value of each item and a bonus on the selection pair.
For example,
for 3 items: a, b, c with related value [1,2,1], and the bonus on pairs (a,b) = 2, (a,c)=5, (b,c) = 3, the best selection is (a,c), because it has the highest score: 1 + 1 + 5 = 7.
My question is how to represent the constraint of selection bonus.
Suppose CHOICE[0] and CHOICE[1] are the selection variables and B is the bonus variable.
The ideal constraint should be:
B = bonus[CHOICE[0]][CHOICE[1]]
but it results in TypeError: object cannot be interpreted as an index
I know another way is to use a nested for to instantiate first the CHOICE, then represent B, but this is really inefficient for large quantity of data.
Could any expert suggest me a better solution please?
If someone wants to play a toy example, here's the code:
from z3 import *
items = [0,1,2]
value = [1,2,1]
bonus = [[1,2,5],
[2,1,3],
[5,3,1]]
choices = [0,1]
# selection score
SCORE = [ Int('SCORE_%s' % i) for i in choices ]
# bonus
B = Int('B')
# final score
metric = Int('metric')
# selection variable
CHOICE = [ Int('CHOICE_%s' % i) for i in choices ]
# variable domain
domain_choice = [ And(0 <= CHOICE[i], CHOICE[i] < len(items)) for i in choices ]
# selection implication
constraint_sel = []
for c in choices:
for i in items:
constraint_sel += [Implies(CHOICE[c] == i, SCORE[c] == value[i])]
# choice not the same
constraint_neq = [CHOICE[0] != CHOICE[1]]
# bonus constraint. uncomment it to see the issue
# constraint_b = [B == bonus[val(CHOICE[0])][val(CHOICE[1])]]
# metric definition
constraint_sumscore = [metric == sum([SCORE[i] for i in choices ]) + B]
constraints = constraint_sumscore + constraint_sel + domain_choice + constraint_neq + constraint_b
opt = Optimize()
opt.add(constraints)
opt.maximize(metric)
s = []
if opt.check() == sat:
m = opt.model()
print [ m.evaluate(CHOICE[i]) for i in choices ]
print m.evaluate(metric)
else:
print "failed to solve"
Turns out the best way to deal with this problem is to actually not use arrays at all, but simply create integer variables. With this method, the 317x317 item problem originally posted actually gets solved in about 40 seconds on my relatively old computer:
[ 0.01s] Data loaded
[ 2.06s] Variables defined
[37.90s] Constraints added
[38.95s] Solved:
c0 = 19
c1 = 99
maxVal = 27
Note that the actual "solution" is found in about a second! But adding all the required constraints takes the bulk of the 40 seconds spent. Here's the encoding:
from z3 import *
import sys
import json
import sys
import time
start = time.time()
def tprint(s):
global start
now = time.time()
etime = now - start
print "[%ss] %s" % ('{0:5.2f}'.format(etime), s)
# load data
with open('data.json') as data_file:
dic = json.load(data_file)
tprint("Data loaded")
items = dic['items']
valueVals = dic['value']
bonusVals = dic['bonusVals']
vals = [[Int("val_%d_%d" % (i, j)) for j in items if j > i] for i in items]
tprint("Variables defined")
opt = Optimize()
for i in items:
for j in items:
if j > i:
opt.add(vals[i][j-i-1] == valueVals[i] + valueVals[j] + bonusVals[i][j])
c0, c1 = Ints('c0 c1')
maxVal = Int('maxVal')
opt.add(Or([Or([And(c0 == i, c1 == j, maxVal == vals[i][j-i-1]) for j in items if j > i]) for i in items]))
tprint("Constraints added")
opt.maximize(maxVal)
r = opt.check ()
if r == unsat or r == unknown:
raise Z3Exception("Failed")
tprint("Solved:")
m = opt.model()
print " c0 = %s" % m[c0]
print " c1 = %s" % m[c1]
print " maxVal = %s" % m[maxVal]
I think this is as fast as it'll get with Z3 for this problem. Of course, if you want to maximize multiple metrics, then you can probably structure the code so that you can reuse most of the constraints, thus amortizing the cost of constructing the model just once, and incrementally optimizing afterwards for optimal performance.

SHA 256 pseuedocode?

I've been trying to work out how SHA-256 works. One thing I've been doing for other algorithms is I've worked out a sort of step by step pseudocode function for the algorithm.
I've tried to do the same for SHA256 but thus far I'm having quite a bit of trouble.
I've tried to work out how the Wikipedia diagram works but besides the text part explaining the functions I'm not sure I've got it right.
Here's what I have so far:
Input is an array 8 items long where each item is 32 bits.
Output is an array 8 items long where each item is 32 bits.
Calculate all the function boxes and store those values. I'll refer to them by function name.
Store input, right shifted by 32 bits, into output.
At this point, in the out array, E is the wrong value and A is empty
Store the function boxes.
now we need to calculate out E and out A.
note: I've replaced the modulo commands with a bitwise AND 2^(32-1)
I can't figure out how the modulus adding lines up, but I think it is like this:
Store (Input H + Ch + ( (Wt+Kt) AND 2^31 ) ) AND 2^31 As mod1
Store (sum1 + mod1) AND 2^31 as mod2
Store (d + mod2) AND 2^31 into output E
now output E is correct and all we need is output A
Store (MA + mod2) AND 2^31 as mod3
Store (sum0 + mod3) AND 2^31 into output A
output now contains the correct hash of input.
Do we return now or does this need to be run repeatedly?
Did I get all of those addition modulos right?
what are Wt and Kt?
Would this get run once, and you're done or does it need to be run a certain number of times, with the output being re-used as input?
Here's the link by the way.
http://en.wikipedia.org/wiki/SHA-2#Hash_function
Thanks alot,
Brian
Have a look at the official standard that describes the algorithm, the variables are described here: http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
(Oh, now I see I'm almost a year late with my answer, ah, never mind...)
W_t is derived from the current block being processed while K_t is a fixed constant determined by the iteration number. The compression function is repeated 64 times for each block in SHA256. There is a specific constant K_t and a derived value W_t for each iteration 0 <= t <= 63.
I have provided my own implementation of SHA256 using Python 3.6. The tuple K contains the 64 constant values of K_t. The Sha256 function shows how the value of W_t is computed in the list W. The implementation focuses on code clarity and not high-performance.
W = 32 #Number of bits in word
M = 1 << W
FF = M - 1 #0xFFFFFFFF (for performing addition mod 2**32)
#Constants from SHA256 definition
K = (0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2)
#Initial values for compression function
I = (0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19)
def RR(x, b):
'''
32-bit bitwise rotate right
'''
return ((x >> b) | (x << (W - b))) & FF
def Pad(W):
'''
Pads a message and converts to byte array
'''
mdi = len(W) % 64
L = (len(W) << 3).to_bytes(8, 'big') #Binary of len(W) in bits
npad = 55 - mdi if mdi < 56 else 119 - mdi #Pad so 64 | len; add 1 block if needed
return bytes(W, 'ascii') + b'\x80' + (b'\x00' * npad) + L #64 | 1 + npad + 8 + len(W)
def Sha256CF(Wt, Kt, A, B, C, D, E, F, G, H):
'''
SHA256 Compression Function
'''
Ch = (E & F) ^ (~E & G)
Ma = (A & B) ^ (A & C) ^ (B & C) #Major
S0 = RR(A, 2) ^ RR(A, 13) ^ RR(A, 22) #Sigma_0
S1 = RR(E, 6) ^ RR(E, 11) ^ RR(E, 25) #Sigma_1
T1 = H + S1 + Ch + Wt + Kt
return (T1 + S0 + Ma) & FF, A, B, C, (D + T1) & FF, E, F, G
def Sha256(M):
'''
Performs SHA256 on an input string
M: The string to process
return: A 32 byte array of the binary digest
'''
M = Pad(M) #Pad message so that length is divisible by 64
DG = list(I) #Digest as 8 32-bit words (A-H)
for j in range(0, len(M), 64): #Iterate over message in chunks of 64
S = M[j:j + 64] #Current chunk
W = [0] * 64
W[0:16] = [int.from_bytes(S[i:i + 4], 'big') for i in range(0, 64, 4)]
for i in range(16, 64):
s0 = RR(W[i - 15], 7) ^ RR(W[i - 15], 18) ^ (W[i - 15] >> 3)
s1 = RR(W[i - 2], 17) ^ RR(W[i - 2], 19) ^ (W[i - 2] >> 10)
W[i] = (W[i - 16] + s0 + W[i-7] + s1) & FF
A, B, C, D, E, F, G, H = DG #State of the compression function
for i in range(64):
A, B, C, D, E, F, G, H = Sha256CF(W[i], K[i], A, B, C, D, E, F, G, H)
DG = [(X + Y) & FF for X, Y in zip(DG, (A, B, C, D, E, F, G, H))]
return b''.join(Di.to_bytes(4, 'big') for Di in DG) #Convert to byte array
if __name__ == "__main__":
bd = Sha256('Hello World')
print(''.join('{:02x}'.format(i) for i in bd))
initial_hash_values=[
'6a09e667','bb67ae85','3c6ef372','a54ff53a',
'510e527f','9b05688c','1f83d9ab','5be0cd19'
]
sha_256_constants=[
'428a2f98','71374491','b5c0fbcf','e9b5dba5',
'3956c25b','59f111f1','923f82a4','ab1c5ed5',
'd807aa98','12835b01','243185be','550c7dc3',
'72be5d74','80deb1fe','9bdc06a7','c19bf174',
'e49b69c1','efbe4786','0fc19dc6','240ca1cc',
'2de92c6f','4a7484aa','5cb0a9dc','76f988da',
'983e5152','a831c66d','b00327c8','bf597fc7',
'c6e00bf3','d5a79147','06ca6351','14292967',
'27b70a85','2e1b2138','4d2c6dfc','53380d13',
'650a7354','766a0abb','81c2c92e','92722c85',
'a2bfe8a1','a81a664b','c24b8b70','c76c51a3',
'd192e819','d6990624','f40e3585','106aa070',
'19a4c116','1e376c08','2748774c','34b0bcb5',
'391c0cb3','4ed8aa4a','5b9cca4f','682e6ff3',
'748f82ee','78a5636f','84c87814','8cc70208',
'90befffa','a4506ceb','bef9a3f7','c67178f2'
]
def bin_return(dec):
return(str(format(dec,'b')))
def bin_8bit(dec):
return(str(format(dec,'08b')))
def bin_32bit(dec):
return(str(format(dec,'032b')))
def bin_64bit(dec):
return(str(format(dec,'064b')))
def hex_return(dec):
return(str(format(dec,'x')))
def dec_return_bin(bin_string):
return(int(bin_string,2))
def dec_return_hex(hex_string):
return(int(hex_string,16))
def L_P(SET,n):
to_return=[]
j=0
k=n
while k<len(SET)+1:
to_return.append(SET[j:k])
j=k
k+=n
return(to_return)
def s_l(bit_string):
bit_list=[]
for i in range(len(bit_string)):
bit_list.append(bit_string[i])
return(bit_list)
def l_s(bit_list):
bit_string=''
for i in range(len(bit_list)):
bit_string+=bit_list[i]
return(bit_string)
def rotate_right(bit_string,n):
bit_list = s_l(bit_string)
count=0
while count <= n-1:
list_main=list(bit_list)
var_0=list_main.pop(-1)
list_main=list([var_0]+list_main)
bit_list=list(list_main)
count+=1
return(l_s(list_main))
def shift_right(bit_string,n):
bit_list=s_l(bit_string)
count=0
while count <= n-1:
bit_list.pop(-1)
count+=1
front_append=['0']*n
return(l_s(front_append+bit_list))
def mod_32_addition(input_set):
value=0
for i in range(len(input_set)):
value+=input_set[i]
mod_32 = 4294967296
return(value%mod_32)
def xor_2str(bit_string_1,bit_string_2):
xor_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='0' and bit_string_2[i]=='0':
xor_list.append('0')
if bit_string_1[i]=='1' and bit_string_2[i]=='1':
xor_list.append('0')
if bit_string_1[i]=='0' and bit_string_2[i]=='1':
xor_list.append('1')
if bit_string_1[i]=='1' and bit_string_2[i]=='0':
xor_list.append('1')
return(l_s(xor_list))
def and_2str(bit_string_1,bit_string_2):
and_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='1' and bit_string_2[i]=='1':
and_list.append('1')
else:
and_list.append('0')
return(l_s(and_list))
def or_2str(bit_string_1,bit_string_2):
or_list=[]
for i in range(len(bit_string_1)):
if bit_string_1[i]=='0' and bit_string_2[i]=='0':
or_list.append('0')
else:
or_list.append('1')
return(l_s(or_list))
def not_str(bit_string):
not_list=[]
for i in range(len(bit_string)):
if bit_string[i]=='0':
not_list.append('1')
else:
not_list.append('0')
return(l_s(not_list))
'''
SHA-256 Specific Functions:
'''
def Ch(x,y,z):
return(xor_2str(and_2str(x,y),and_2str(not_str(x),z)))
def Maj(x,y,z):
return(xor_2str(xor_2str(and_2str(x,y),and_2str(x,z)),and_2str(y,z)))
def e_0(x):
return(xor_2str(xor_2str(rotate_right(x,2),rotate_right(x,13)),rotate_right(x,22)))
def e_1(x):
return(xor_2str(xor_2str(rotate_right(x,6),rotate_right(x,11)),rotate_right(x,25)))
def s_0(x):
return(xor_2str(xor_2str(rotate_right(x,7),rotate_right(x,18)),shift_right(x,3)))
def s_1(x):
return(xor_2str(xor_2str(rotate_right(x,17),rotate_right(x,19)),shift_right(x,10)))
def message_pad(bit_list):
pad_one = bit_list + '1'
pad_len = len(pad_one)
k=0
while ((pad_len+k)-448)%512 != 0:
k+=1
back_append_0 = '0'*k
back_append_1 = bin_64bit(len(bit_list))
return(pad_one+back_append_0+back_append_1)
def message_bit_return(string_input):
bit_list=[]
for i in range(len(string_input)):
bit_list.append(bin_8bit(ord(string_input[i])))
return(l_s(bit_list))
def message_pre_pro(input_string):
bit_main = message_bit_return(input_string)
return(message_pad(bit_main))
def message_parsing(input_string):
return(L_P(message_pre_pro(input_string),32))
def message_schedule(index,w_t):
new_word = bin_32bit(mod_32_addition([int(s_1(w_t[index-2]),2),int(w_t[index-7],2),int(s_0(w_t[index-15]),2),int(w_t[index-16],2)]))
return(new_word)
'''
This example of SHA_256 works for an input string >56 characters.
'''
def sha_256(input_string):
w_t=message_parsing(input_string)
a=bin_32bit(dec_return_hex(initial_hash_values[0]))
b=bin_32bit(dec_return_hex(initial_hash_values[1]))
c=bin_32bit(dec_return_hex(initial_hash_values[2]))
d=bin_32bit(dec_return_hex(initial_hash_values[3]))
e=bin_32bit(dec_return_hex(initial_hash_values[4]))
f=bin_32bit(dec_return_hex(initial_hash_values[5]))
g=bin_32bit(dec_return_hex(initial_hash_values[6]))
h=bin_32bit(dec_return_hex(initial_hash_values[7]))
for i in range(0,64):
if i <= 15:
t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
h=g
g=f
f=e
e=mod_32_addition([int(d,2),t_1])
d=c
c=b
b=a
a=mod_32_addition([t_1,t_2])
a=bin_32bit(a)
e=bin_32bit(e)
if i > 15:
w_t.append(message_schedule(i,w_t))
t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
h=g
g=f
f=e
e=mod_32_addition([int(d,2),t_1])
d=c
c=b
b=a
a=mod_32_addition([t_1,t_2])
a=bin_32bit(a)
e=bin_32bit(e)
hash_0 = mod_32_addition([dec_return_hex(initial_hash_values[0]),int(a,2)])
hash_1 = mod_32_addition([dec_return_hex(initial_hash_values[1]),int(b,2)])
hash_2 = mod_32_addition([dec_return_hex(initial_hash_values[2]),int(c,2)])
hash_3 = mod_32_addition([dec_return_hex(initial_hash_values[3]),int(d,2)])
hash_4 = mod_32_addition([dec_return_hex(initial_hash_values[4]),int(e,2)])
hash_5 = mod_32_addition([dec_return_hex(initial_hash_values[5]),int(f,2)])
hash_6 = mod_32_addition([dec_return_hex(initial_hash_values[6]),int(g,2)])
hash_7 = mod_32_addition([dec_return_hex(initial_hash_values[7]),int(h,2)])
final_hash = (hex_return(hash_0),
hex_return(hash_1),
hex_return(hash_2),
hex_return(hash_3),
hex_return(hash_4),
hex_return(hash_5),
hex_return(hash_6),
hex_return(hash_7))
return(final_hash)