How to index a torch.tensor by the index of tensor.min? - indexing

Example:
import torch
# Random tensor with dimensions (3, 4, 5).
A = torch.rand(3, 4, 5)
# Reduce along dim 1; the call yields the minimum values together with
# the positions (along dim 1) at which they were found.
min_A, index_A = A.min(dim=1)
How to use A and index_A to obtain min_A?

Related

scipy convert coo string directly to numpy matrix

I already have a string in COO matrix format (row, col, value):
0 0 -1627.761282
0 1 342.811259
0 2 342.811259
0 3 171.372276
0 4 342.744553
0 5 342.744553
Now I want to convert my string directly to numpy matrix. Currently I have to write my string to file, then create a numpy matrix from file:
from scipy.sparse import coo_matrix
import numpy as np
# Current approach: round-trip through a text file.  `matrix_str` holds the
# "row col value" lines; it is written out and then parsed back with NumPy.
with open("Output.txt", "w") as text_file:
    text_file.write(matrix_str)

# Read every field as a string; each parsed row is (row, col, value).
text = np.loadtxt('Output.txt', delimiter=' ', dtype=str)
rows, cols, data = text.T
# Build the sparse matrix from the triplets, then convert it to dense form.
matrix = coo_matrix((data.astype(float), (rows.astype(int), cols.astype(int)))).todense()
How can I convert my string directly to a numpy matrix without writing it to a file? Please help.
You could use StringIO as follows.
import numpy as np
from scipy.sparse import coo_matrix
import io
# Wrap the string in an in-memory text buffer so np.loadtxt can read it like
# a file; no temporary file on disk is needed.
with io.StringIO(matrix_str) as ss:
    # ndmin=2 keeps the result two-dimensional even when the string holds a
    # single line, so the transpose always unpacks into three 1-D arrays.
    rows, cols, data = np.loadtxt(ss, ndmin=2).T
# loadtxt parses every field as float, so the indices are cast back to int.
matrix = coo_matrix((data.astype(float), (rows.astype(int), cols.astype(int)))).todense()

scipy optimization with multiple constraints

How to find solution to a linear regression with multiple constraints on the coefficients in python?
cvxpy is a good choice:
import cvxpy as cp
import numpy as np
np.random.seed(1)  # fixed seed so the example data is reproducible
y = 100 * np.random.random(1)
x = 200 * np.random.random(1000) - 100

# One coefficient per entry of x.
b = cp.Variable(1000)
# Box constraint on every coefficient plus one equality on their sum.
constraints = [-100 <= b, b <= 100, cp.sum(b) == 2]
# Squared residual of the fit; `x @ b` is the inner product of x and b.
obj = cp.Minimize(cp.square(y - x @ b))
prob = cp.Problem(obj, constraints)
val = prob.solve()
print(f"Objective value {val}")
print("b values: {0}".format(b.value))

In Pandas, how can a DataFrame be binned by two columns, with the other columns changed to the means within those bins?

I've got the standard iris dataset projected down to two dimensions using UMAP, with the UMAP dimensions for the x and y positions of the 2D plot added as columns to the dataframe:
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap # pip install umap-learn
# Load the iris data and put its feature matrix into a DataFrame.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Translate the integer class codes 0..2 into the species names.
code_to_name = dict(zip(range(3), iris.target_names))
iris_df['species'] = pd.Series(iris.target).map(code_to_name)

# Project the features with UMAP and keep the first two output columns
# as the x and y plot coordinates.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'], iris_df['UMAP_y'] = _umap[:, 0], _umap[:, 1]
iris_df.head()
I'd like to bin both the UMAP_x and UMAP_y columns into like 25 bins and then the other columns in the dataframe change to being the mean values of the columns in each of the bins. How might this be done? It feels like cut or resampling might lead to the answer, but I'm not sure how.
You can use cut to define bins and then use groupby with transform to calculate mean value for each bin.
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
import umap
# Load the iris data and put its feature matrix into a DataFrame.
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
species_lookup = dict(zip(range(3), iris.target_names))
iris_df['species'] = pd.Series(iris.target).map(species_lookup)

# UMAP projection; the first two output columns become the plot coordinates.
_umap = umap.UMAP().fit_transform(iris.data)
iris_df['UMAP_x'] = _umap[:, 0]
iris_df['UMAP_y'] = _umap[:, 1]

# Cut each coordinate into 25 equal-width intervals.
x_bins = pd.cut(iris_df['UMAP_x'], bins=25)
y_bins = pd.cut(iris_df['UMAP_y'], bins=25)
iris_df['UMAP_x_bin'] = x_bins
iris_df['UMAP_y_bin'] = y_bins

# Per-bin mean of each coordinate, broadcast back onto the original rows.
# Note that the two axes are grouped independently of one another here,
# not by the (x bin, y bin) pair.
iris_df['UMAP_x_mean'] = iris_df['UMAP_x'].groupby(iris_df['UMAP_x_bin']).transform('mean')
iris_df['UMAP_y_mean'] = iris_df['UMAP_y'].groupby(iris_df['UMAP_y_bin']).transform('mean')
iris_df.head()

Using pdist with string value in python scipy

I have the following code and I want to calculate the Hamming distances between the strings:
from pandas import DataFrame
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
# Load the hash table from CSV; column names are supplied explicitly via names=.
# NOTE(review): error_bad_lines was deprecated in pandas 1.3 and removed in 2.0
# (on_bad_lines='skip' is the replacement) -- confirm the target pandas version.
df = pd.read_csv("3d_printing.csv", encoding='utf-8', error_bad_lines=False, low_memory=False, names=['file_name', 'phash', 'dhash', 'file_date'])
def hamming_distance(s1, s2):
    """Return the number of positions at which s1 and s2 differ.

    Args:
        s1: First sequence (for example a string of hash characters).
        s2: Second sequence; must have the same length as s1.

    Returns:
        The count of index positions whose elements are not equal.

    Raises:
        ValueError: If the two sequences have different lengths.
    """
    if len(s1) != len(s2):
        raise ValueError("Undefined for sequences of unequal length")
    return sum(el1 != el2 for el1, el2 in zip(s1, s2))
# sort_values returns a new frame rather than sorting in place, so the result
# has to be bound back to df for the descending date order to take effect.
df = df.sort_values(by='file_date', ascending=False)

# Pairwise distances between the phash values, keeping the upper triangle
# only; rows and columns are labelled with the file-name prefix before '_'.
# NOTE: pdist converts its input to float, so the string hashes fail here
# with "ValueError: could not convert string to float".
x = pd.DataFrame(
    np.triu(squareform(pdist(df[['phash']], hamming_distance))),
    columns=df.file_name.str.split('_').str[0],
    index=df.file_name.str.split('_').str[0],
).replace(0, np.nan)

# Mask entries whose row label equals the column label, take the row-wise
# maximum, then the maximum per index label.  groupby(level=0).max() is the
# equivalent of Series.max(level=0), which was removed in pandas 2.0.
z = x[x.apply(lambda col: col.index != col.name)].max(axis=1).groupby(level=0).max()
z.to_csv("3d_printing_x.csv", mode='a')
When I run the code I get
ValueError: could not convert string to float: '002889898888b8a9'
I know that pdist requires float values, but at this point I don't know what to do

passing numpy array as parameter in theano function

As a beginner, I was trying to simply compute the dot product of two matrices using Theano.
My code is very simple.
import theano
import theano.tensor as T
import numpy as np
from theano import function
def covarience(array):
    """Attempt to compile a Theano dot product of `array` with a ones matrix.

    NOTE: as written, the call to `function` receives NumPy arrays as its
    input list instead of the symbolic variables declared below, which is
    what produces the "TypeError: Unknown parameter type" being asked about.
    """
    # Symbolic placeholders; only two of them take part in the expression z.
    input_array = T.matrix('input_array')
    deviation_matrix = T.matrix('deviation_matrix')
    matrix_filled_with_1s = T.matrix('matrix_filled_with_1s')
    z = T.dot(input_array, matrix_filled_with_1s)
    # Square matrix of ones with one row/column per row of `array`.
    identity = np.ones((len(array), len(array)))
    f = function([array, identity], z)
    # print(f)


covarience(np.array([[2, 4], [6, 8]]))
But the problem is that each time I run this code, I get an error message like "TypeError: Unknown parameter type: ".
Can anyone tell me what's wrong with my code?
You cannot pass a NumPy array when defining a Theano function; Theano functions can only be defined in terms of theano.tensor variables. You describe the computation symbolically using tensor variables, and then call the compiled function on real data to perform the actual computation. Defining the Theano function itself with a NumPy array does not make sense.
This should work:
import theano
import theano.tensor as T
import numpy as np
# Symbolic placeholders for the two input matrices.
a = T.matrix('a')
b = T.matrix('b')
# Symbolic dot product, compiled into a callable that takes concrete arrays.
z = T.dot(a, b)
f = theano.function(inputs=[a, b], outputs=z)

# Concrete inputs in Theano's configured float dtype; b_d is all ones with
# the same shape and dtype as a_d.
a_d = np.array([[2, 4], [6, 8]], dtype=theano.config.floatX)
b_d = np.ones(shape=a_d.shape, dtype=a_d.dtype)
result = f(a_d, b_d)
print(result)