word2vec, sum or average word embeddings? - cosine-similarity

I'm using word2vec to represent a small phrase (3 to 4 words) as a single vector, either by summing the individual word embeddings or by averaging them.
In my experiments, both approaches always yield exactly the same cosine similarity. I suspect this has to do with the word vectors produced by word2vec being normalized to unit length (Euclidean norm) after training; or else I have a bug in the code, or I'm missing something.
Here is the code:
import numpy as np
from nltk import PunktWordTokenizer
from gensim.models import Word2Vec
from numpy.linalg import norm
from scipy.spatial.distance import cosine

def pattern2vector(tokens, word2vec, AVG=False):
    pattern_vector = np.zeros(word2vec.layer1_size)
    n_words = 0
    if len(tokens) > 1:
        for t in tokens:
            try:
                vector = word2vec[t.strip()]
                pattern_vector = np.add(pattern_vector, vector)
                n_words += 1
            except KeyError:
                continue
        if AVG is True:
            pattern_vector = np.divide(pattern_vector, n_words)
    elif len(tokens) == 1:
        try:
            pattern_vector = word2vec[tokens[0].strip()]
        except KeyError:
            pass
    return pattern_vector
def main():
    print "Loading word2vec model ...\n"
    word2vecmodelpath = "/data/word2vec/vectors_200.bin"
    word2vec = Word2Vec.load_word2vec_format(word2vecmodelpath, binary=True)
    pattern_1 = 'founder and ceo'
    pattern_2 = 'co-founder and former chairman'
    tokens_1 = PunktWordTokenizer().tokenize(pattern_1)
    tokens_2 = PunktWordTokenizer().tokenize(pattern_2)
    print "vec1", tokens_1
    print "vec2", tokens_2
    p1 = pattern2vector(tokens_1, word2vec, False)
    p2 = pattern2vector(tokens_2, word2vec, False)
    print "\nSUM"
    print "dot(vec1,vec2)", np.dot(p1, p2)
    print "norm(p1)", norm(p1)
    print "norm(p2)", norm(p2)
    print "dot(norm(vec1),norm(vec2))", np.dot(norm(p1), norm(p2))
    print "cosine(vec1,vec2)", np.divide(np.dot(p1, p2), np.dot(norm(p1), norm(p2)))
    print "\n"
    print "AVG"
    p1 = pattern2vector(tokens_1, word2vec, True)
    p2 = pattern2vector(tokens_2, word2vec, True)
    print "dot(vec1,vec2)", np.dot(p1, p2)
    print "norm(p1)", norm(p1)
    print "norm(p2)", norm(p2)
    print "dot(norm(vec1),norm(vec2))", np.dot(norm(p1), norm(p2))
    print "cosine(vec1,vec2)", np.divide(np.dot(p1, p2), np.dot(norm(p1), norm(p2)))

if __name__ == "__main__":
    main()
And here is the output:
Loading word2vec model ...
Dimensions 200
vec1 ['founder', 'and', 'ceo']
vec2 ['co-founder', 'and', 'former', 'chairman']
SUM
dot(vec1,vec2) 5.4008677771
norm(p1) 2.19382594282
norm(p2) 2.87226958166
dot(norm(vec1),norm(vec2)) 6.30125952303
cosine(vec1,vec2) 0.857109242583
AVG
dot(vec1,vec2) 0.450072314758
norm(p1) 0.731275314273
norm(p2) 0.718067395416
dot(norm(vec1),norm(vec2)) 0.525104960252
cosine(vec1,vec2) 0.857109242583
I'm using cosine similarity as defined on Wikipedia (Cosine Similarity), i.e. cos(a, b) = a·b / (||a|| ||b||). The values of the norms and dot products are indeed different.
Can anyone explain why the cosine is the same?
Thank you,
David

Cosine similarity measures the angle between two vectors and takes no account of either vector's length. Averaging just divides the summed vector by the number of words, a positive scalar, which shortens the vector without changing its direction: cos(a/m, b/n) = cos(a, b) for any positive m and n. So your results look correct to me.
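As a quick sanity check, here is a small NumPy sketch (with made-up vectors standing in for your summed phrase embeddings) showing that rescaling either vector leaves the cosine untouched:

import numpy as np
from numpy.linalg import norm

def cosine_sim(a, b):
    # cos(a, b) = a.b / (||a|| * ||b||)
    return np.dot(a, b) / (norm(a) * norm(b))

# Made-up stand-ins for two summed phrase vectors.
a = np.array([0.3, -1.2, 0.7, 2.0])
b = np.array([1.1, 0.4, -0.5, 1.6])

print(cosine_sim(a, b))          # some value c
print(cosine_sim(a / 3, b / 4))  # exactly the same value c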

Related

TensorFlow and tflearn chatbot keeps getting high probability even when user input is wrong

I coded a simple AI chatbot with TensorFlow and tflearn and it runs just fine, but there's an issue: when the user inputs the wrong thing, the bot is supposed to say it doesn't understand if the prediction confidence is less than 70%, yet the bot always scores above that even when the user types gibberish like "rjrigrejfr", which it assumes is a greeting. The patterns it's supposed to learn in the JSON are "patterns": ["Hi", "How are you", "Wassup", "Hello", "Good day", "Waddup", "Yo"]. I can share the JSON file if needed; it's short. Anyway, this is the Python code:
import numpy as np
import nltk
import tensorflow
import tflearn
import random
import json
import pickle

# Some extra configuration:
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
nltk.download('punkt')

# Load the data from the json file into a variable.
with open("intents.json") as file:
    data = json.load(file)

# If we already have saved data, we do not need to retrain the model and waste time
# (this could become an issue in more complex programs). Save in pickle.
try:
    with open("data.pickle", "rb") as f:  # rb stands for read bytes.
        words, labels, training, output = pickle.load(f)
# --- Pre-training data preparation ---
except:
    words = []
    docsx = []   # Stores patterns
    docsy = []   # Stores intents
    labels = []  # All the specific tag values such as greeting, contact, etc.
    for intent in data["intents"]:
        for pattern in intent["patterns"]:
            w = nltk.word_tokenize(pattern)  # nltk function that splits the sentences inside the intent into a list of words.
            words.extend(w)                  # Add the tokenized list to the words list.
            docsx.append(w)
            docsy.append(intent["tag"])      # Append the classification of the sentence.
        if intent["tag"] not in labels:
            labels.append(intent["tag"])
    words = [stemmer.stem(w.lower()) for w in words if w not in ".?!"]  # Stem the words down to their roots and convert all to lowercase.
    words = sorted(list(set(words)))  # set ensures no duplicate elements; then we convert back to a list and sort it.
    labels = sorted(labels)
    training = []
    output = []
    out_empty = [0 for i in range(len(labels))]  # A list of 0s, one per tag. Useful later when binarizing.
    # One-hot encode the intent categories; this "binarizes" the data, which improves the efficiency of the ML.
    # In this case we get a list of 0s and 1s: if the word appears it is assigned a 1, else a 0.
    for x, doc in enumerate(docsx):
        bag = []  # Bag of words, i.e. the one-hot coded data for the ML model.
        docx_word_stemmed = [stemmer.stem(word) for word in doc]  # Stem the words in this pattern.
        # Now build the one-hot coded list / bag-of-words vector.
        for i in words:
            if i in docx_word_stemmed:  # Check against the stemmed words: word exists.
                bag.append(1)
            else:
                bag.append(0)
        output_row = out_empty[:]  # Copy out_empty.
        # Find this pattern's tag in the labels list via .index() and set its slot to 1.
        output_row[labels.index(docsy[x])] = 1
        training.append(bag)
        output.append(output_row)
    # tflearn requires numpy arrays; they are also faster.
    training = np.array(training)
    output = np.array(output)
    # Save the data so we do not need to redo the preparation every time.
    with open("data.pickle", "wb") as f:
        pickle.dump((words, labels, training, output), f)

try:
    model.load('model.tflearn')
except:
    tensorflow.compat.v1.reset_default_graph()
    net = tflearn.input_data(shape=[None, len(training[0])])
    net = tflearn.fully_connected(net, 8)
    net = tflearn.fully_connected(net, 8)
    net = tflearn.fully_connected(net, len(output[0]), activation='softmax')
    net = tflearn.regression(net)
    model = tflearn.DNN(net)
    model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)
    model.save("model.tflearn")

def bagofwords(sentence, words):
    bag = [0 for _ in range(len(words))]  # Blank bag of words.
    # Tokenize the sentence and then stem it.
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    for string in sentence_words:
        for i, word in enumerate(words):
            if word == string:
                bag[i] = 1
    return np.array(bag)

def chat():
    print("Hello there! I'm the SRO AI Virtual Assistant. How may I help you?")
    # Figure out the error slime!
    while True:
        user_input = input("Type here:")
        if user_input == "quit":
            break
        result = model.predict([bagofwords(user_input, words)])[0]  # Use the bagofwords func and the predict function to get a prediction for what the user is saying.
        best_result = np.argmax(result)  # We only want to use the best result.
        tag = labels[best_result]
        print(result[best_result])
        # Open the JSON file and pick a response.
        if result[best_result] > 0.7:
            for tg in data["intents"]:
                if tg['tag'] == tag:
                    responses = tg['responses']
            print(random.choice(responses))
        else:
            print("I don't quite understand")

chat()

Passing pandas dataframe data to functions and it's not outputting the results

In my code, I am trying to extract data from a CSV file to use in the function, but it doesn't output anything and gives no error. I know the function itself works because I tried it with plain numpy arrays as inputs; I'm not sure why it doesn't work with pandas.
import numpy as np
import pandas as pd
import os

# change the current directory to the directory where the running script file is
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# finding best fit line for y=mx+b by iteration
def gradient_descent(x, y):
    m_iter = b_iter = 1  # starting point
    iteration = 10000
    n = len(x)
    learning_rate = 0.05
    last_mse = 10000
    # take baby steps to reach the global minimum
    for i in range(iteration):
        y_predicted = m_iter*x + b_iter
        #mse = 1/n*sum([value**2 for value in (y-y_predicted)])  # cost function to minimize
        mse = 1/n*sum((y-y_predicted)**2)  # cost function to minimize
        if (last_mse - mse)/mse < 0.001:
            break
        # recall the MSE formula is 1/n*sum((yi-y_predicted)^2), where y_predicted = m*x+b
        # using the partial derivatives of the MSE formula, d/dm and d/db
        dm = -(2/n)*sum(x*(y-y_predicted))
        db = -(2/n)*sum((y-y_predicted))
        # use the current predicted value to get the next value for prediction
        # by using the learning rate
        m_iter = m_iter - learning_rate*dm
        b_iter = b_iter - learning_rate*db
        print('m is {}, b is {}, cost is {}, iteration {}'.format(m_iter, b_iter, mse, i))
        last_mse = mse

#x = np.array([1,2,3,4,5])
#y = np.array([5,7,8,10,13])
#gradient_descent(x,y)

df = pd.read_csv('Linear_Data.csv')
x = df['Area']
y = df['Price']
gradient_descent(x, y)
"My code works because I tried it with just numpy arrays as inputs; not sure why it doesn't work with pandas."
Well no, your code also works with pandas dataframes:
df = pd.DataFrame({'Area': [1,2,3,4,5], 'Price': [5,7,8,10,13]})
x = df['Area']
y = df['Price']
gradient_descent(x,y)
The above gives you the same output as with the numpy arrays.
Check what's in Linear_Data.csv and/or add some print statements in the gradient_descent function, just to test your assumptions. I would suggest first of all adding a print statement before the condition with the break statement:
print(last_mse, mse)
if (last_mse - mse)/mse < 0.001:
    break
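For what it's worth, here is one way the function can exit silently that the print above would reveal. This is a hypothetical sketch: it assumes Linear_Data.csv holds values much larger than the toy arrays (e.g. areas in the thousands). The first mse then dwarfs last_mse = 10000, the relative change goes negative, and the loop breaks on iteration 0, before the print statement inside the loop is ever reached:

import numpy as np

# Hypothetical stand-in for Linear_Data.csv, with areas in the thousands.
x = np.array([1000, 2000, 3000, 4000, 5000])
y = np.array([300, 480, 620, 790, 950])

m_iter = b_iter = 1
n = len(x)
last_mse = 10000

y_predicted = m_iter*x + b_iter
mse = 1/n*sum((y - y_predicted)**2)  # ~7 million on the first pass
print((last_mse - mse)/mse)          # ~ -0.999, which is < 0.001
# => the loop breaks on iteration 0, producing no output and no error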

Text classification using embedding for two columns of dataset

I am working on a project where I am using mental-health-related subreddit posts containing two feature columns (text, title) and a label column (Subreddit).
I want to use an LSTM for classification, for which I need to create an embedding matrix for both columns; in short, I need both columns for the text classification, but I cannot find a way to embed both.
The code I am using for the text sequences is:
text_sequences_train = token.texts_to_sequences(preprocessed_text_train)
title_sequences_train = token.texts_to_sequences(preprocessed_title_train)
#print(sequences_train)
train = np.hstack(text_sequences_train + title_sequences_train)
train.reshape(1, train.shape[0])
train_seq_x = pad_sequences(train, maxlen=300)

text_sequences_test = token.texts_to_sequences(preprocessed_text_test)
title_sequences_test = token.texts_to_sequences(preprocessed_title_test)
#print(sequences_train)
test = np.hstack(text_sequences_test + title_sequences_test)
test.reshape(1, test.shape[0])
test_seq_x = pad_sequences(test, maxlen=300)

text_sequences_val = token.texts_to_sequences(preprocessed_text_val)
title_sequences_val = token.texts_to_sequences(preprocessed_title_val)
#print(sequences_train)
val = np.hstack(text_sequences_val + title_sequences_val)
val.reshape(1, val.shape[0])
val_seq_x = pad_sequences(val, maxlen=300)
The above code gives me an error:
ValueError: `sequences` must be a list of iterables. Found non-iterable: 428.0
The code I am using for the embedding matrix is:
glove_file = "glove.42B.300d.txt"
import tqdm
EMBEDDING_VECTOR_LENGTH = 300  # <=200

def construct_embedding_matrix(glove_file, word_index):
    embedding_dict = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # get the word
            word = values[0]
            if word in word_index.keys():
                # get the vector
                vector = np.asarray(values[1:], 'float32')
                embedding_dict[word] = vector
                #print(embedding_dict[word].shape)
    ### oov words (out of vocabulary words) will be mapped to 0 vectors
    num_words = len(word_index) + 1
    # initialize it to 0
    embedding_matrix = np.zeros((num_words, EMBEDDING_VECTOR_LENGTH))
    for word, i in tqdm.tqdm(word_index.items()):
        if i < num_words:
            vect = embedding_dict.get(word, [])
            if len(vect) > 0:
                embedding_matrix[i] = vect[:EMBEDDING_VECTOR_LENGTH]
                #print(embedding_matrix[i].shape)
    print(embedding_matrix)
    return embedding_matrix

embedding_matrix = construct_embedding_matrix(glove_file, word_index)
If I instead convert the text sequences first and then do the train/test split, it gives an error where the numbers of samples in X and Y do not match.

tf.nn.softmax behaving strangely

I am learning LSTM with TensorFlow with eager execution enabled. However, when implementing the LSTM, I noticed behaviour of tf.nn.softmax that has me stuck. Here is a section of my code:
class RNN_LSTM(object):
    def __init__(self, hidden_size):
        data = open('Shakespear.txt', 'r').read()
        self.data = data.split()
        vocab_size = len(list(set(self.data)))
        self.words = list(set(self.data))
        self.hidden_size = hidden_size
        self.input_size = vocab_size + hidden_size
        self.vocab_size = vocab_size
        self.W1 = tf.Variable(tf.random.uniform((self.hidden_size, self.input_size), dtype=tf.dtypes.float32, name="W1")*0.1)
        self.b1 = tf.Variable(tf.random.uniform((self.hidden_size, 1), dtype=tf.dtypes.float32, name="b1"))
        self.W2 = tf.Variable(tf.random.uniform((self.hidden_size, self.input_size), dtype=tf.dtypes.float32, name="W2")*0.1)
        self.b2 = tf.Variable(tf.random.uniform((self.hidden_size, 1), dtype=tf.dtypes.float32, name="b2")*0.1)
        self.W3 = tf.Variable(tf.random.uniform((self.hidden_size, self.input_size), dtype=tf.dtypes.float32, name="W3")*0.1)
        self.b3 = tf.Variable(tf.random.uniform((self.hidden_size, 1), dtype=tf.dtypes.float32, name="b3")*0.1)
        self.W4 = tf.Variable(tf.random.uniform((hidden_size, self.input_size), dtype=tf.dtypes.float32, name="W4")*0.1)
        self.b4 = tf.Variable(tf.random.uniform((self.hidden_size, 1), dtype=tf.dtypes.float32, name="b4")*0.1)
        self.W5 = tf.Variable(tf.random.uniform((self.vocab_size, self.hidden_size), dtype=tf.dtypes.float32, name="W5")*0.1)
        self.b5 = tf.Variable(tf.random.uniform((self.vocab_size, 1), dtype=tf.dtypes.float32, name="b5")*0.1)
        self.learning_rate = 1e-1
        self.sequence_length = 50
        #self.M_c = tf.Variable(tf.zeros((self.input_size, 1)), name="M_c")

    def one_hot_encoding(self, x, hprev):
        M_c = tf.Variable(tf.zeros((self.input_size, 1)), name="M_c")
        vocab = tf.Variable(tf.zeros((self.vocab_size, 1)))
        #hprev = tf.Variable(tf.zeros((self.hidden_size, 1)))
        vocab = vocab.numpy()
        vocab[x] = 1
        M_c = tf.concat((hprev, vocab), axis=0)
        return M_c

    def feedforward(self, M_c, p_s):
        ft = tf.sigmoid(tf.matmul(self.W1, M_c) + self.b1)
        it = tf.sigmoid(tf.matmul(self.W2, M_c) + self.b2)
        gt = tf.math.tanh(tf.matmul(self.W3, M_c) + self.b3)
        cs = tf.multiply(ft, p_s) + tf.multiply(it, gt)
        ot = tf.nn.sigmoid(tf.matmul(self.W4, M_c) + self.b4)
        ht = tf.multiply(ot, tf.math.tanh(cs))
        output = self.softmax(tf.matmul(self.W5, ht) + self.b5)
        return ht, output, cs

    def sample_text(self, hprev, begin, p_s, n):
        vocab = tf.Variable(tf.zeros((self.vocab_size, 1)), tf.float32)
        vocab = vocab.numpy()
        vocab[begin] = 1
        letters = []
        for i in range(n):
            M = tf.Variable(tf.zeros((self.input_size, 1)), name="M")
            M = tf.assign(M, tf.concat((hprev, vocab), axis=0))
            ft = tf.nn.sigmoid(tf.matmul(self.W1, M) + self.b1)
            it = tf.nn.sigmoid(tf.matmul(self.W2, M) + self.b2)
            gt = tf.math.tanh(tf.matmul(self.W3, M) + self.b3)
            cs = tf.multiply(ft, p_s) + tf.multiply(it, gt)
            p_s = cs
            ot = tf.sigmoid(tf.matmul(self.W4, M) + self.b4)
            ht = tf.multiply(ot, tf.math.tanh(cs))
            ht = tf.reshape(ht, (self.hidden_size, 1))
            output = tf.matmul(self.W5, ht) + self.b5
            p = self.softmax(output)
            #print(p.numpy())
            p = tf.reshape(p, (1, self.vocab_size))
            samples = tf.random.categorical(p, 1)
            sample_selected = tf.cast(samples[0][0].numpy(), tf.int32)
            selection_sample_np = [i for i in range(self.vocab_size)]
            selection_sample_tf = tf.convert_to_tensor(selection_sample_np)
            selected_next_letter = selection_sample_tf[sample_selected]
            trial = tf.cast(selected_next_letter, tf.int32)
            k = tf.Variable(tf.zeros((self.vocab_size, 1)), tf.int32)
            k[selected_next_letter, 0].assign(1)
            letters.append(selected_next_letter)
            hprev = ht
        return letters

    def process_input(self):
        char_to_ix = {ch: ix for ix, ch in enumerate(self.words)}
        ix_to_char = {ix: ch for ix, ch in enumerate(self.words)}
        return char_to_ix, ix_to_char

    def softmax(self, z):
        return tf.math.exp(z - max(z)) / tf.math.reduce_sum(tf.math.exp(z - max(z)))

    def AggregatorNew(self):
        losses, iterations = [], []
        char_to_ix, ix_to_char = self.process_input()
        mem1 = tf.Variable(tf.zeros_like(self.W1))
        mem2 = tf.Variable(tf.zeros_like(self.W2))
        mem3 = tf.Variable(tf.zeros_like(self.W3))
        mem4 = tf.Variable(tf.zeros_like(self.W4))
        mem5 = tf.Variable(tf.zeros_like(self.W5))
        mem6 = tf.Variable(tf.zeros_like(self.b1))
        mem7 = tf.Variable(tf.zeros_like(self.b2))
        mem8 = tf.Variable(tf.zeros_like(self.b3))
        mem9 = tf.Variable(tf.zeros_like(self.b4))
        mem10 = tf.Variable(tf.zeros_like(self.b5))
        dW1 = tf.Variable(tf.zeros_like(self.W1))
        dW2 = tf.Variable(tf.zeros_like(self.W2))
        dW3 = tf.Variable(tf.zeros_like(self.W3))
        dW4 = tf.Variable(tf.zeros_like(self.W4))
        dW5 = tf.Variable(tf.zeros_like(self.W4))
        db1 = tf.Variable(tf.zeros_like(self.b1))
        db2 = tf.Variable(tf.zeros_like(self.b2))
        db3 = tf.Variable(tf.zeros_like(self.b3))
        db4 = tf.Variable(tf.zeros_like(self.b4))
        db5 = tf.Variable(tf.zeros_like(self.b5))
        n = 0
        p = 0
        self.loss = tf.Variable(0, dtype=tf.dtypes.float32, name="loss")
        smooth_loss = -tf.math.log(1.0/self.vocab_size)*self.sequence_length
        while(1):
            try:
                with DelayedKeyboardInterrupt():
                    if p+self.sequence_length+1 >= len(self.data) or n == 0:
                        hprev = tf.Variable(np.zeros((self.hidden_size, 1)), dtype=tf.float32, name="hprev")
                        p_s = tf.Variable(tf.zeros((self.hidden_size, 1)), name="p_s")
                        p = 0
                    inputs = [char_to_ix[ch] for ch in self.data[p:p+self.sequence_length]]
                    targets = [char_to_ix[ch] for ch in self.data[p+1:p+self.sequence_length+1]]
                    sample_ix = self.sample_text(hprev, inputs[0], p_s, 200)
                    list_of_strings = [ix_to_char[ix.numpy()] for ix in sample_ix]
                    list_of_strings_tf = tf.convert_to_tensor(list_of_strings)
                    txt = tf.strings.join(list_of_strings_tf, separator=" ")
                    print('----\n %s \n----' % (txt.numpy(),))
                    #loss = tf.reduce_mean(xentropy, name="loss")
                    with tf.GradientTape() as g:
                        for x, y in zip(inputs, targets):
                            M_c = self.one_hot_encoding(x, hprev)
                            hprev, output, p_s = self.feedforward(M_c, p_s)
                            activation = output[y]
                            loss = -(tf.math.log(activation))
                    dW1, dW2, dW3, dW4, dW5, db1, db2, db3, db4, db5 = g.gradient(loss, [self.W1, self.W2, self.W3, self.W4, self.W5, self.b1, self.b2, self.b3, self.b4, self.b5])
                    smooth_loss = smooth_loss * 0.999 + loss * 0.001
            except KeyboardInterrupt:
                sample_ix = self.sample_text(hprev, inputs[0], p_s, 200)
                txt = ''.join(ix_to_char[ix] for ix in sample_ix)
                print('----\n %s \n----' % (txt,))
                break
When I use self.softmax() in feedforward, it gives me probability values in the output; however, when I use tf.nn.softmax(), all values of the output are, strangely, 1.
Second question: is TensorFlow generally slower on the CPU compared to a pure Python implementation, or am I implementing TensorFlow wrongly?
If you use tf.nn.softmax() and don't specify the axis, it defaults to the last axis, i.e. tf.nn.softmax(logits, axis=-1). Here the logits are a column vector of shape (vocab_size, 1), so the softmax is computed over each row of length 1, and the softmax of a single element is always 1, hence a tensor output where all values are 1s. In my case I was getting wrong values just because I was not providing the axis; for a column vector it should be tf.nn.softmax(logits, axis=0).
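A minimal sketch of the difference, with made-up logits (runs in eager mode):

import tensorflow as tf

logits = tf.constant([[1.0], [2.0], [3.0]])  # column vector, shape (3, 1)

# Default axis (-1): softmax over each length-1 row, so every entry is 1.
print(tf.nn.softmax(logits).numpy().ravel())           # [1. 1. 1.]

# axis=0: softmax over the column, a proper probability distribution.
print(tf.nn.softmax(logits, axis=0).numpy().ravel())   # [0.090 0.245 0.665]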

How can I update a tensor (weight value) when trying to use two separate networks?

I've been trying to make an AI for blackjack using RL. Now I'm trying to build two separate networks, which is one way of doing DQN. I've searched the web, found an approach, and tried to use it, but failed.
This error occurred:
TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
Code:
import gym
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

def one_hot(x):
    s = np.identity(600)
    b = s[x[0] * 20 + x[1] * 2 + x[2]]
    return b.reshape(1, 600)

def boolstr_to_floatstr(v):
    if v == True:
        return 1
    elif v == False:
        return 0

env = gym.make('Blackjack-v0')
learning_rate = 0.5
state_number = 600
action_number = 2

#######################################
X = tf.placeholder(tf.float32, shape=[1, state_number], name='input_data')
W1 = tf.Variable(tf.random_uniform([state_number, 128], 0, 0.01))  # network for update
layer1 = tf.nn.tanh(tf.matmul(X, W1))
W2 = tf.Variable(tf.random_uniform([128, 256], 0, 0.01))
layer2 = tf.nn.tanh(tf.matmul(layer1, W2))
W3 = tf.Variable(tf.random_uniform([256, action_number], 0, 0.01))
Qpred = tf.matmul(layer2, W3)  # Q prediction
#######################################
X1 = tf.placeholder(shape=[1, state_number], dtype=tf.float32)
W4 = tf.Variable(tf.random_uniform([state_number, 128], 0, 0.01))  # network for target
layer3 = tf.nn.tanh(tf.matmul(X1, W4))
W5 = tf.Variable(tf.random_uniform([128, 256], 0, 0.01))
layer4 = tf.nn.tanh(tf.matmul(layer3, W5))
W6 = tf.Variable(tf.random_uniform([256, action_number], 0, 0.01))
target = tf.matmul(layer4, W6)  # target
#######################################
update1 = W4.assign(W1)
update2 = W5.assign(W2)
update3 = W6.assign(W3)

Y = tf.placeholder(shape=[1, action_number], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(Y - Qpred))  # cost(W) = (Ws-y)^2
train = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

num_episodes = 1000
dis = 0.99  # discount factor
rList = []  # record the reward
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):  # loop over the episodes
        s = env.reset()
        rALL = 0
        done = False
        e = 1./((i/100)+1)  # constant for the exploit/explore trade-off
        total_loss = []
        while not done:
            s = np.asarray(s)
            s[2] = boolstr_to_floatstr(s[2])
            #print(np.shape(one_hot(s)))
            #print(one_hot(s))
            Qs = sess.run(Qpred, feed_dict={X: one_hot(s).astype(np.float32)})
            if np.random.rand(1) < e:  # explore: try something new
                a = env.action_space.sample()
            else:
                a = np.argmax(Qs)  # exploit: pick the action with the highest known Q-value
            s1, reward, done, _ = env.step(a)
            s1 = np.asarray(s1)
            s1[2] = boolstr_to_floatstr(s1[2])
            if done:
                Qs[0, a] = reward
            else:
                Qs1 = sess.run(target, feed_dict={X1: one_hot(s1)})
                Qs[0, a] = reward + dis*np.max(Qs1)  # optimal Q
            sess.run(train, feed_dict={X: one_hot(s), Y: Qs})
            if i % 10 == 0:  # update the target network from the Qpred network
                sess.run(update1, update2, update3)
            if reward == 1:
                rALL += reward
            else:
                rALL += 0
            s = s1
        rList.append(rALL)

print('success rate: ' + str(sum(rList)/num_episodes))
print("Final Q-table values")
I need to print the success rate at the end; before DQN it was around 38%. If there is something wrong in my code, considering it's meant to be the DQN algorithm, please tell me.
If you want to share the weights between different networks, then simply create the layers with the same names, using the scope with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):, and the weights between the networks will be shared automatically.
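For illustration, a minimal TF1-style sketch (hypothetical layer and scope names): tf.layers.dense creates its variables inside the enclosing variable scope, so calling the builder twice with the same scope name reuses the same weights, while a different scope name gives you an independent target network.

import tensorflow as tf

def q_network(x, scope):
    # With reuse=tf.AUTO_REUSE, variables are created on the first call
    # and reused on every later call with the same scope name.
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        h = tf.layers.dense(x, 128, activation=tf.nn.tanh, name="fc1")
        h = tf.layers.dense(h, 256, activation=tf.nn.tanh, name="fc2")
        return tf.layers.dense(h, 2, name="q_values")

x = tf.placeholder(tf.float32, shape=[1, 600])
q_main   = q_network(x, "main")    # creates main/fc1, main/fc2, main/q_values
q_same   = q_network(x, "main")    # reuses the weights created above
q_target = q_network(x, "target")  # a separate set of weights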