I have a csv file with following structure:
Tokens,Tags,Polarities
"['i', 'agree', 'about', 'arafat', '.', 'i', 'mean', ',', 'shit', ',', 'they', 'even', 'gave', 'one', 'to', 'jimmy', 'carter', 'ha', '.', 'it', 'should', 'be', 'called', ""''"", 'the', 'worst', 'president', ""''"", 'prize', '.']","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]"
"['musicmonday', 'britney', 'spears', '-', 'lucky', 'do', 'you', 'remember', 'this', 'song', '?', 'it', '`', 's', 'awesome', '.', 'i', 'love', 'it', '.']","[0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[-1, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]"
"['wtf', '?', 'hilary', 'swank', 'is', 'coming', 'to', 'my', 'school', 'today', ',', 'just', 'to', 'chill', '.', 'lol', 'wow']","[0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]"
"['my', '3-year-old', 'was', 'amazed', 'yesterday', 'to', 'find', 'that', ""'"", 'real', ""'"", '10', 'pin', 'bowling', 'is', 'nothing', 'like', 'it', 'is', 'on', 'the', 'wii', '...']","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1]"
"['God', 'damn', '.', 'That', 'Sony', 'remote', 'for', 'google', 'is', 'fucking', 'hideeeeeous', '!']","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[-1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1]"
I am trying to read the file as follows:
twitter_train = pd.read_csv('twitter_train.csv')
Then I can see that it has a correct structure:
twitter_train.head(3)
Tokens Tags Polarities
0 ['i', 'agree', 'about', 'arafat', '.', 'i', 'm... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...
1 ['musicmonday', 'britney', 'spears', '-', 'luc... [0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [-1, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1,...
2 ['wtf', '?', 'hilary', 'swank', 'is', 'coming'... [0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,...
I want to convert each column to a list of lists, for example:
twitter_train_lists = twitter_train['Tokens'].tolist()
But the result has the wrong structure: every row is a single string rather than a list, which shows up as an extra \ or " around each element and around each list itself:
['[\'i\', \'agree\', \'about\', \'arafat\', \'.\', \'i\', \'mean\', \',\', \'shit\', \',\', \'they\', \'even\', \'gave\', \'one\', \'to\', \'jimmy\', \'carter\', \'ha\', \'.\', \'it\', \'should\', \'be\', \'called\', "\'\'", \'the\', \'worst\', \'president\', "\'\'", \'prize\', \'.\']',
"['musicmonday', 'britney', 'spears', '-', 'lucky', 'do', 'you', 'remember', 'this', 'song', '?', 'it', '`', 's', 'awesome', '.', 'i', 'love', 'it', '.']",
"['wtf', '?', 'hilary', 'swank', 'is', 'coming', 'to', 'my', 'school', 'today', ',', 'just', 'to', 'chill', '.', 'lol', 'wow']",
'[\'my\', \'3-year-old\', \'was\', \'amazed\', \'yesterday\', \'to\', \'find\', \'that\', "\'", \'real\', "\'", \'10\', \'pin\', \'bowling\', \'is\', \'nothing\', \'like\', \'it\', \'is\', \'on\', \'the\', \'wii\', \'...\']',
"['God', 'damn', '.', 'That', 'Sony', 'remote', 'for', 'google', 'is', 'fucking', 'hideeeeeous', '!']"]
How can I extract the lists properly from this CSV file to get the correct structure:
[['i', 'agree', 'about', 'arafat', '.', 'i', 'mean', ',', 'shit', ',', 'they', 'even', 'gave', 'one', 'to', 'jimmy', 'carter', 'ha', '.', 'it', 'should', 'be', 'called', "''", 'the', 'worst', 'president', "''", 'prize', '.'],
['musicmonday', 'britney', 'spears', '-', 'lucky', 'do', 'you', 'remember', 'this', 'song', '?', 'it', '`', 's', 'awesome', '.', 'i', 'love', 'it', '.'],
['wtf', '?', 'hilary', 'swank', 'is', 'coming', 'to', 'my', 'school', 'today', ',', 'just', 'to', 'chill', '.', 'lol', 'wow'],
['my', '3-year-old', 'was', 'amazed', 'yesterday', 'to', 'find', 'that', "'", 'real', "'", '10', 'pin', 'bowling', 'is', 'nothing', 'like', 'it', 'is', 'on', 'the', 'wii', '...'],
['God', 'damn', '.', 'That', 'Sony', 'remote', 'for', 'google', 'is', 'fucking', 'hideeeeeous', '!']]
You can find the original dataset file here: https://github.com/1tangerine1day/Aspect-Term-Extraction-and-Analysis/tree/master/data
Update:
I tried another way but have the same problem:
import csv
# Read every row of the file with the standard-library CSV reader.
# newline='' leaves line-ending handling to the csv module.
with open('twitter_train.csv', newline='') as f:
    reader = csv.reader(f)
    # Each row comes back as a list of strings, so the list-like cells are
    # still plain text at this point.
    data = list(reader)
Another incorrect output:
print(data[3])
["['wtf', '?', 'hilary', 'swank', 'is', 'coming', 'to', 'my', 'school', 'today', ',', 'just', 'to', 'chill', '.', 'lol', 'wow']", '[0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', '[-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]']
Thanks in advance!
The values in your CSV are actually strings, not lists. You need to convert them into actual lists.
import ast
twitter_train = pd.read_csv('twitter_train.csv')
# Every cell in 'Tokens' holds the text of a Python list literal. Parse it
# with ast.literal_eval instead of stripping and splitting by hand: tokens
# that are themselves quote characters are written with double quotes
# (e.g. "''"), so splitting on "', '" would glue them to their neighbours.
# The same call can be applied to the 'Tags' and 'Polarities' columns.
twitter_train['Tokens'] = twitter_train['Tokens'].apply(ast.literal_eval)
Updated
I am working on a word embedding model for answer-matching score prediction using TFLearn. I have to build a model from sentence vectors using the TFLearn DNN classifier, and now I have to add a word embedding layer to the DNN model. How can I do that? Thanks in advance.
"JVMdefines": enables a computer to run a Java program
is converted as:
"JVMdefines": [[list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
enables a computer to run a Java program :
list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
My question: is there any method by which the machine is able to analyze
enables a "machine" to run a Java program
That is, can it detect that "computer" and "machine" have the same meaning?
I would post a clarifying comment, but I do not have enough reputation to do so, so I will try to answer given the information you have presented in the original question...
Your problem seems unclear, but here is how you would do this for a binary classification problem in tflearn.
Step 1: Preprocessing
First thing you need to do is to tokenize and transform your sentences into list of integers:
"What kind of food do you like?" ---> [234,64,12,5224,43,96,23]
Then, most people pad their sequences to all be the same length, cutting off the long ones or increasing the length of short ones by padding with 0's.
[234,64,12,5224,43,96,23] ---> [0,0,0,0....234,64,12,5224,43,96,23]
Hint:
# Bring every integer sequence to max_document_length, using 0 as the filler.
from tflearn.data_utils import pad_sequences
padded = pad_sequences(unpadded, maxlen=max_document_length, value=0.)
Step 2: Model Building
After you transform all the text you have into integer sequences, you can build the model. Note here that our input shape is [None, max_document_length]. None means optional size (allows for variable batch size), and max_document_length is the length of our sequences that we padded previously.
# Create our model.
# The input is a batch of padded integer sequences: None leaves the batch
# size open, max_document_length is the padded sequence length.
network = tflearn.input_data(shape=[None, max_document_length], name='input')
Create the embedding matrix. Note that the embedding matrix is pushed to the CPU. The input_dim parameter expects an integer giving the size of your vocabulary; output_dim is the size of your embedding.
# Keep the embedding matrix on the CPU.
with tf.device('/cpu:0'):
    network = tflearn.embedding(network, input_dim=vocabulary_size, output_dim=128)
# Pass embeddings into an LSTM layer (handles sequential problems)
network = tflearn.lstm(network, 512, dropout=0.8)
# Squish data into a fully connected layer, with 2 outputs for binary classification
network = tflearn.fully_connected(network, 2, activation='softmax')
# Perform regression to get the final answer
network = tflearn.regression(network, optimizer='rmsprop', learning_rate=0.001,
                             loss='categorical_crossentropy')
# Wrap the graph we just created in a tflearn DNN wrapper
model = tflearn.DNN(network)
# Run model.fit to actually train your model
model.fit(x_train, y_train, n_epoch=15, shuffle=True, validation_set=(x_val, y_val),
          show_metric=True, batch_size=batch_size)
import numpy as np
from hmmlearn.hmm import MultinomialHMM
# Guessed values for a 2-state model; each row sums to 1.
# NOTE(review): hmmlearn documents the *_prior arguments as Dirichlet prior
# parameters rather than initial probabilities -- confirm that passing
# probabilities here is what was intended.
startprob_prior = np.array([0.5, 0.5]) # guess: one entry per hidden state
transmat_prior = np.array([[0.9, 0.1], [0.3, 0.7]]) # guess: state-to-state matrix
#data is binary, 0\1 with bursts of 1's
x = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] # data
x = np.array(x).reshape(-1,1) # column vector: one observation per row
# Two hidden states, with the guessed priors from above passed to the model.
hmm = MultinomialHMM(n_components=2, verbose=True, startprob_prior=startprob_prior, transmat_prior=transmat_prior)
hmm.fit(x)
print(hmm.monitor_.converged) # prints True
print(hmm.transmat_) # prints a 2x2 matrix of NaN
Why doesn't it converge? Clearly the 1's come in bursts.
see issue 137
The solution was to tell the model not to initialize the emission probabilities (model.init_params = 'st') and to set them up manually through the internal attribute emissionprob_.
Now it seems to be working! Red is the hidden state, blue is the observation:
import numpy as np
from hmmlearn.hmm import MultinomialHMM
import hmmlearn
# Hand-picked starting values for a 2-state model; each row sums to 1.
start_probability = np.array([0.9, 0.1]) # guess
transition_probability = np.array([[0.9, 0.1], [0.1, 0.9]])
emission_probability = np.array([[0.9, 0.1], [0.1, 0.9]])
model = MultinomialHMM(n_components=2, verbose=True, n_iter=1000, tol=1e-3)
# NOTE(review): these two attribute names have no trailing underscore, unlike
# emissionprob_ below and the startprob_/transmat_ attributes printed after
# fitting -- confirm the model actually reads them.
model.startprob = start_probability
model.transmat = transition_probability
model.emissionprob_ = emission_probability # note: assigned to the internal attribute emissionprob_ (trailing underscore)
# 'st' omits the emission probabilities from initialization, so the
# matrix assigned above is the one used.
model.init_params = 'st'
# data is binary, 0\1 with bursts of 1's
x = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0] # data
x = np.array(x).reshape(-1, 1) # column vector: one observation per row
model.fit(x)
print(model.monitor_.converged) # convergence flag reported by the training monitor
print(model.transmat_) # fitted transition matrix
print(model.emissionprob_) # fitted emission matrix
print(model.startprob_) # fitted start probabilities
# Decode the hidden-state sequence for the observations with Viterbi.
logprob, estimated_states = model.decode(x, algorithm="viterbi")
import matplotlib.pyplot as plt
# Observations drawn as stems, decoded hidden states as a red line.
plt.stem(x, label='observation')
plt.plot(estimated_states, label='hidden states', color='red')
plt.show()