KeyError: "['green_picture', 'yellow_green_picture', 'yellow_picture'] not in index" - indexing

This is my code:
# Predictors for the regression: the hypothesis-driven main variables
# followed by the control variables.

# Main X variables, taken from the hypothesis.
main_x_names = ['Green_color', 'Yellow_green_color', 'Yellow_color']

# Control variables: the hand-picked ones (this part may be left empty)
# plus every column of datetime_features.
control_x_names = [
    'length_description',
    'Valence_overall',
    'ocr_Valence_overall',
    'ocr_length_description',
] + datetime_features.columns.to_list()

# Full list of X variables: main variables first, then the controls.
x_names = main_x_names + control_x_names
This is my error. I don't know whether the formatting is wrong or something else; only these columns are not recognized. When I ran the code below, I got this:
# Response variable: log of (likeCount + 1).
Y=np.log(test_data1['likeCount']+1)
# Select the predictor columns named in x_names. This is the statement that
# raises KeyError when a name in x_names is not a column of test_data1
# (the lookup is an exact, case-sensitive match on the column labels).
X =test_data1[x_names]
# presumably statsmodels' add_constant (adds an intercept column) — confirm
# that `sm` is statsmodels.api
X=sm.add_constant(X)
# Build the OLS model of Y on X and fit it.
model=sm.OLS(Y,X)
results=model.fit()
KeyError Traceback (most recent call last)
<ipython-input-93-a5ec2e24ee54> in <module>()
1 Y=np.log(test_data1['likeCount']+1)
----> 2 X =test_data1[x_names]
3 X=sm.add_constant(X)
4 model=sm.OLS(Y,X)
5 results=model.fit()
2 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis)
1375
1376 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 1377 raise KeyError(f"{not_found} not in index")
1378
1379
This was my mistake: I did not copy the key error into the code box. The final key error should be: KeyError: "['Green_color', 'Yellow_green_color', 'Yellow_color'] not in index"
I don't know what the problem is.

Related

How to debug a "IndexError: invalid index to scalar variable" error in Python?

This is my code:
import matplotlib.patches as pat
# NOTE(review): Ellipse expects its centre as a single (x, y) pair. Here
# v1_mean and v2_mean are passed as two separate positional arguments, so a
# lone number ends up stored as the centre; nothing complains until
# add_patch() later indexes that centre and fails with
# "IndexError: invalid index to scalar variable".
oval = pat.Ellipse(v1_mean,v2_mean,v1_std*2,v2_std*2)
fig,graph = plt.subplots()
# Scatter the raw data, then mark the mean point with a larger dot and a label.
graph.scatter(v1,v2)
graph.scatter(v1_mean,v2_mean, s=100)
graph.text(v1_mean,v2_mean, 'Mean')
# Draw the ellipse on the same axes (this is where the error surfaces).
graph.add_patch(oval)
And this is the error that comes:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-40-2278a0e6f4cf> in <module>()
7 graph.scatter(v1_mean,v2_mean, s=100)
8 graph.text(v1_mean,v2_mean, 'Mean')
----> 9 graph.add_patch(oval)
10
11 graph.xlabel('V1')
/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_base.py in add_patch(self, p)
2033 if p.get_clip_path() is None:
2034 p.set_clip_path(self.patch)
-> 2035 self._update_patch_limits(p)
2036 self.patches.append(p)
2037 p._remove_method = lambda h: self.patches.remove(h)
/opt/conda/lib/python3.6/site-packages/matplotlib/axes/_base.py in _update_patch_limits(self, patch)
2053 vertices = patch.get_path().vertices
2054 if vertices.size > 0:
-> 2055 xys = patch.get_patch_transform().transform(vertices)
2056 if patch.get_data_transform() != self.transData:
2057 patch_to_data = (patch.get_data_transform() -
/opt/conda/lib/python3.6/site-packages/matplotlib/patches.py in get_patch_transform(self)
1492
1493 def get_patch_transform(self):
-> 1494 self._recompute_transform()
1495 return self._patch_transform
1496
/opt/conda/lib/python3.6/site-packages/matplotlib/patches.py in _recompute_transform(self)
1476 not directly access the transformation member variable.
1477 """
-> 1478 center = (self.convert_xunits(self.center[0]),
1479 self.convert_yunits(self.center[1]))
1480 width = self.convert_xunits(self.width)
IndexError: invalid index to scalar variable.
Basically, what I am trying to do is plot an oval shape and some data into the same graph. But it seems like the error has got to do with the center of the oval, but I dont know what is exactly wrong. It's strange that I followed exactly what the teacher has done, but mine came with an error while his is ok.
It's strange that I followed exactly what the teacher has done, but mine came with an error while his is ok.
Probably you didn't follow exactly. According to the documentation of matplotlib.patches.Ellipse the xy coordinates of ellipse centre are to be given as a tuple rather than individual arguments, so it's not
oval = pat.Ellipse(v1_mean,v2_mean,v1_std*2,v2_std*2)
but
oval = pat.Ellipse((v1_mean, v2_mean), v1_std*2, v2_std*2)
instead. Unfortunately Ellipse didn't warn about this and stored a single number as the ellipse center.

modifying a dataframe by adding additional if statement column

Modifying a data frame by adding an additional column with if statement.
I created the following lists: East_Asia, Central_Asia, Central_America, South_America, Europe_East and Europe_West. I want to add a conditional column based on an existing column, i.e. if Japan is in Central_East, then the Japan row of the new column should contain Central East, and so on.
# NOTE(review): this statement does not parse. Inside an expression a chain of
# conditions must be written `a if c1 else b if c2 else c`; `elif` is only
# valid in an if-statement, which is why the interpreter reports a SyntaxError
# on the second line. Also note that `x in 'Asia_East'` tests whether x is a
# substring of that string literal, not whether x is a member of a list named
# Asia_East, and that the "United-States" literal is split across two lines.
df['native_region'] =df["native_country"].apply(lambda x: "Asia-East" if x in 'Asia_East'
"Central-Asia" elif x in "Central_Asia"
"South-America" elif x in "South_America"
"Europe-West" elif x in "Europe_West"
"Europe-East" elif x in "Europe_East"
"United-States" elif x in "
United-States"
else "Outlying-US"
)
File "", line 2
"Central-Asia" elif x in "Central_Asia"
^
SyntaxError: invalid syntax
I might be wrong, but I think you're taking the problem the wrong way around.
What you seem to be doing there is just to replace '_' by '-', which you can do with the following line:
# Copy native_country into native_region with every '_' turned into '-'.
df['native_region'] = df['native_country'].str.replace('_', '-')
And then, in my experience, it's more understandable to work like that :
known_countries = ['Asia-East', 'Central-Asia', 'South-America', ...]
# Boolean mask: True for rows whose native_country is one of the known names.
is_known = df['native_country'].isin(known_countries)
# Negate the mask (not the plain list, which cannot be inverted with ~) and
# assign through .loc so the write goes to df itself rather than to a
# temporary copy produced by chained indexing.
df.loc[~is_known, 'native_region'] = 'Outlying-US'
This could work also if you worked with countries like :
east_asia_countries = ['Japan', 'China', 'Korea']
# Boolean mask: True for rows whose native_country is an East-Asian country.
isin_east_asia = df['native_country'].isin(east_asia_countries)
# Index with the mask computed above (not with a list of names) and assign
# through .loc so the write goes to df itself rather than to a temporary copy.
df.loc[isin_east_asia, 'native_region'] = 'East-Asia'

PuLP - COIN-CBC error: How to add constraint with double inequality and relaxation?

I want to add this set of constraints:
$$-M\,(1 - X_{i,j,k,n}) \;\le\; S_{i,j,k,n} - ToD_{i,j,k,n} \;\le\; M\,(1 - X_{i,j,k,n}) \qquad \forall\, i,j,k,n$$
Where M is a big number, S is an integer variable that takes values between 0 and 1440, and ToD is a 4-dimensional matrix whose values come from an Excel sheet. X is a binary variable: it takes the values 0 or 1.
I try to implement in code as following:
# Lower half of the big-M link between S and ToD:
#   -BIG_NUMBER * (1 - X) <= S - ToD
# added for every (i, j, k, n) whose i and j are neither the start nor the
# end point.
for n in range(L):
    for k in range(M):
        for i in range(N):
            for j in range(N):
                # Use `and`, not `&`: `&` binds tighter than `!=`, so the
                # original expression was parsed as one chained comparison
                # over bitwise-ANDed operands instead of four separate tests.
                if (i != START_POINT_S and i != END_POINT_T
                        and j != START_POINT_S and j != END_POINT_T):
                    prob += (-BIG_NUMBER*(1-X[i][j][k][n])) <= (S[i][j][k][n] - ToD[i][j][k][n]), ""
and another constraint as follows:
# Upper half of the big-M link between S and ToD:
#   S - ToD <= BIG_NUMBER * (1 - X)
# added for every (i, j, k, n) whose i and j are neither the start nor the
# end point.
for i in range(N):
    for j in range(N):
        for k in range(M):
            for n in range(L):
                # Use `and`, not `&`: `&` binds tighter than `!=`, so the
                # original expression was parsed as one chained comparison
                # over bitwise-ANDed operands instead of four separate tests.
                if (i != START_POINT_S and i != END_POINT_T
                        and j != START_POINT_S and j != END_POINT_T):
                    prob += S[i][j][k][n] - ToD[i][j][k][n] <= BIG_NUMBER*(1-X[i][j][k][n]), ""
In my experience, these two constraints in code are exactly equivalent to what we want. The problem is that PuLP and CBC won't accept them. They produce the following errors:
PuLP:
Traceback (most recent call last):
File "basic_JP.py", line 163, in <module>
prob.solve()
File "C:\Users\dimri\Desktop\Filesystem\Projects\deliverable_B4\lib\site-packa
ges\pulp\pulp.py", line 1643, in solve
status = solver.actualSolve(self, **kwargs)
File "C:\Users\dimri\Desktop\Filesystem\Projects\deliverable_B4\lib\site-packa
ges\pulp\solvers.py", line 1303, in actualSolve
return self.solve_CBC(lp, **kwargs)
File "C:\Users\dimri\Desktop\Filesystem\Projects\deliverable_B4\lib\site-packa
ges\pulp\solvers.py", line 1366, in solve_CBC
raise PulpSolverError("Pulp: Error while executing "+self.path)
pulp.solvers.PulpSolverError: Pulp: Error while executing C:\Users\dimri\Desktop
\Filesystem\Projects\deliverable_B4\lib\site-packages\pulp\solverdir\cbc\win\64\
cbc.exe
and CBC:
Welcome to the CBC MILP Solver
Version: 2.9.0
Build Date: Feb 12 2015
command line - C:\Users\dimri\Desktop\Filesystem\Projects\deliverable_B4\lib\sit
e-packages\pulp\solverdir\cbc\win\64\cbc.exe 5284-pulp.mps branch printingOption
s all solution 5284-pulp.sol (default strategy 1)
At line 2 NAME MODEL
At line 3 ROWS
At line 2055 COLUMNS
Duplicate row C0000019 at line 10707 < X0001454 C0000019 -1.000000000000e+
00 >
Duplicate row C0002049 at line 10708 < X0001454 C0002049 -1.000000000000e+
00 >
Duplicate row C0000009 at line 10709 < X0001454 C0000009 1.000000000000e+
00 >
Duplicate row C0001005 at line 10710 < X0001454 C0001005 1.000000000000e+
00 >
At line 14153 RHS
At line 16204 BOUNDS
Bad image at line 17659 < UP BND X0001454 1.440000000000e+03 >
At line 18231 ENDATA
Problem MODEL has 2050 rows, 2025 columns and 5968 elements
Coin0008I MODEL read with 5 errors
There were 5 errors on input
** Current model not valid
Option for printingOptions changed from normal to all
** Current model not valid
No match for 5284-pulp.sol - ? for list of commands
Total time (CPU seconds): 0.02 (Wallclock seconds): 0.02
I don't know what's the problem, any help? I am new to this, if information are not enough let me know what I should add.
Alright, I searched for hours, but right after I posted this question I found the answer. These kinds of problems are mainly caused by the names of the variables or the constraints; that is what caused something to be duplicated. I am really not used to this kind of software, which is why it took me so long to find an answer. Anyway, the problem for me was in how I defined the variables:
# Decision variables X[i][j][k][n]: integers restricted to the range 0..1.
lower_bound_X, upper_bound_X = 0, 1
X = LpVariable.dicts(
    "X",
    indexs=(range(N), range(N), range(M), range(L)),
    cat=LpInteger,
    lowBound=lower_bound_X,
    upBound=upper_bound_X,
)
and
# define S[i,j,k,n]
lower_bound_S = 0 # lower bound for variable S
upper_bound_S = 1440 # upper bound for variable S
# NOTE: name="X" below is the copy-paste slip this snippet is shown for. It
# repeats the name already given to the X variables, so the S and X families
# are created under identical variable names; it should read name="S".
S = LpVariable.dicts(name="X",
indexs=(range(N),
range(N), range(M), range(L)),
lowBound=lower_bound_S,
upBound=upper_bound_S,
cat=LpInteger)
As you see in the definition of S I obviously forgot to change the name of the variable to S because I copy-pasted it. Anyway, the right way to define S is like this:
# Decision variables S[i][j][k][n]: integers restricted to the range 0..1440,
# registered under their own name "S".
lower_bound_S, upper_bound_S = 0, 1440
S = LpVariable.dicts(
    "S",
    indexs=(range(N), range(N), range(M), range(L)),
    cat=LpInteger,
    lowBound=lower_bound_S,
    upBound=upper_bound_S,
)
This is how I got my code running.

IPython doesn't answer when calling self-defined function

# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the exact nesting of the statements below cannot be read off the text.
# Visible behaviour: starting from the last index, look for a 0 in L, set it
# to 1, then set the entries after that index to 0 and return L.
def nast(L):
i=len(L)-1
# On Python 2 the comprehension variable `i` on the next line leaks into the
# function scope and overwrites the index `i` set above every time the loop
# condition is evaluated; Python 3 gives the comprehension its own scope.
while L != [1 for i in range(len(L))]:
if L[i]==0:
L[i]=1
break
i=i-1
for j in range(i+1,len(L)):
L[j]=0
return L
L = [0,0,1,0,1]
I would like to pass the list L to this function, but when I do, I get nothing and the IPython kernel seems to be frozen; when I use the "Interrupt current kernel" option, I get:
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-3-000635d72af9> in <module>()
----> 1 nast(L)
<ipython-input-1-7918814a171f> in nast(L)
1 def nast(L):
2 i=len(L)-1
----> 3 while L != [1 for i in range(len(L))]:
4 if L[i]==0:
5 L[i]=1
KeyboardInterrupt:
I wonder what is wrong, thank you for help in advance.
When you do this:
while L != [1 for i in range(len(L))]:
The i variable leaks out of the list comprehension, so after that line, i is always len(L)-1, and your while loop is always checking the last item in L.
This was fixed in Python 3, so your code works there (at least, it finishes - I don't know if it's doing what you expect). To do it in Python 2, you'll need to call one of your i variables something else.

Printing out a binary search tree with slashes

http://pastebin.com/dN9a9xfs
That's my code to print out the elements of a binary search tree. The goal is to display it in level order, with slashes connecting the parent to each child. So for instance, the sequence 15 3 16 2 1 4 19 17 28 31 12 14 11 0 would display after execution as:
15
/ \
3 16
/ \ \
2 4 19
/ \ / \
1 12 17 28
/ / \ \
0 11 14 31
I've been working on it for a long time now, but I just can't seem to get the spacing/indentation right. I know I wrote the proper algorithm for displaying the nodes in the proper order, but the slashes are just off. This is the result of my code as is: http://imgur.com/sz8l1
I know I'm so close to the answer, since my display is not that far off from what I need, and I have a feeling it's a really simple solution, but for some reason I just can't seem to get it right.
I'm out of time for now, but here's a quick version. I did not read your code (don't know C++), so I don't know how close our solutions are.
I changed the output format slightly. Instead of / for the left node, I used | so I didn't have to worry about left spacing at all.
15
| \
3  16
|\   \
2 4   19
|  \  | \
1   | 17 28
|   |      \
0   12      31
    | \
    11 14
Here's the code. I hope you're able to take what you need from it. There are definitely some Pythonisms which I hope map to what you're using. The main idea is to treat each row of numbers as a map of position to node object, and at each level, sort the map by key and print them to the console iteratively based on their assigned position. Then generate a new map with positions relative to their parents in the previous level. If there's a collision, generate a fake node to bump the real node down a line.
from collections import namedtuple
# Simple node representation: a label plus optional left and right children.
Node = namedtuple('Node', ('label', 'left', 'right'))


def makenode(n, left=None, right=None):
    """Return a Node whose label is str(n) with the given children.

    Args:
        n: value to show for the node; it is converted to a string.
        left: left child Node, or None when there is none.
        right: right child Node, or None when there is none.
    """
    return Node(str(n), left, right)


# The example tree from the question, built from the root downwards.
root = makenode(
    15,
    makenode(
        3,
        makenode(2, makenode(1, makenode(0))),
        makenode(4, None, makenode(12, makenode(11), makenode(14)))),
    makenode(16, None, makenode(19, makenode(17),
                                makenode(28, None, makenode(31)))))
def print_levels(print_items, lines=None):
    """Render a tree level by level as text lines.

    Each call renders one level and recurses into the next one.

    Args:
        print_items: dict mapping a column position to the node whose label
            starts at that column on the current level.
        lines: lines produced by the levels above; a new list is started
            when it is omitted.

    Returns:
        The list of lines. Every level contributes a line of labels followed
        by a line of connectors ('|' for a left child, '\\' for a right one).
    """
    if lines is None:
        lines = []
    if not print_items:
        return lines

    # Both output lines are kept as lists of single characters so that the
    # list length always equals the current line width.
    new_nodes_line = []
    new_slashes_line = []
    # {position: node} for the recursive call on the next level.
    next_items = {}

    # Visit the nodes left to right; positions are unique dict keys, so the
    # sort never has to compare two nodes.
    for position, node in sorted(print_items.items()):
        # Pad both lines with spaces up to this node's column (a non-positive
        # count adds nothing).
        new_nodes_line.extend(' ' * (position - len(new_nodes_line)))
        new_slashes_line.extend(' ' * (position - len(new_slashes_line)))

        new_nodes_line.extend(node.label)
        len_num = len(node.label)

        if node.left is not None:
            # If a child already scheduled for the next level sits within two
            # columns to the left, push it one level further down by giving
            # it a '|' parent, which prints as a line dropping down.
            for collision in (position, position - 1, position - 2):
                if collision in next_items:
                    next_items[collision] = makenode(
                        '|', next_items[collision])
            new_slashes_line.append('|')
            next_items[position] = node.left
        else:
            new_slashes_line.append(' ')

        # Pad the connector line to the end of the label.
        new_slashes_line.extend(
            ' ' * (position + len_num - len(new_slashes_line)))

        if node.right is not None:
            new_slashes_line.append('\\')
            next_items[position + len_num + 1] = node.right
        else:
            new_slashes_line.append(' ')

    lines.append(''.join(new_nodes_line))
    lines.append(''.join(new_slashes_line))

    # Render the next level into the same list.
    return print_levels(next_items, lines)
# Render the example tree with its root at column 0 and print the result.
lines = print_levels({0: root})
# Call form of print: with a single argument it behaves the same on Python 2
# and Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print('\n'.join(lines))