How to separate all of the columns?
df = df[['hlogUs_dB', 'hlogDs_dB']]
df
hlogUs_dB hlogDs_dB
0 109:-3.4,110:-3.4,111:-3.4,112:-3.5,113:-3.5,1... 5:-2.5,6:-2.5,7:-2.1,8:-2.0,9:-2.0,10:-2.0,11:...
1 109:-3.5,110:-3.5,111:-3.4,112:-3.4,113:-3.4,1... 5:-2.1,6:-2.0,7:-1.8,8:-1.8,9:-1.8,10:-1.8,11:...
2 109:-3.7,110:-3.7,111:-3.8,112:-3.8,113:-3.8,1... 5:-2.1,6:-2.0,7:-1.8,8:-1.8,9:-1.8,10:-1.8,11:...
3 109:-3.5,110:-3.6,111:-3.6,112:-3.6,113:-3.7,1... 5:-2.5,6:-2.5,7:-2.1,8:-2.0,9:-2.0,10:-2.0,11:...
4 109:-3.7,110:-3.8,111:-3.8,112:-3.8,113:-3.8,1... 5:-2.5,6:-2.5,7:-2.1,8:-2.1,9:-2.0,10:-2.1,11:...
... ... ...
165 109:-5.2,110:-5.3,111:-5.5,112:-5.7,113:-5.9,1... 5:-2.5,6:-2.5,7:-2.1,8:-2.1,9:-2.1,10:-2.2,11:...
166 109:-5.5,110:-5.6,111:-5.8,112:-6.1,113:-6.3,1... 5:-2.8,6:-2.7,7:-2.5,8:-2.5,9:-2.3,10:-2.5,11:...
167 109:-6.0,110:-6.2,111:-6.4,112:-6.7,113:-7.1,1... 5:-2.6,6:-2.5,7:-2.2,8:-2.2,9:-2.2,10:-2.3,11:...
168 109:-5.4,110:-5.5,111:-5.7,112:-5.9,113:-6.2,1... 5:-3.0,6:-3.0,7:-2.6,8:-2.5,9:-2.5,10:-2.5,11:...
169 109:-5.9,110:-6.1,111:-6.4,112:-6.6,113:-7.0,1... 5:-2.7,6:-2.5,7:-2.3,8:-2.2,9:-2.3,10:-2.3,11:...
170 rows × 2 columns
After that, I split only hlogUs_dB on the delimiters:
df2 = df['hlogUs_dB'].str.split('[,:]', expand=True)
df2 = data.drop(["0"])
df2
The result:
0 1 2 3 4 5 6 7 8 9 ... 276 277 278 279 280 281 282 283 284 285
0 109 -3.4 110 -3.4 111 -3.4 112 -3.5 113 -3.5 ... 343 -4.3 344 -4.3 345 -4.2 346 -4.2 347 -4.2
1 109 -3.5 110 -3.5 111 -3.4 112 -3.4 113 -3.4 ... 343 -4.1 344 -4.2 345 -4.4 346 -4.4 347 -4.2
2 109 -3.7 110 -3.7 111 -3.8 112 -3.8 113 -3.8 ... 343 -4.2 344 -4.3 345 -4.3 346 -4.3 347 -4.3
3 109 -3.5 110 -3.6 111 -3.6 112 -3.6 113 -3.7 ... 343 -4.1 344 -4.1 345 -4.1 346 -4.1 347 -4.1
4 109 -3.7 110 -3.8 111 -3.8 112 -3.8 113 -3.8 ... 343 -4.2 344 -4.2 345 -4.2 346 -4.2 347 -4.3
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
165 109 -5.2 110 -5.3 111 -5.5 112 -5.7 113 -5.9 ... 343 -5.4 344 -5.3 345 -5.2 346 -5.1 347 -5.1
166 109 -5.5 110 -5.6 111 -5.8 112 -6.1 113 -6.3 ... 343 -5.5 344 -5.4 345 -5.3 346 -5.2 347 -5.2
167 109 -6.0 110 -6.2 111 -6.4 112 -6.7 113 -7.1 ... 343 -4.9 344 -4.9 345 -4.9 346 -4.9 347 -4.9
168 109 -5.4 110 -5.5 111 -5.7 112 -5.9 113 -6.2 ... 343 -5.9 344 -5.7 345 -5.7 346 -5.6 347 -5.6
169 109 -5.9 110 -6.1 111 -6.4 112 -6.6 113 -7.0 ... 343 -5.7 344 -5.7 345 -5.7 346 -5.6 347 -5.6
170 rows × 286 columns
After that, I want to drop the index numbers, which appear only in the even columns. I managed to find a solution, but somehow it does not suit my needs.
df2.drop(columns=[0,2,4,6,8,9,10,12,14,16,18,20,22,24,26,28,30,32,34,36])
df2
The output:
0 1 2 3 4 5 6 7 8 9 ... 276 277 278 279 280 281 282 283 284 285
0 109 -3.4 110 -3.4 111 -3.4 112 -3.5 113 -3.5 ... 343 -4.3 344 -4.3 345 -4.2 346 -4.2 347 -4.2
1 109 -3.5 110 -3.5 111 -3.4 112 -3.4 113 -3.4 ... 343 -4.1 344 -4.2 345 -4.4 346 -4.4 347 -4.2
2 109 -3.7 110 -3.7 111 -3.8 112 -3.8 113 -3.8 ... 343 -4.2 344 -4.3 345 -4.3 346 -4.3 347 -4.3
3 109 -3.5 110 -3.6 111 -3.6 112 -3.6 113 -3.7 ... 343 -4.1 344 -4.1 345 -4.1 346 -4.1 347 -4.1
4 109 -3.7 110 -3.8 111 -3.8 112 -3.8 113 -3.8 ... 343 -4.2 344 -4.2 345 -4.2 346 -4.2 347 -4.3
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
165 109 -5.2 110 -5.3 111 -5.5 112 -5.7 113 -5.9 ... 343 -5.4 344 -5.3 345 -5.2 346 -5.1 347 -5.1
166 109 -5.5 110 -5.6 111 -5.8 112 -6.1 113 -6.3 ... 343 -5.5 344 -5.4 345 -5.3 346 -5.2 347 -5.2
167 109 -6.0 110 -6.2 111 -6.4 112 -6.7 113 -7.1 ... 343 -4.9 344 -4.9 345 -4.9 346 -4.9 347 -4.9
168 109 -5.4 110 -5.5 111 -5.7 112 -5.9 113 -6.2 ... 343 -5.9 344 -5.7 345 -5.7 346 -5.6 347 -5.6
169 109 -5.9 110 -6.1 111 -6.4 112 -6.6 113 -7.0 ... 343 -5.7 344 -5.7 345 -5.7 346 -5.6 347 -5.6
170 rows × 286 columns
It still shows the same output as before. I just want to multiply the odd columns by 8 and by the float 4.3125, and then have the results replace the values in the same columns; that was my rough idea.
df2*4.3125
The result is an error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
148 try:
--> 149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
120 if result is None:
--> 121 result = _evaluate_standard(op, op_str, a, b)
122
~\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b)
69 with np.errstate(all="ignore"):
---> 70 return op(a, b)
71
TypeError: can't multiply sequence by non-int of type 'float'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-24-424060d3aad6> in <module>
----> 1 df2*4.3125
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self, other, axis, level, fill_value)
717 self = self.fillna(fill_value)
718
--> 719 new_data = dispatch_to_series(self, other, op, str_rep)
720 return self._construct_result(new_data)
721
~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in dispatch_to_series(left, right, func, str_rep, axis)
376 # Get the appropriate array-op to apply to each block's values.
377 array_op = get_array_op(func, str_rep=str_rep)
--> 378 bm = left._data.apply(array_op, right=right)
379 return type(left)(bm)
380
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
438
439 if callable(f):
--> 440 applied = b.apply(f, **kwargs)
441 else:
442 applied = getattr(b, f)(**kwargs)
~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in apply(self, func, **kwargs)
388 """
389 with np.errstate(all="ignore"):
--> 390 result = func(self.values, **kwargs)
391
392 if is_extension_array_dtype(result) and result.ndim > 1:
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op, str_rep)
195 else:
196 with np.errstate(all="ignore"):
--> 197 res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
198
199 return res_values
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
--> 151 result = masked_arith_op(left, right, op)
152
153 return missing.dispatch_fill_zeros(op, left, right, result)
~\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in masked_arith_op(x, y, op)
110 if mask.any():
111 with np.errstate(all="ignore"):
--> 112 result[mask] = op(xrav[mask], y)
113
114 result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
TypeError: can't multiply sequence by non-int of type 'float'
I am stuck at this point and have searched Stack Overflow and YouTube for the basics of multiplying by a float, but I think my keywords do not match the idea I am after.
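The TypeError happens because str.split returns strings, so df2 still holds text rather than numbers. A minimal sketch of a fix, assuming every cell splits cleanly into a number, that the even-positioned columns hold the tone indices and the odd-positioned ones the dB values, and taking the 8 * 4.3125 scaling from the description above:
import pandas as pd

# Split the "index:value,index:value,..." strings into alternating columns
df2 = df['hlogUs_dB'].str.split('[,:]', expand=True)

# str.split produces strings, which is why df2 * 4.3125 raised the TypeError;
# cast to numbers before doing any arithmetic
df2 = df2.astype(float)

# drop() is not in-place: assign the result back, otherwise df2 stays unchanged
df2 = df2.drop(columns=df2.columns[::2])  # the even-positioned (index) columns

# Scale the remaining (odd-positioned) columns as described
df2 = df2 * 8 * 4.3125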
I need to sort a long list of ID numbers into 'grids' of 8 ID numbers down (8 cells/rows) by 6 ID numbers across (6 columns), sorted from smallest to largest ID number. When one 'grid' is full, the numbers that cannot fit in the first grid should go on to form a second one, and so on. The last 4 cells of the last row of each grid should be blank. (This is a template for a lab procedure.)
i.e. this is the data I have:
[image: column of ID numbers]
and this is how I want it to be (but 6 of these):
[image: example 'grid']
Here's one method.
Sample data
import pandas as pd
import numpy as np
# Sorted list of string IDs
l = np.arange(0, 631, 1).astype('str')
Code
N = 44
# Ensure we can reshape the last group
data = np.concatenate((l, np.repeat('', N - len(l) % N)))
# Split the array and make a separate DataFrame for each grid
data = [
    pd.DataFrame(np.concatenate((x, np.repeat('', 4))).reshape(8, 6))
    for x in np.array_split(data, np.arange(N, len(l), N))
]
df = pd.concat(data, ignore_index=True)  # if you want a single df in the end
Output df:
0 1 2 3 4 5
0 0 1 2 3 4 5
1 6 7 8 9 10 11
2 12 13 14 15 16 17
3 18 19 20 21 22 23
4 24 25 26 27 28 29
5 30 31 32 33 34 35
6 36 37 38 39 40 41
7 42 43
8 44 45 46 47 48 49
9 50 51 52 53 54 55
10 56 57 58 59 60 61
11 62 63 64 65 66 67
12 68 69 70 71 72 73
13 74 75 76 77 78 79
14 80 81 82 83 84 85
15 86 87
16 88 89 90 91 92 93
...
110 608 609 610 611 612 613
111 614 615
112 616 617 618 619 620 621
113 622 623 624 625 626 627
114 628 629 630
115
116
117
118
119
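As a usage note, the intermediate data list above already holds one 8x6 DataFrame per grid, which is handy if every grid should end up on its own worksheet. A small sketch of that, assuming openpyxl (or another Excel engine) is installed and that 'grids.xlsx' is only a placeholder file name:
# Hypothetical export: one worksheet per 8x6 grid
with pd.ExcelWriter('grids.xlsx') as writer:
    for k, grid in enumerate(data, start=1):
        grid.to_excel(writer, sheet_name=f'grid_{k}', index=False, header=False)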
Another approach: pad the ID list with zeros up to a multiple of rows*cols, then reshape it in one go.
import numpy as np
import pandas as pd

# Pad lst with zeros until its length is a multiple of n
func = lambda lst, n: np.pad(lst, (0, n*(1 + len(lst)//n) - len(lst)), 'constant')
rows, cols = 8, 6
arr = np.arange(1, 283, 1)  # np.array(df.A)
new_df = pd.DataFrame(func(arr, rows*cols).reshape(-1, cols))
new_df
0 1 2 3 4 5
0 1 2 3 4 5 6
1 7 8 9 10 11 12
2 13 14 15 16 17 18
3 19 20 21 22 23 24
4 25 26 27 28 29 30
5 31 32 33 34 35 36
6 37 38 39 40 41 42
7 43 44 45 46 47 48
8 49 50 51 52 53 54
9 55 56 57 58 59 60
10 61 62 63 64 65 66
11 67 68 69 70 71 72
12 73 74 75 76 77 78
13 79 80 81 82 83 84
14 85 86 87 88 89 90
15 91 92 93 94 95 96
16 97 98 99 100 101 102
17 103 104 105 106 107 108
18 109 110 111 112 113 114
19 115 116 117 118 119 120
20 121 122 123 124 125 126
21 127 128 129 130 131 132
22 133 134 135 136 137 138
23 139 140 141 142 143 144
24 145 146 147 148 149 150
25 151 152 153 154 155 156
26 157 158 159 160 161 162
27 163 164 165 166 167 168
28 169 170 171 172 173 174
29 175 176 177 178 179 180
30 181 182 183 184 185 186
31 187 188 189 190 191 192
32 193 194 195 196 197 198
33 199 200 201 202 203 204
34 205 206 207 208 209 210
35 211 212 213 214 215 216
36 217 218 219 220 221 222
37 223 224 225 226 227 228
38 229 230 231 232 233 234
39 235 236 237 238 239 240
40 241 242 243 244 245 246
41 247 248 249 250 251 252
42 253 254 255 256 257 258
43 259 260 261 262 263 264
44 265 266 267 268 269 270
45 271 272 273 274 275 276
46 277 278 279 280 281 282
47 0 0 0 0 0 0
I think it is better to save this dataframe to an Excel worksheet and then remove the trailing padded zeros manually. Hope this helps.
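If the manual cleanup is a concern, a minimal sketch of blanking the padding before export (assuming the real IDs are never 0, so 0 only marks padding, and that 'template.xlsx' is just a placeholder name):
# Hypothetical: blank out the padded zeros, then write the sheet
new_df.replace(0, '').to_excel('template.xlsx', index=False, header=False)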
I have a dataframe as follows:
time a b c d e
2006/1/16 249 249 250 250 251
2006/2/15 254 253 255 255 255
2006/3/16 261 261 262 262 264
2006/4/16 272 271 273 273 274
2006/5/16 282 281 283 283 283
2006/6/16 288 287 289 289 289
2006/7/16 292 292 293 293 293
2006/8/16 290 290 291 291 292
2006/9/16 282 281 283 283 284
2006/10/16 271 270 272 272 273
2006/11/16 259 258 260 260 261
2006/12/16 251 251 252 252 253
2007/1/16 247 247 247 248 250
2007/2/15 253 253 254 254 255
2007/3/16 261 261 262 262 264
2007/4/16 273 272 274 274 275
2007/5/16 282 281 283 283 283
2007/6/16 288 288 290 289 290
2007/7/16 292 292 293 293 294
2007/8/16 291 290 291 291 292
2007/9/16 282 282 283 283 284
2007/10/16 271 270 272 272 273
2007/11/16 260 259 261 261 262
I want to unstack it as:
        1    2    3    ...    12
a 2006  .......................
  2007  .......................
b 2006  .......................
  2007  .......................
c 2006  .......................
  2007  .......................
d 2006  .......................
  2007  .......................
e 2006  .......................
  2007  .......................
Could pandas timestamps be applied to it? And how do I generate a year and month index if there is no time column?
year month
2006 1
2006 2
... ..
2006 12
2007 1
2007 2
... ...
2007 12
I'd construct a new pd.Series from numpy arrays and unstack:
import numpy as np
import pandas as pd

df.time = pd.to_datetime(df.time)
cols = list('abcde')
n, m = len(df), len(cols)
# Stack the five value columns into one long array, with matching
# column-name / year / month label arrays for the MultiIndex
v = np.concatenate([df[c].values for c in cols])
i = np.repeat(cols, n)
y = np.tile(df.time.dt.year.values, m)
m = np.tile(df.time.dt.month.values, m)  # the right-hand side still uses the old m (= len(cols)) before rebinding it
pd.Series(v, pd.MultiIndex.from_arrays([i, y, m])).unstack(fill_value=0)
1 2 3 4 5 6 7 8 9 10 11 12
a 2006 249 254 261 272 282 288 292 290 282 271 259 251
2007 247 253 261 273 282 288 292 291 282 271 260 0
b 2006 249 253 261 271 281 287 292 290 281 270 258 251
2007 247 253 261 272 281 288 292 290 282 270 259 0
c 2006 250 255 262 273 283 289 293 291 283 272 260 252
2007 247 254 262 274 283 290 293 291 283 272 261 0
d 2006 250 255 262 273 283 289 293 291 283 272 260 252
2007 248 254 262 274 283 289 293 291 283 272 261 0
e 2006 251 255 264 274 283 289 293 292 284 273 261 253
2007 250 255 264 275 283 290 294 292 284 273 262 0
Use to_datetime first, then create a MultiIndex.from_arrays from the month and year and assign it as the index. Then remove the time column, unstack, and finally transpose with T:
df['time'] = pd.to_datetime(df['time'])
df.index = pd.MultiIndex.from_arrays([df['time'].dt.month, df['time'].dt.year],
                                     names=(None, None))
df = df.drop('time', axis=1).unstack(fill_value=0).T
print(df)
1 2 3 4 5 6 7 8 9 10 11 12
a 2006 249 254 261 272 282 288 292 290 282 271 259 251
2007 247 253 261 273 282 288 292 291 282 271 260 0
b 2006 249 253 261 271 281 287 292 290 281 270 258 251
2007 247 253 261 272 281 288 292 290 282 270 259 0
c 2006 250 255 262 273 283 289 293 291 283 272 260 252
2007 247 254 262 274 283 290 293 291 283 272 261 0
d 2006 250 255 262 273 283 289 293 291 283 272 260 252
2007 248 254 262 274 283 289 293 291 283 272 261 0
e 2006 251 255 264 274 283 289 293 292 284 273 261 253
2007 250 255 264 275 283 290 294 292 284 273 262 0
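Both answers rely on the existing time column. If there really is no time column, as the question also asks, a year/month MultiIndex can be built directly; a minimal sketch, assuming the rows are consecutive months starting at 2006-01:
import pandas as pd

# Hypothetical: generate year/month labels when no time column exists
periods = pd.period_range('2006-01', periods=len(df), freq='M')
df.index = pd.MultiIndex.from_arrays([periods.year, periods.month],
                                     names=['year', 'month'])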