Pandas Multivariate Linear Regression by Group and Saving Results as csv - pandas
I am trying to compute a linear regression of Y = the 'C-A' column on X = ['Plate X', 'Plate Y', 'FIELD X'], with the data grouped by Drum and Plate. Additional question: how can I save the results to a file, preferably CSV?
Is the pandas package sufficient for this task, or is another package needed?
Thank you
Here is my data set:
DF = {'A': {0: 305.03277000000003,
1: 304.42513500000001,
2: 305.119575,
3: 304.42513500000001,
4: 304.07791500000002,
5: 304.85916000000003,
6: 305.72721000000001,
7: 305.81401499999998,
8: 304.07791500000002,
9: 305.03277000000003,
10: 304.68554999999998,
11: 304.945965,
12: 303.38347499999998,
13: 304.945965,
14: 304.51193999999998,
15: 304.25152500000002,
16: 304.51193999999998,
17: 304.25152500000002,
18: 304.42513500000001,
19: 304.85916000000003,
20: 303.8175,
21: 305.119575,
22: 304.59874500000001,
23: 304.68554999999998,
24: 304.33832999999998,
25: 303.90430499999997,
26: 304.68554999999998,
27: 304.772355,
28: 304.59874500000001,
29: 304.772355,
30: 304.59874500000001,
31: 305.119575,
32: 305.37998999999996,
33: 304.59874500000001,
34: 304.42513500000001,
35: 304.33832999999998,
36: 304.51193999999998,
37: 305.46679499999999,
38: 304.59874500000001,
39: 305.29318499999999,
40: 304.85916000000003,
41: 305.29318499999999,
42: 305.119575,
43: 304.945965,
44: 305.29318499999999,
45: 304.85916000000003,
46: 305.72721000000001,
47: 306.16123500000003,
48: 305.37998999999996,
49: 305.03277000000003,
50: 305.20637999999997,
51: 304.51193999999998,
52: 308.33136000000002,
53: 305.81401499999998,
54: 305.55360000000002,
55: 306.42165,
56: 305.64040499999999,
57: 305.29318499999999,
58: 305.37998999999996,
59: 304.772355,
60: 305.37998999999996,
61: 305.72721000000001,
62: 305.90082000000001,
63: 305.64040499999999,
64: 305.81401499999998,
65: 304.85916000000003,
66: 305.20637999999997,
67: 306.42165,
68: 305.64040499999999,
69: 305.55360000000002,
70: 304.59874500000001,
71: 305.55360000000002,
72: 306.07443000000001,
73: 306.42165,
74: 305.98762499999998,
75: 306.68206499999997,
76: 305.03277000000003,
77: 305.46679499999999,
78: 306.42165,
79: 304.85916000000003,
80: 304.51193999999998,
81: 303.8175,
82: 304.51193999999998,
83: 304.16472000000005,
84: 304.51193999999998,
85: 303.73069500000003,
86: 303.29667000000001,
87: 304.68554999999998,
88: 303.73069500000003,
89: 304.42513500000001,
90: 304.51193999999998,
91: 304.16472000000005,
92: 304.945965,
93: 304.772355,
94: 304.42513500000001,
95: 304.16472000000005,
96: 305.119575,
97: 304.16472000000005,
98: 304.25152500000002,
99: 305.20637999999997},
'B': {0: 311.10912000000002,
1: 310.93551000000002,
2: 313.279245,
3: 313.19243999999998,
4: 309.11260499999997,
5: 309.0258,
6: 309.72023999999999,
7: 313.279245,
8: 311.89036499999997,
9: 311.19592499999999,
10: 308.76538500000004,
11: 309.72023999999999,
12: 312.15078,
13: 309.19941,
14: 308.50497000000001,
15: 308.33136000000002,
16: 309.89384999999999,
17: 310.848705,
18: 312.23758500000002,
19: 313.53966000000003,
20: 309.72023999999999,
21: 309.11260499999997,
22: 311.89036499999997,
23: 309.98065499999996,
24: 309.19941,
25: 310.41467999999998,
26: 311.62995000000001,
27: 311.02231499999999,
28: 310.32787500000001,
29: 310.06745999999998,
30: 311.89036499999997,
31: 311.89036499999997,
32: 309.98065499999996,
33: 312.06397500000003,
34: 306.85567500000002,
35: 309.98065499999996,
36: 311.80356,
37: 309.19941,
38: 312.41119500000002,
39: 310.848705,
40: 311.10912000000002,
41: 310.501485,
42: 313.80007499999999,
43: 308.24455499999999,
44: 312.49799999999999,
45: 313.10563500000001,
46: 313.19243999999998,
47: 309.63343500000002,
48: 311.10912000000002,
49: 310.501485,
50: 310.58828999999997,
51: 314.23410000000001,
52: 312.41119500000002,
53: 313.01882999999998,
54: 311.19592499999999,
55: 311.54314500000004,
56: 313.279245,
57: 311.54314500000004,
58: 311.45634000000001,
59: 313.19243999999998,
60: 312.15078,
61: 312.15078,
62: 313.452855,
63: 311.02231499999999,
64: 311.02231499999999,
65: 311.28272999999996,
66: 311.02231499999999,
67: 307.897335,
68: 313.19243999999998,
69: 311.97717,
70: 311.10912000000002,
71: 312.58480499999996,
72: 312.58480499999996,
73: 315.01534500000002,
74: 311.97717,
75: 313.452855,
76: 311.80356,
77: 308.67857999999995,
78: 311.71675499999998,
79: 311.36953499999998,
80: 310.501485,
81: 308.85219000000001,
82: 311.10912000000002,
83: 309.37302,
84: 307.98413999999997,
85: 311.10912000000002,
86: 311.28272999999996,
87: 310.93551000000002,
88: 310.24107000000004,
89: 307.11608999999999,
90: 307.55011500000001,
91: 308.76538500000004,
92: 310.848705,
93: 307.02928500000002,
94: 309.89384999999999,
95: 311.28272999999996,
96: 307.81052999999997,
97: 309.72023999999999,
98: 311.54314500000004,
99: 310.32787500000001},
'C': {0: 305.72721000000001,
1: 306.00498599999997,
2: 306.49109399999998,
3: 306.59526,
4: 305.48415599999998,
5: 305.24110200000001,
6: 306.28276199999999,
7: 306.97720199999998,
8: 306.80359199999998,
9: 307.081368,
10: 306.10915199999999,
11: 304.47721799999999,
12: 305.24110200000001,
13: 304.68554999999998,
14: 306.35220600000002,
15: 305.17165799999998,
16: 306.45637200000004,
17: 305.86609800000002,
18: 306.734148,
19: 306.28276199999999,
20: 305.51887799999997,
21: 308.053584,
22: 306.52581600000002,
23: 305.935542,
24: 306.56053800000001,
25: 306.10915199999999,
26: 306.56053800000001,
27: 305.79665399999999,
28: 305.761932,
29: 304.75499400000001,
30: 306.07443000000001,
31: 306.35220600000002,
32: 305.86609800000002,
33: 307.01192400000002,
34: 306.28276199999999,
35: 305.55360000000002,
36: 306.35220600000002,
37: 306.80359199999998,
38: 305.90082000000001,
39: 306.03970800000002,
40: 307.18553399999996,
41: 304.82443799999999,
42: 305.83137599999998,
43: 306.97720199999998,
44: 306.38692799999995,
45: 306.49109399999998,
46: 306.38692799999995,
47: 306.52581600000002,
48: 305.06749200000002,
49: 306.07443000000001,
50: 306.56053800000001,
51: 305.48415599999998,
52: 305.69248799999997,
53: 307.63692000000003,
54: 307.28969999999998,
55: 305.62304399999999,
56: 306.38692799999995,
57: 305.86609800000002,
58: 306.56053800000001,
59: 305.55360000000002,
60: 306.07443000000001,
61: 306.52581600000002,
62: 306.56053800000001,
63: 305.34526800000003,
64: 305.24110200000001,
65: 304.58138399999996,
66: 307.04664600000001,
67: 306.00498599999997,
68: 305.79665399999999,
69: 306.49109399999998,
70: 305.51887799999997,
71: 305.72721000000001,
72: 306.31748399999998,
73: 306.03970800000002,
74: 307.15081200000003,
75: 307.60219799999999,
76: 304.92860400000001,
77: 304.68554999999998,
78: 305.58832200000001,
79: 305.449434,
80: 306.83831400000003,
81: 306.49109399999998,
82: 306.94247999999999,
83: 304.963326,
84: 307.25497799999999,
85: 305.97026399999999,
86: 306.07443000000001,
87: 305.761932,
88: 305.90082000000001,
89: 306.31748399999998,
90: 306.69942599999996,
91: 306.07443000000001,
92: 305.449434,
93: 304.789716,
94: 304.72027200000002,
95: 306.10915199999999,
96: 305.449434,
97: 305.31054599999999,
98: 305.31054599999999,
99: 306.45637200000004},
'C-A': {0: 0.69443999999999995,
1: 1.5798510000000001,
2: 1.3715190000000002,
3: 2.1701250000000001,
4: 1.4062410000000001,
5: 0.381942,
6: 0.55555200000000005,
7: 1.163187,
8: 2.7256770000000001,
9: 2.0485980000000001,
10: 1.423602,
11: -0.46874700000000002,
12: 1.8576270000000001,
13: -0.26041500000000001,
14: 1.840266,
15: 0.92013299999999998,
16: 1.9444319999999999,
17: 1.614573,
18: 2.3090130000000002,
19: 1.423602,
20: 1.7013779999999998,
21: 2.9340090000000001,
22: 1.927071,
23: 1.249992,
24: 2.2222080000000002,
25: 2.204847,
26: 1.8749880000000001,
27: 1.0242990000000001,
28: 1.163187,
29: -0.017361000000000001,
30: 1.4756850000000001,
31: 1.232631,
32: 0.48610799999999998,
33: 2.413179,
34: 1.8576270000000001,
35: 1.2152700000000001,
36: 1.840266,
37: 1.336797,
38: 1.3020750000000001,
39: 0.74652299999999994,
40: 2.3263739999999999,
41: -0.46874700000000002,
42: 0.71180100000000002,
43: 2.031237,
44: 1.0937430000000001,
45: 1.631934,
46: 0.65971800000000003,
47: 0.36458099999999999,
48: -0.312498,
49: 1.04166,
50: 1.354158,
51: 0.97221599999999997,
52: -2.6388720000000001,
53: 1.822905,
54: 1.7361,
55: -0.79860600000000004,
56: 0.74652299999999994,
57: 0.57291300000000001,
58: 1.1805479999999999,
59: 0.78124499999999997,
60: 0.69443999999999995,
61: 0.79860600000000004,
62: 0.65971800000000003,
63: -0.29513699999999998,
64: -0.57291300000000001,
65: -0.27777600000000002,
66: 1.840266,
67: -0.41666400000000003,
68: 0.156249,
69: 0.93749400000000005,
70: 0.92013299999999998,
71: 0.17360999999999999,
72: 0.24305399999999999,
73: -0.381942,
74: 1.163187,
75: 0.92013299999999998,
76: -0.10416600000000001,
77: -0.78124499999999997,
78: -0.83332800000000007,
79: 0.59027399999999997,
80: 2.3263739999999999,
81: 2.673594,
82: 2.4305400000000001,
83: 0.79860600000000004,
84: 2.7430380000000003,
85: 2.2395689999999999,
86: 2.7777599999999998,
87: 1.0763819999999999,
88: 2.1701250000000001,
89: 1.8923490000000001,
90: 2.1874860000000003,
91: 1.9097099999999998,
92: 0.50346899999999994,
93: 0.017361000000000001,
94: 0.29513699999999998,
95: 1.9444319999999999,
96: 0.32985900000000001,
97: 1.145826,
98: 1.059021,
99: 1.249992},
'Drum': {0: 'LAAA',
1: 'LAAA',
2: 'LAAA',
3: 'LAAA',
4: 'LAAA',
5: 'LAAA',
6: 'LAAA',
7: 'LAAA',
8: 'LAAA',
9: 'LAAA',
10: 'LAAA',
11: 'LAAA',
12: 'LAAA',
13: 'LAAA',
14: 'LAAA',
15: 'LAAA',
16: 'LAAA',
17: 'LAAA',
18: 'LAAA',
19: 'LAAA',
20: 'LAAA',
21: 'LAAA',
22: 'LAAA',
23: 'LAAA',
24: 'LAAA',
25: 'LAAA',
26: 'LAAA',
27: 'LAAA',
28: 'LAAA',
29: 'LAAA',
30: 'LAAA',
31: 'LAAA',
32: 'LAAA',
33: 'LAAA',
34: 'LAAA',
35: 'LAAA',
36: 'LAAA',
37: 'LAAA',
38: 'LAAA',
39: 'LAAA',
40: 'LAAA',
41: 'LAAA',
42: 'LAAA',
43: 'LAAA',
44: 'LAAA',
45: 'LAAA',
46: 'LAAA',
47: 'LAAA',
48: 'LAAA',
49: 'LAAA',
50: 'LAAA',
51: 'LAAA',
52: 'LAAA',
53: 'LAAA',
54: 'LAAA',
55: 'LAAA',
56: 'LAAA',
57: 'LAAA',
58: 'LAAA',
59: 'LAAA',
60: 'LAAA',
61: 'LAAA',
62: 'LAAA',
63: 'LAAA',
64: 'LAAA',
65: 'LAAA',
66: 'LAAA',
67: 'LAAA',
68: 'LAAA',
69: 'LAAA',
70: 'LAAA',
71: 'LAAA',
72: 'LAAA',
73: 'LAAA',
74: 'LAAA',
75: 'LAAA',
76: 'LAAA',
77: 'LAAA',
78: 'LAAA',
79: 'LAAA',
80: 'LAAA',
81: 'LAAA',
82: 'LAAA',
83: 'LAAA',
84: 'LAAA',
85: 'LAAA',
86: 'LAAA',
87: 'LAAA',
88: 'LAAA',
89: 'LAAA',
90: 'LAAA',
91: 'LAAA',
92: 'LAAA',
93: 'LAAA',
94: 'LAAA',
95: 'LAAA',
96: 'LAAA',
97: 'LAAA',
98: 'LAAA',
99: 'LAAA'},
'FIELD X': {0: 4.7949800000000007,
1: -5.5198839999999993,
2: 4.7949800000000007,
3: 4.7949800000000007,
4: -5.5198839999999993,
5: 4.7949800000000007,
6: -5.5198839999999993,
7: 4.7949800000000007,
8: 4.7949800000000007,
9: -5.5198839999999993,
10: -5.5198839999999993,
11: 4.7949800000000007,
12: 4.7949800000000007,
13: -5.5198839999999993,
14: 4.7949800000000007,
15: -5.5198839999999993,
16: 4.7949800000000007,
17: -5.5198839999999993,
18: 4.7949800000000007,
19: 4.7949800000000007,
20: -5.5198839999999993,
21: 4.7949800000000007,
22: -5.5198839999999993,
23: 4.7949800000000007,
24: 4.7949800000000007,
25: -5.5198839999999993,
26: 4.7949800000000007,
27: -5.5198839999999993,
28: -5.5198839999999993,
29: 4.7949800000000007,
30: -5.5198839999999993,
31: 4.7949800000000007,
32: 4.7949800000000007,
33: -5.5198839999999993,
34: 4.7949800000000007,
35: -5.5198839999999993,
36: 4.7949800000000007,
37: -5.5198839999999993,
38: 4.7949800000000007,
39: -5.5198839999999993,
40: 4.7949800000000007,
41: -5.5198839999999993,
42: 4.7949800000000007,
43: -5.5198839999999993,
44: 4.7949800000000007,
45: -5.5198839999999993,
46: 4.7949800000000007,
47: -5.5198839999999993,
48: 4.7949800000000007,
49: -5.5198839999999993,
50: -5.5198839999999993,
51: 4.7949800000000007,
52: -5.5198839999999993,
53: 4.7949800000000007,
54: 4.7949800000000007,
55: -5.5198839999999993,
56: 4.7949800000000007,
57: -5.5198839999999993,
58: 4.7949800000000007,
59: -5.5198839999999993,
60: 4.7949800000000007,
61: 4.7949800000000007,
62: -5.5198839999999993,
63: 4.7949800000000007,
64: -5.5198839999999993,
65: 4.7949800000000007,
66: 4.7949800000000007,
67: -5.5198839999999993,
68: 4.7949800000000007,
69: -5.5198839999999993,
70: -5.5198839999999993,
71: 4.7949800000000007,
72: -5.5198839999999993,
73: 4.7949800000000007,
74: -5.5198839999999993,
75: 4.7949800000000007,
76: -5.5198839999999993,
77: -5.5198839999999993,
78: 4.7949800000000007,
79: -5.5198839999999993,
80: 4.7949800000000007,
81: -5.5198839999999993,
82: 4.7949800000000007,
83: 4.7949800000000007,
84: -5.5198839999999993,
85: 4.7949800000000007,
86: -5.5198839999999993,
87: 4.7949800000000007,
88: 4.7949800000000007,
89: -5.5198839999999993,
90: -5.5198839999999993,
91: 4.7949800000000007,
92: 4.7949800000000007,
93: -5.5198839999999993,
94: 4.7949800000000007,
95: -5.5198839999999993,
96: 4.7949800000000007,
97: -5.5198839999999993,
98: 4.7949800000000007,
99: 4.7949800000000007},
'FIELD Y': {0: 1.8893500000000001,
1: 1.8893500000000001,
2: 1.8893500000000001,
3: 1.8893500000000001,
4: 1.8893500000000001,
5: 1.8893500000000001,
6: 1.8893500000000001,
7: 1.8893500000000001,
8: 1.8893500000000001,
9: 1.8893500000000001,
10: 1.8893500000000001,
11: 1.8893500000000001,
12: 1.8893500000000001,
13: 1.8893500000000001,
14: 1.8893500000000001,
15: 1.8893500000000001,
16: 1.8893500000000001,
17: 1.8893500000000001,
18: 1.8893500000000001,
19: 1.8893500000000001,
20: 1.8893500000000001,
21: 1.8893500000000001,
22: 1.8893500000000001,
23: 1.8893500000000001,
24: 1.8893500000000001,
25: 1.8893500000000001,
26: 1.8893500000000001,
27: 1.8893500000000001,
28: 1.8893500000000001,
29: 1.8893500000000001,
30: 1.8893500000000001,
31: 1.8893500000000001,
32: 1.8893500000000001,
33: 1.8893500000000001,
34: 1.8893500000000001,
35: 1.8893500000000001,
36: 1.8893500000000001,
37: 1.8893500000000001,
38: 1.8893500000000001,
39: 1.8893500000000001,
40: 1.8893500000000001,
41: 1.8893500000000001,
42: 1.8893500000000001,
43: 1.8893500000000001,
44: 1.8893500000000001,
45: 1.8893500000000001,
46: 1.8893500000000001,
47: 1.8893500000000001,
48: 1.8893500000000001,
49: 1.8893500000000001,
50: 1.8893500000000001,
51: 1.8893500000000001,
52: 1.8893500000000001,
53: 1.8893500000000001,
54: 1.8893500000000001,
55: 1.8893500000000001,
56: 1.8893500000000001,
57: 1.8893500000000001,
58: 1.8893500000000001,
59: 1.8893500000000001,
60: 1.8893500000000001,
61: 1.8893500000000001,
62: 1.8893500000000001,
63: 1.8893500000000001,
64: 1.8893500000000001,
65: 1.8893500000000001,
66: 1.8893500000000001,
67: 1.8893500000000001,
68: 1.8893500000000001,
69: 1.8893500000000001,
70: 1.8893500000000001,
71: 1.8893500000000001,
72: 1.8893500000000001,
73: 1.8893500000000001,
74: 1.8893500000000001,
75: 1.8893500000000001,
76: 1.8893500000000001,
77: 1.8893500000000001,
78: 1.8893500000000001,
79: 1.8893500000000001,
80: 1.8893500000000001,
81: 1.8893500000000001,
82: 1.8893500000000001,
83: 1.8893500000000001,
84: 1.8893500000000001,
85: 1.8893500000000001,
86: 1.8893500000000001,
87: 1.8893500000000001,
88: 1.8893500000000001,
89: 1.8893500000000001,
90: 1.8893500000000001,
91: 1.8893500000000001,
92: 1.8893500000000001,
93: 1.8893500000000001,
94: 1.8893500000000001,
95: 1.8893500000000001,
96: 1.8893500000000001,
97: 1.8893500000000001,
98: 1.8893500000000001,
99: 1.8893500000000001},
'Plate': {0: 72,
1: 72,
2: 72,
3: 72,
4: 72,
5: 72,
6: 72,
7: 72,
8: 72,
9: 72,
10: 72,
11: 72,
12: 72,
13: 72,
14: 72,
15: 72,
16: 72,
17: 72,
18: 72,
19: 72,
20: 72,
21: 72,
22: 72,
23: 72,
24: 72,
25: 72,
26: 72,
27: 72,
28: 72,
29: 72,
30: 72,
31: 72,
32: 72,
33: 72,
34: 72,
35: 72,
36: 72,
37: 72,
38: 72,
39: 72,
40: 72,
41: 72,
42: 72,
43: 72,
44: 72,
45: 72,
46: 72,
47: 72,
48: 72,
49: 72,
50: 72,
51: 72,
52: 72,
53: 72,
54: 72,
55: 72,
56: 72,
57: 72,
58: 72,
59: 72,
60: 72,
61: 72,
62: 72,
63: 72,
64: 72,
65: 72,
66: 72,
67: 72,
68: 72,
69: 72,
70: 72,
71: 72,
72: 72,
73: 72,
74: 72,
75: 72,
76: 72,
77: 72,
78: 72,
79: 72,
80: 131,
81: 131,
82: 131,
83: 131,
84: 131,
85: 131,
86: 131,
87: 131,
88: 131,
89: 131,
90: 131,
91: 131,
92: 131,
93: 131,
94: 131,
95: 131,
96: 131,
97: 131,
98: 131,
99: 131},
'Plate X': {0: -134.13406000000001,
1: -134.13406000000001,
2: -134.13406000000001,
3: -113.50433200000001,
4: -113.50433200000001,
5: -113.50433200000001,
6: -113.50433200000001,
7: -113.50433200000001,
8: -92.874604000000005,
9: -92.874604000000005,
10: -92.874604000000005,
11: -92.874604000000005,
12: -72.244876000000005,
13: -72.244876000000005,
14: -72.244876000000005,
15: -72.244876000000005,
16: -72.244876000000005,
17: -72.244876000000005,
18: -72.244876000000005,
19: -51.615147999999998,
20: -51.615147999999998,
21: -51.615147999999998,
22: -51.615147999999998,
23: -51.615147999999998,
24: -30.985420000000001,
25: -30.985420000000001,
26: -30.985420000000001,
27: -30.985420000000001,
28: -30.985420000000001,
29: -30.985420000000001,
30: -30.985420000000001,
31: -30.985420000000001,
32: -10.355691999999999,
33: -10.355691999999999,
34: -10.355691999999999,
35: -10.355691999999999,
36: -10.355691999999999,
37: -10.355691999999999,
38: -10.355691999999999,
39: 10.274036000000001,
40: 10.274036000000001,
41: 10.274036000000001,
42: 10.274036000000001,
43: 10.274036000000001,
44: 10.274036000000001,
45: 10.274036000000001,
46: 30.903764000000002,
47: 30.903764000000002,
48: 30.903764000000002,
49: 30.903764000000002,
50: 30.903764000000002,
51: 30.903764000000002,
52: 30.903764000000002,
53: 30.903764000000002,
54: 51.533491999999995,
55: 51.533491999999995,
56: 51.533491999999995,
57: 51.533491999999995,
58: 51.533491999999995,
59: 51.533491999999995,
60: 51.533491999999995,
61: 72.163219999999995,
62: 72.163219999999995,
63: 72.163219999999995,
64: 72.163219999999995,
65: 72.163219999999995,
66: 72.163219999999995,
67: 92.792947999999996,
68: 92.792947999999996,
69: 92.792947999999996,
70: 92.792947999999996,
71: 92.792947999999996,
72: 113.422676,
73: 113.422676,
74: 113.422676,
75: 113.422676,
76: 113.422676,
77: 134.052404,
78: 134.052404,
79: 134.052404,
80: -134.13406000000001,
81: -134.13406000000001,
82: -134.13406000000001,
83: -113.50433200000001,
84: -113.50433200000001,
85: -113.50433200000001,
86: -113.50433200000001,
87: -113.50433200000001,
88: -92.874604000000005,
89: -92.874604000000005,
90: -92.874604000000005,
91: -92.874604000000005,
92: -72.244876000000005,
93: -72.244876000000005,
94: -72.244876000000005,
95: -72.244876000000005,
96: -72.244876000000005,
97: -72.244876000000005,
98: -72.244876000000005,
99: -51.615147999999998},
'Plate Y': {0: -27.0123,
1: 0.039899999999999998,
2: 27.092099999999999,
3: -81.116699999999994,
4: -54.064500000000002,
5: 0.039899999999999998,
6: 54.144300000000001,
7: 81.1965,
8: -54.064500000000002,
9: -27.0123,
10: 27.092099999999999,
11: 54.144300000000001,
12: -108.16889999999999,
13: -81.116699999999994,
14: -27.0123,
15: 0.039899999999999998,
16: 27.092099999999999,
17: 81.1965,
18: 108.2487,
19: -81.116699999999994,
20: -54.064500000000002,
21: 0.039899999999999998,
22: 54.144300000000001,
23: 81.1965,
24: -135.22110000000001,
25: -108.16889999999999,
26: -54.064500000000002,
27: -27.0123,
28: 27.092099999999999,
29: 54.144300000000001,
30: 108.2487,
31: 135.30090000000001,
32: -108.16889999999999,
33: -81.116699999999994,
34: -27.0123,
35: 0.039899999999999998,
36: 27.092099999999999,
37: 81.1965,
38: 108.2487,
39: -135.22110000000001,
40: -81.116699999999994,
41: -54.064500000000002,
42: 0.039899999999999998,
43: 54.144300000000001,
44: 81.1965,
45: 135.30090000000001,
46: -135.22110000000001,
47: -108.16889999999999,
48: -54.064500000000002,
49: -27.0123,
50: 27.092099999999999,
51: 54.144300000000001,
52: 108.2487,
53: 135.30090000000001,
54: -108.16889999999999,
55: -81.116699999999994,
56: -27.0123,
57: 0.039899999999999998,
58: 27.092099999999999,
59: 81.1965,
60: 108.2487,
61: -81.116699999999994,
62: -54.064500000000002,
63: 0.039899999999999998,
64: 54.144300000000001,
65: 81.1965,
66: 108.2487,
67: -108.16889999999999,
68: -54.064500000000002,
69: -27.0123,
70: 27.092099999999999,
71: 54.144300000000001,
72: -81.116699999999994,
73: -27.0123,
74: 0.039899999999999998,
75: 27.092099999999999,
76: 81.1965,
77: -54.064500000000002,
78: 0.039899999999999998,
79: 54.144300000000001,
80: -27.0123,
81: 0.039899999999999998,
82: 27.092099999999999,
83: -81.116699999999994,
84: -54.064500000000002,
85: 0.039899999999999998,
86: 54.144300000000001,
87: 81.1965,
88: -54.064500000000002,
89: -27.0123,
90: 27.092099999999999,
91: 54.144300000000001,
92: -108.16889999999999,
93: -81.116699999999994,
94: -27.0123,
95: 0.039899999999999998,
96: 27.092099999999999,
97: 81.1965,
98: 108.2487,
99: -81.116699999999994},
'Unnamed: 0': {0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
12: 12,
13: 13,
14: 14,
15: 15,
16: 16,
17: 17,
18: 18,
19: 19,
20: 20,
21: 21,
22: 22,
23: 23,
24: 24,
25: 25,
26: 26,
27: 27,
28: 28,
29: 29,
30: 30,
31: 31,
32: 32,
33: 33,
34: 34,
35: 35,
36: 36,
37: 37,
38: 38,
39: 39,
40: 40,
41: 41,
42: 42,
43: 43,
44: 44,
45: 45,
46: 46,
47: 47,
48: 48,
49: 49,
50: 50,
51: 51,
52: 52,
53: 53,
54: 54,
55: 55,
56: 56,
57: 57,
58: 58,
59: 59,
60: 60,
61: 61,
62: 62,
63: 63,
64: 64,
65: 65,
66: 66,
67: 67,
68: 68,
69: 69,
70: 70,
71: 71,
72: 72,
73: 73,
74: 74,
75: 75,
76: 76,
77: 77,
78: 78,
79: 79,
80: 80,
81: 81,
82: 82,
83: 83,
84: 84,
85: 85,
86: 86,
87: 87,
88: 88,
89: 89,
90: 90,
91: 91,
92: 92,
93: 93,
94: 94,
95: 95,
96: 96,
97: 97,
98: 98,
99: 99}}
From your question it doesn't sound like you want a multivariate regression (i.e. multiple Y's). If you're just predicting a single Y from multiple X's (a multiple regression), you can do it like this with pandas, then save the results to a txt file:
import numpy as np
import pandas as pd

# Columns used as predictors; 'C-A' is the response.
REGRESSORS = ['Plate X', 'Plate Y', 'FIELD X']

df = pd.DataFrame(DF)

# pd.stats.api.ols was removed in pandas 0.20, so the least-squares fit is
# done directly with NumPy. A constant column is appended so the model has
# an intercept term (the old pandas OLS added one by default).
design = df[REGRESSORS].assign(intercept=1.0)
coefs = np.linalg.lstsq(design.values, df['C-A'].values, rcond=None)[0]

# Label each coefficient with the column it belongs to.
res = pd.Series(coefs, index=design.columns, name='C-A')

# The context manager closes the file even if the write fails.
with open("C:/Users/Simon/Desktop/results.txt", "w") as out_file:
    out_file.write(str(res))
You mentioned in the question that you want to group the analyses by Drum and Plate. In the sample data, however, every row has the same Drum value, so grouping by Drum changes nothing here. If you want to group by Plate and then run OLS on each subgroup, you can do something like this:
import numpy as np
import pandas as pd

# Columns used as predictors; 'C-A' is the response.
REGRESSORS = ['Plate X', 'Plate Y', 'FIELD X']

df = pd.DataFrame(DF)
results = []


def ols_res(df):
    """Fit 'C-A' on the regressors for one sub-frame and store the result.

    The fitted coefficients (one per regressor plus 'intercept') are
    appended to the module-level ``results`` list as a pandas Series.
    """
    # pd.stats.api.ols was removed in pandas 0.20; fit with NumPy instead.
    # The constant column provides the intercept term.
    design = df[REGRESSORS].assign(intercept=1.0)
    coefs = np.linalg.lstsq(design.values, df['C-A'].values, rcond=None)[0]
    results.append(pd.Series(coefs, index=design.columns, name='C-A'))


# Iterate over the groups explicitly instead of using groupby.apply:
# ols_res works by side effect, and older pandas versions could invoke the
# applied function twice on the first group, duplicating its entry.
for _, plate_df in df.groupby('Plate'):
    ols_res(plate_df)

# The context manager closes the file even if a write fails.
with open("C:/Users/Simon/Desktop/results.txt", "w") as out_file:
    for el in results:
        out_file.write(str(el))
        # Blank line so consecutive results do not run together.
        out_file.write("\n\n")
If you also want to group by Drum, and note which drum/plate combination each analysis is for, you can do something like this and just add some extra text to the results file:
import numpy as np
import pandas as pd

# Columns used as predictors; 'C-A' is the response.
REGRESSORS = ['Plate X', 'Plate Y', 'FIELD X']

df = pd.DataFrame(DF)
results = []


def ols_res(df):
    """Fit 'C-A' on the regressors for one plate/drum sub-frame.

    Appends ``[label, coefficients]`` to the module-level ``results`` list,
    where ``label`` names the plate/drum combination and ``coefficients``
    is a pandas Series with one entry per regressor plus 'intercept'.
    """
    # Within a group every row has the same Plate, so the mean is that value.
    curCombo = "plate:" + str(df["Plate"].mean()) + ", drum:" + str(df["Drum"].unique())
    # pd.stats.api.ols was removed in pandas 0.20; fit with NumPy instead.
    # The constant column provides the intercept term.
    design = df[REGRESSORS].assign(intercept=1.0)
    coefs = np.linalg.lstsq(design.values, df['C-A'].values, rcond=None)[0]
    regression_results = pd.Series(coefs, index=design.columns, name='C-A')
    results.append([curCombo, regression_results])


# Iterate over the groups explicitly instead of using groupby.apply:
# ols_res works by side effect, and older pandas versions could invoke the
# applied function twice on the first group, duplicating its entry.
for _, combo_df in df.groupby(['Plate', 'Drum']):
    ols_res(combo_df)

# The context manager closes the file even if a write fails.
with open("C:/Users/Simon/Desktop/results.txt", "w") as out_file:
    for label, fit in results:
        # Write the label on its own line followed by the coefficients,
        # rather than the repr of the enclosing [label, fit] list.
        out_file.write(label + "\n")
        out_file.write(str(fit))
        out_file.write("\n\n")
Related
Number of instances in a list variable pandas
in my database I have an id (docdb_family_id) and a list of ids (cited_docdb_list) as follows: {'docdb_family_id': {0: 3498148, 1: 3512921, 2: 3525647, 3: 3636418, 4: 3673165, 5: 3680127, 6: 3688953, 7: 3689983, 8: 3700898, 9: 3768731, 10: 3770463, 11: 3771404, 12: 3771425, 13: 3771495, 14: 3771604, 15: 3772274, 16: 3772510, 17: 3772940, 18: 3775109, 19: 3779413, 20: 3783583, 21: 3784332, 22: 3784469, 23: 3787179, 24: 3787982, 25: 3790639, 26: 3790670, 27: 3792458, 28: 3795015, 29: 3799670, 30: 3800683, 31: 3802132, 32: 3802281, 33: 3803326, 34: 3803728, 35: 3808684, 36: 3809416, 37: 3810114, 38: 3811389, 39: 3812435, 40: 3813073, 41: 3813312, 42: 3815934, 43: 3816821, 44: 3816927, 45: 3817424, 46: 3818542, 47: 3818766, 48: 3819057, 49: 3819335, 50: 3820633, 51: 3820694, 52: 3821540, 53: 3821838, 54: 3822049, 55: 3822089, 56: 3823057, 57: 3823114, 58: 3824187, 59: 3824375, 60: 3825785, 61: 3826171, 62: 3826211, 63: 3827560, 64: 3828464, 65: 3829519, 66: 3829990, 67: 3831455, 68: 3831510, 69: 3831784, 70: 3831999, 71: 3832248, 72: 3832987, 73: 3834046, 74: 3834444, 75: 3835251, 76: 3886195, 77: 3887480, 78: 3890389, 79: 3892024, 80: 3944218}, 'cited_docdb_list': {0: '[3454392.0, 3489764.0, 3492286.0, 3802281.0, 3944218.0, 4161113.0, 6055754.0, 4167218.0, 6245259.0, 6310327.0, 6339325.0, 7865817.0, 10818295.0, 21820994.0, 25257112.0, 25333370.0, 25421470.0]', 1: '[22785397.0, 3800683.0]', 2: '[3508710.0, 3832248.0, 6015961.0, 9173676.0, 22615010.0]', 3: '[3482303.0, 3518675.0, 3688207.0, 3688953.0, 7856041.0, 9893906.0, 9911676.0, 21740142.0, 22095959.0, 22224845.0, 22455261.0, 22522023.0, 23039462.0, 23149018.0, 23248627.0, 25608484.0, 26145960.0, 26246393.0, 27122358.0, 27215945.0, 27267946.0, 27368911.0, 27535943.0, 27569239.0, 27759996.0, 34107815.0, 35219296.0, 46248356.0]', 4: '[7917626.0, 13587294.0, 15860525.0, 16099836.0, 18349663.0, 18831836.0, 24223941.0, 26558149.0]', 5: '[3680147.0, 3680169.0, 6442447.0, 8168860.0, 8170479.0, 8178540.0, 8178541.0, 
10655404.0, 10764890.0, 10765687.0, 11600956.0, 14593411.0, 22296890.0, 22471622.0, 24169239.0, 24966171.0, 25033444.0, 25166841.0, 25372199.0, 25459000.0, 25533862.0, 25918313.0, 26371384.0, 26439834.0, 27274967.0, 27294655.0, 27523014.0]', 6: '[5459370.0, 16645542.0, 17462457.0, 21959571.0, 22010115.0, 22296144.0, 26927437.0, 33041169.0, 33101777.0, 34066530.0]', 7: '[7650618.0, 7806400.0, 7835575.0, 7857812.0, 8210353.0, 8232323.0, 8239494.0, 10024300.0, 11566936.0, 11637978.0, 11942149.0, 12192469.0, 12437164.0, 12474858.0, 12862377.0, 13357403.0, 13391145.0, 13884195.0, 14268316.0, 14780600.0, 14837681.0, 14959673.0, 15493334.0, 15660109.0, 15690908.0, 15706187.0, 15740492.0, 16185014.0, 16286275.0, 16301821.0, 16400795.0, 16599264.0, 16867936.0, 17017842.0, 17303135.0, 18156945.0, 18168645.0, 18351330.0, 18357701.0, 18361853.0, 18553020.0, 18665747.0, 22042028.0, 22509938.0, 22752953.0, 22752985.0, 22955054.0, 23605846.0, 23635250.0, 24042617.0, 24281660.0, 24426092.0, 24470177.0, 25217414.0, 25342266.0, 25399276.0, 25481652.0, 26026958.0, 26034429.0, 26150729.0, 26427482.0, 26488815.0, 26500234.0, 26537700.0, 26644976.0, 26692209.0, 26785282.0, 27339916.0, 27370666.0, 27372394.0, 27524906.0, 27563165.0, 29229947.0, 49274340.0]', 8: '[3764296.0, 3770459.0, 3773222.0, 3811210.0, 3825785.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11894269.0, 12556976.0, 12589238.0, 12666170.0, 12673679.0, 12702964.0, 13630878.0, 14026520.0, 14271281.0, 14325872.0, 14416179.0, 15383496.0, 15479503.0, 15920227.0, 16127226.0, 16222285.0, 16339588.0, 16871054.0, 16912938.0, 16912954.0, 16913656.0, 17401011.0, 17461197.0, 17474177.0, 17663812.0, 17724327.0, 18063449.0, 18227455.0, 18250669.0, 18386252.0, 18426307.0, 18587018.0, 18654484.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22067617.0, 
22089128.0, 22098429.0, 22223747.0, 22276463.0, 22298327.0, 22341037.0, 22385483.0, 22395684.0, 22676560.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23402016.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24251602.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24770649.0, 24785182.0, 24839047.0, 24962082.0, 25028009.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26438189.0, 26450647.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26748272.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26990011.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27311622.0, 27315336.0, 27340467.0, 27569697.0, 37944191.0, 46149961.0, 46255262.0]', 9: '[8583594.0, 9119276.0, 21793982.0, 22133036.0, 24149220.0, 25776190.0, 26736757.0]', 10: '[10568655.0, 13302684.0, 19844775.0, 22493955.0, 26714695.0, 26997884.0]', 11: '[4344006.0, 24838031.0, 25098959.0, 25395637.0, 27025593.0]', 12: '[25642630.0, 25642846.0, 25642930.0, 26279148.0, 26287348.0]', 13: '[10451245.0, 10564358.0, 22491246.0, 24064440.0, 24279325.0, 24519613.0, 24651262.0, 25072503.0, 26461666.0, 26692304.0]', 14: '[4351264.0, 4384434.0, 6117960.0, 9116940.0, 10999954.0, 22148709.0, 22562211.0, 23862977.0, 24037344.0, 24361917.0, 24432647.0, 25076138.0, 26840072.0, 27429215.0]', 15: '[3692248.0, 6053171.0, 6226485.0, 12362875.0, 27371744.0]', 16: '[5933264.0, 6125219.0, 6247996.0, 10521070.0, 13063586.0, 15774983.0, 16803481.0, 16904934.0, 22065174.0, 27127184.0, 27496706.0, 27624793.0]', 17: '[3526456.0, 6170998.0, 6335295.0, 10505184.0, 11549684.0, 14422646.0, 15088415.0, 17645959.0, 22169836.0, 22901756.0, 22994874.0, 22994878.0, 23172874.0, 23925148.0, 25244507.0, 27389063.0]', 18: '[6350760.0, 20369026.0, 24216636.0, 26762272.0, 26927655.0, 27126594.0, 27371255.0]', 19: 
'[3775878.0, 6008063.0, 12812693.0, 13575794.0, 14790639.0, 22013262.0, 24622370.0, 26901485.0, 26985941.0, 27076644.0, 27112632.0]', 20: '[3775488.0, 10948289.0, 10952971.0, 10952974.0, 11367322.0, 12710129.0, 15469131.0, 22577881.0, 25644554.0, 26467182.0, 26933783.0, 27401801.0]', 21: '[6134715.0, 6350620.0, 15983939.0, 16269143.0, 17680987.0, 23994234.0, 24992672.0, 26268730.0, 26367621.0, 26629308.0, 26787837.0, 26988835.0, 27365620.0, 27455735.0, 27476152.0, 41508342.0]', 22: '[3690998.0, 3779413.0, 8103745.0, 10528617.0, 10533016.0, 14026520.0, 17474177.0, 21959397.0, 22069056.0, 23038428.0, 23077293.0, 24078130.0, 24160889.0, 25618055.0, 26462451.0, 27407332.0, 27569697.0]', 23: '[6512805.0, 8105738.0, 10680104.0, 10719170.0, 18290174.0, 22237701.0, 22290947.0, 23695912.0, 23765282.0, 24565635.0, 26289399.0, 27358491.0, 27420192.0]', 24: '[6462400.0, 16101703.0, 24045826.0, 25612324.0, 26283893.0, 26434155.0]', 25: '[8208100.0, 23566456.0, 23702554.0, 25266985.0, 26142859.0]', 26: '[3771632.0, 14240231.0, 15623240.0, 22486268.0, 23605938.0, 27170740.0]', 27: '[3798105.0, 46299235.0, 46299236.0, 46299237.0, 46299238.0, 46299740.0, 46299800.0]', 28: '[2631556.0, 2944019.0, 10790311.0, 13793711.0, 18470587.0, 21851951.0, 21924559.0, 23889759.0, 23927439.0, 23963011.0, 24766696.0, 26713651.0, 26990589.0, 27287227.0]', 29: '[3796218.0, 24589826.0, 25624390.0, 25765848.0]', 30: '[3772972.0, 6025591.0, 7764892.0, 12805981.0, 15547363.0, 16262273.0, 21905352.0, 22082762.0, 23922610.0, 23984212.0, 24257317.0, 25315731.0, 25402356.0, 25518280.0, 26719186.0, 26734227.0, 26940453.0, 26979759.0, 27025821.0, 27025822.0]', 31: '[3779080.0, 3794389.0, 9425562.0, 10768435.0, 22860582.0, 25471727.0, 25617513.0, 25620315.0, 25644721.0, 26092132.0, 27153345.0]', 32: '[2634854.0, 3700806.0, 3802276.0, 3802292.0, 3802325.0, 3802326.0, 3802327.0, 3802332.0, 3802333.0, 3802334.0, 3802337.0, 3802338.0, 3802339.0, 3802354.0, 3802356.0, 3805158.0, 3805178.0, 3805242.0, 3806854.0, 
3808228.0, 3808232.0, 3808236.0, 3810760.0, 4258298.0, 6062612.0, 6161522.0, 6180029.0, 6243195.0, 6328004.0, 6352957.0, 6397822.0, 6415485.0, 6456158.0, 6476429.0, 6495895.0, 7588639.0, 9099878.0, 9119945.0, 9447476.0, 9454581.0, 9460842.0, 10036436.0, 10089783.0, 10642403.0, 10676758.0, 10702950.0, 10729821.0, 10746269.0, 11194385.0, 11411510.0, 11592343.0, 12638122.0, 12808119.0, 13792188.0, 13869248.0, 13880272.0, 14224791.0, 14363363.0, 14475114.0, 14555145.0, 14654996.0, 14659718.0, 14905880.0, 15009474.0, 15208979.0, 15365386.0, 15418108.0, 15427440.0, 15532726.0, 15759142.0, 15839949.0, 16148732.0, 16454470.0, 16472116.0, 16557241.0, 16567151.0, 16574330.0, 16670501.0, 16826733.0, 16866056.0, 16917358.0, 16952937.0, 17009237.0, 17042089.0, 17152410.0, 17167043.0, 17167057.0, 17176980.0, 17177751.0, 17203313.0, 17214040.0, 17359106.0, 17384372.0, 17390431.0, 17398779.0, 17419690.0, 17521757.0, 17541035.0, 17548222.0, 17692283.0, 17709222.0, 17752106.0, 17787836.0, 17980830.0, 18032898.0, 18091978.0, 18108188.0, 18157469.0, 18183177.0, 18202974.0, 18210551.0, 18356218.0, 18513671.0, 20358277.0, 21694022.0, 21760302.0, 21839477.0, 22005188.0, 22196129.0, 22231670.0, 22241704.0, 22321076.0, 22407725.0, 22574957.0, 22624317.0, 22688378.0, 22819977.0, 22837041.0, 22856540.0, 22891528.0, 22899520.0, 22911089.0, 22957363.0, 22978599.0, 23009341.0, 23016791.0, 23017033.0, 23194812.0, 23238114.0, 23242315.0, 23372955.0, 23403394.0, 23583171.0, 23717292.0, 23818247.0, 23822065.0, 23967128.0, 24023429.0, 24035021.0, 24041033.0, 24056428.0, 24092174.0, 24102216.0, 24115524.0, 24258574.0, 24305268.0, 24384033.0, 24407235.0, 24437414.0, 24440441.0, 24511068.0, 24607773.0, 24618564.0, 24640870.0, 24695776.0, 24712750.0, 24771021.0, 24777130.0, 24782249.0, 24802597.0, 24824797.0, 24857748.0, 24902244.0, 24921608.0, 24928011.0, 24981047.0, 24992362.0, 25006081.0, 25056097.0, 25079341.0, 25079896.0, 25098400.0, 25128528.0, 25157096.0, 25175720.0, 25184562.0, 25211651.0, 
25273616.0, 25325219.0, 25395409.0, 25430909.0, 25431399.0, 25441093.0, 25458263.0, 25459754.0, 25478333.0, 25511171.0, 25540179.0, 25644902.0, 25645209.0, 25645479.0, 25645484.0, 25645485.0, 25645493.0, 25645494.0, 25645495.0, 25645496.0, 25645497.0, 25645498.0, 25645507.0, 25645510.0, 25645511.0, 25645513.0, 25645515.0, 25645516.0, 25645517.0, 25645524.0, 25645526.0, 25645531.0, 25645539.0, 25645541.0, 25645542.0, 25645545.0, 25645546.0, 25645548.0, 26138811.0, 26227739.0, 26352404.0, 26435079.0, 26437848.0, 26443181.0, 26495400.0, 26535740.0, 26564673.0, 26687357.0, 26688326.0, 26719816.0, 26767248.0, 26792309.0, 26883761.0, 27005599.0, 27048622.0, 27054476.0, 27157854.0, 27158025.0, 27204375.0, 27278808.0, 27279445.0, 27288524.0, 27308865.0, 27324977.0, 27325474.0, 27329746.0, 27339109.0, 27376149.0, 27467592.0, 27522909.0, 27526374.0, 27530134.0, 27530208.0, 27542962.0, 27550891.0, 27551736.0, 27552627.0, 27554184.0, 27554356.0, 27557355.0, 27577752.0, 27578291.0, 28455314.0, 29248275.0, 29999199.0, 31994773.0, 32302805.0, 32324813.0, 34221894.0, 34753905.0, 36808782.0, 36954792.0, 38226628.0, 38622001.0, 38622009.0, 46253718.0, 46302623.0, 46302626.0, 46330220.0, 56289952.0]', 33: '[9193201.0, 9690456.0, 11262890.0, 11857463.0, 20399558.0, 22182248.0, 23000715.0, 23242310.0, 23324343.0, 23849738.0, 24920698.0, 26305246.0]', 34: '[2103060.0, 3773965.0, 3774544.0, 3775695.0, 3775872.0, 3776256.0, 3786612.0, 3791581.0, 5870313.0, 5916275.0, 6021141.0, 6199234.0, 6245542.0, 6295893.0, 6295894.0, 6295895.0, 6296520.0, 6365302.0, 6421653.0, 6453213.0, 6470668.0, 6470669.0, 6505848.0, 7762300.0, 7996364.0, 8204435.0, 8504791.0, 8516769.0, 8537466.0, 9978587.0, 10525500.0, 10532630.0, 11421697.0, 11861168.0, 11938229.0, 12631519.0, 14831183.0, 15028144.0, 19729781.0, 19865575.0, 20357413.0, 21762166.0, 21916786.0, 22585241.0, 22736795.0, 22800842.0, 22821355.0, 23120569.0, 23397799.0, 23436004.0, 23481575.0, 23518025.0, 23722477.0, 23740173.0, 23790685.0, 23790691.0, 
23790693.0, 23844609.0, 23967824.0, 24169834.0, 24225931.0, 24575089.0, 24686268.0, 24701256.0, 24701581.0, 24738797.0, 24962380.0, 25062108.0, 25145546.0, 25220031.0, 25326521.0, 25341958.0, 25350944.0, 25375270.0, 25532312.0, 25636025.0, 25671453.0, 25782505.0, 25782589.0, 26158327.0, 26516437.0, 26877119.0, 26950677.0, 27100111.0, 27157416.0, 27167473.0, 27286248.0, 27339086.0, 27339905.0, 27356707.0, 27404057.0, 27414896.0, 27461178.0, 27462950.0, 27464289.0, 27477792.0, 27490121.0, 41667474.0]', 35: '[3775287.0, 24656178.0, 25590998.0, 26752872.0, 27104052.0, 27111638.0, 27154855.0, 27449240.0, 27505577.0]', 36: '[10966704.0, 14429073.0, 14796404.0, 24388079.0, 25634499.0, 55024694.0]', 37: '[3810974.0, 6485046.0, 8220639.0, 10710317.0, 24372965.0, 25336013.0, 26139248.0, 30115768.0, 31188433.0, 34102684.0, 35502814.0, 41505355.0, 44170427.0, 46325309.0]', 38: '[3087303.0, 4124422.0, 20979317.0, 21870465.0, 23941444.0, 25013107.0, 25326934.0, 25638943.0, 26674623.0, 27041345.0, 27357929.0, 27505577.0]', 39: '[3796218.0, 3799670.0, 13202074.0, 16015369.0, 18376479.0, 21761811.0, 22420460.0, 25064869.0, 25362187.0, 25420991.0, 25645622.0]', 40: '[6383399.0, 11571184.0, 16203469.0, 19328209.0, 19338037.0, 23609959.0, 23669719.0, 24172105.0, 24533474.0, 25545404.0, 27031913.0, 27475424.0]', 41: '[3790030.0, 10844179.0, 17904788.0, 25518619.0, 25644273.0, 26230725.0, 27107515.0, 27358315.0]', 42: '[3765777.0, 5219438.0, 6509530.0, 9401909.0, 10606015.0, 11550806.0, 12762794.0, 13827315.0, 14042779.0, 15264928.0, 15458075.0, 15925094.0, 16128449.0, 17054858.0, 18055051.0, 18471454.0, 21862046.0, 22293413.0, 22679682.0, 24127226.0, 24176606.0, 24248291.0, 24679083.0, 25083983.0, 25400937.0, 26366826.0, 26985312.0]', 43: '[3775287.0, 3776915.0, 21721135.0, 22104735.0, 22570362.0, 25326934.0, 25584184.0, 25586333.0, 25638943.0, 26759870.0]', 44: '[4173143.0, 7807763.0, 13522010.0, 13654473.0, 13927771.0, 15719616.0, 16249907.0, 16525019.0, 21694632.0, 22093627.0, 
22464844.0, 22964817.0, 23061734.0, 23211210.0, 23691361.0, 23831988.0, 23938149.0, 24244391.0, 24684633.0, 25241119.0, 25530551.0, 26801599.0, 27370214.0, 27539801.0, 27556890.0, 46249740.0]', 45: '[7830781.0, 7843024.0, 7852695.0, 8237386.0, 9444575.0, 10762585.0, 21739343.0, 21899596.0, 22200593.0, 23421862.0, 24138149.0, 25127817.0, 26792398.0, 33378328.0]', 46: '[24175325.0, 26752769.0, 26865384.0]', 47: '[3808127.0, 22989911.0, 22991587.0, 24661354.0, 25009434.0]', 48: '[3801540.0, 5986989.0, 7758470.0, 13433718.0, 13869888.0, 13870030.0, 13870091.0, 15253727.0, 15460683.0, 15581976.0, 15640684.0, 16014121.0, 17269442.0, 17330959.0, 18272758.0, 18289278.0, 19819299.0, 22635021.0, 22763032.0, 24234146.0, 25270151.0, 25330011.0, 26481016.0, 26873860.0, 30798811.0]', 49: '[4161793.0, 21787085.0, 22034688.0, 23282114.0, 24428824.0, 25016295.0]', 50: '[3793081.0, 3803264.0, 4207952.0, 11470889.0, 11669056.0, 12523378.0, 12636851.0, 12730154.0, 15584724.0, 16344287.0, 17109625.0, 17721742.0, 17745772.0, 17910462.0, 18186065.0, 18210837.0, 18223914.0, 21639272.0, 22927223.0, 26708844.0, 27047225.0, 27290433.0, 27308607.0, 27314463.0, 27584488.0, 60520854.0]', 51: '[26150927.0, 27292634.0]', 52: '[2092705.0, 2855690.0, 3448135.0, 3808851.0, 4531792.0, 7778731.0, 12783185.0, 17298876.0, 20135092.0, 20175428.0, 20913824.0, 21599292.0, 22046526.0, 22607332.0, 22691016.0, 22787233.0, 22930717.0, 23249413.0, 23308386.0, 23380573.0, 23824923.0, 23929977.0, 23970974.0, 24197297.0, 24485989.0, 25130652.0, 26732210.0, 26735928.0, 26743678.0, 26786285.0, 27584461.0, 29547928.0, 31990350.0, 78669067.0]', 53: '[22113349.0, 26695070.0, 27119373.0, 27493256.0]', 54: '[3777847.0, 3790007.0, 21871161.0, 22030506.0, 22031745.0, 22176213.0, 22401126.0, 23088391.0, 25613851.0, 25646253.0, 26671540.0, 26863907.0, 26903057.0, 27397174.0, 39338541.0]', 55: '[3802927.0, 3823288.0, 4984890.0, 4989432.0, 5073611.0, 5082137.0, 6061217.0, 6348178.0, 6423623.0, 10965588.0, 15797375.0, 
18127308.0, 18175653.0, 18289498.0, 18849747.0, 21800742.0, 22397195.0, 23221251.0, 23468869.0, 23690813.0, 24191813.0, 24284509.0, 24708045.0, 24855719.0, 25014176.0, 25360346.0, 26846684.0, 27033183.0, 27275736.0, 27331606.0, 27490188.0, 27535521.0, 27568184.0, 27574439.0, 27578281.0, 27578284.0, 27650233.0, 34549244.0, 34746656.0, 35542271.0, 35736297.0, 36587440.0, 37433822.0, 37967362.0, 38022911.0, 38066849.0, 39925109.0, 46251516.0, 46252778.0, 46252929.0]', 56: '[1343281.0, 1345715.0, 3512210.0, 3783167.0, 4382571.0, 5813114.0, 7093752.0, 8235578.0, 8518638.0, 8783563.0, 8850107.0, 9121566.0, 9923753.0, 9955607.0, 10692798.0, 12383956.0, 12776229.0, 12886199.0, 12969910.0, 14707530.0, 14889080.0, 15072156.0, 19041276.0, 20298361.0, 21688702.0, 21900949.0, 21937269.0, 22104118.0, 22153767.0, 22186346.0, 22826706.0, 22855741.0, 22953235.0, 23004360.0, 23134063.0, 23354534.0, 23591524.0, 24305737.0, 24462242.0, 24489942.0, 24592901.0, 24641378.0, 25198004.0, 25253475.0, 25275454.0, 25432521.0, 25488956.0, 25643518.0, 26068855.0, 26166520.0, 26320235.0, 26328728.0, 26331139.0, 26428311.0, 26693295.0, 26791936.0, 26793455.0, 26961378.0, 26972264.0, 27059428.0, 27157985.0, 27313342.0, 27379089.0, 27395407.0, 27399829.0, 27424041.0, 27424409.0, 27517571.0, 27547373.0, 27584206.0, 28676052.0, 29709654.0, 29765036.0, 30774464.0, 32030450.0, 33159613.0, 33476757.0, 34135377.0, 34193337.0, 34958524.0, 36144355.0, 36567630.0, 36950563.0, 36971922.0, 37494273.0, 37855421.0, 37911312.0, 37989420.0, 38051788.0, 38218330.0, 38345747.0, 38420621.0, 38624732.0, 38823526.0, 38876900.0, 38962587.0, 39101659.0, 39226884.0, 39271180.0, 39387557.0, 39439714.0, 39561752.0, 39643971.0, 39673143.0, 39688790.0, 39748498.0, 39758481.0, 39789493.0, 39832372.0, 40003041.0, 40227969.0, 40380014.0, 40511531.0, 40565551.0, 40567797.0, 40624345.0, 40667466.0, 40824391.0, 40944227.0, 41129307.0, 41210096.0, 41277879.0, 41398494.0, 42073897.0, 42310155.0, 42546349.0, 42727821.0, 42826416.0, 
42993293.0, 43014521.0, 43062470.0, 43220481.0, 43223027.0, 43301173.0, 43357321.0, 43478228.0, 43823348.0, 43876770.0, 44319684.0, 44369791.0, 44486085.0, 44531864.0, 45035300.0, 45066335.0, 45493803.0, 45495953.0, 45559863.0, 45925310.0, 45927155.0, 46300493.0, 46328187.0, 46798573.0, 46928022.0, 47018208.0, 47219813.0, 47296708.0, 47882498.0, 48535050.0, 48613244.0, 48692634.0, 49624233.0, 50184623.0, 50773492.0, 50775319.0, 51263061.0, 51581192.0, 51581222.0, 51842981.0, 52278746.0, 52280706.0, 52466999.0, 52544493.0, 52779208.0, 53403873.0, 54287989.0, 54782889.0, 54929359.0, 55019821.0, 55646830.0, 57249384.0, 57249913.0, 57325991.0, 59743243.0]', 57: '[3796196.0, 21858396.0, 25495565.0]', 58: '[3813145.0, 4154951.0, 6018005.0, 6040632.0, 6179742.0, 6395409.0, 6481277.0, 9158815.0, 9288505.0, 10699030.0, 13165538.0, 13755942.0, 14985984.0, 15515377.0, 15951653.0, 21965800.0, 22532548.0, 23301780.0, 23973288.0, 24550262.0, 24731087.0, 24876009.0, 25480283.0, 25489069.0, 26724897.0, 27296379.0, 27358904.0, 27410676.0, 46252098.0]', 59: '[3781927.0, 3789640.0, 3813305.0, 10731687.0, 11027021.0, 20414469.0, 23714925.0, 32595626.0, 33029875.0]', 60: '[3700898.0, 3764296.0, 3770459.0, 3773222.0, 3811210.0, 5987130.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7812486.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11004384.0, 11509383.0, 12543065.0, 12556976.0, 12589238.0, 12653339.0, 12666170.0, 12673679.0, 12702964.0, 14026520.0, 14266412.0, 14271281.0, 14325872.0, 14416179.0, 14516479.0, 14785130.0, 15044247.0, 15383496.0, 16127226.0, 16222285.0, 16960430.0, 17266862.0, 17401011.0, 17461197.0, 17474177.0, 17724327.0, 18063449.0, 18250669.0, 18265166.0, 18426307.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22026693.0, 22067617.0, 22089128.0, 22098429.0, 22164670.0, 22223747.0, 22244680.0, 22276463.0, 22298327.0, 
22341037.0, 22385483.0, 22395684.0, 22439618.0, 22676560.0, 22718956.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23218996.0, 23402016.0, 23423296.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 23994110.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24741062.0, 24770649.0, 24785182.0, 24828348.0, 24839047.0, 24962082.0, 25028009.0, 25031599.0, 25341468.0, 25342918.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25501373.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26228525.0, 26438189.0, 26450647.0, 26451566.0, 26470665.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26735162.0, 26748272.0, 26754314.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26989589.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27218208.0, 27293276.0, 27311622.0, 27316775.0, 27340467.0, 27569697.0, 31501140.0, 34800104.0, 37944191.0, 46149961.0, 46255262.0]', 61: '[3815245.0, 3817049.0, 4133414.0, 4237390.0, 6139410.0, 6302055.0, 6327475.0, 6359463.0, 7761745.0, 10634188.0, 10656776.0, 10799990.0, 11834232.0, 16311228.0, 16686050.0, 17340430.0, 21736076.0, 21792800.0, 22060322.0, 22083057.0, 22105805.0, 22177967.0, 22267098.0, 22415413.0, 22587189.0, 22605414.0, 22605428.0, 22626741.0, 22915051.0, 22915132.0, 22915137.0, 22916043.0, 23096413.0, 23212725.0, 23567105.0, 23567123.0, 23591762.0, 23793319.0, 23812585.0, 24102064.0, 24464348.0, 24622307.0, 25253365.0, 25352342.0, 25353269.0, 25427184.0, 25545290.0, 25671035.0, 26295357.0, 26368255.0, 26595469.0, 26726319.0, 26743135.0, 26822697.0, 26997208.0, 26997210.0, 27015502.0, 27015504.0, 27035582.0, 27038209.0, 27056966.0, 27062452.0, 27081705.0, 27383119.0, 27494547.0, 27547324.0]', 62: '[8193903.0, 8212273.0, 9247849.0, 9463029.0, 10512343.0, 11040434.0, 19848880.0, 21871975.0, 22614354.0, 25182231.0, 25355514.0, 27116547.0]', 63: 
'[3814657.0, 3816821.0, 3818830.0, 9372780.0, 22791620.0, 22805152.0, 23283422.0, 25248920.0, 25586333.0, 27020756.0, 27125092.0, 27145399.0, 27435241.0, 27449240.0, 27582841.0]', 64: '[3775561.0, 3778209.0, 3780242.0, 3783251.0, 3784665.0, 3788774.0, 3798212.0, 3811858.0, 3812283.0, 21830878.0, 21921748.0, 21993829.0, 22457245.0, 22460889.0, 23262728.0, 23400964.0, 23566456.0, 24092138.0, 24403780.0, 25289929.0, 25369658.0, 25618677.0, 25619320.0, 25629177.0, 25634619.0, 25645458.0, 26901477.0, 27038338.0, 27156461.0, 27158001.0, 27372667.0, 27391046.0, 27503418.0, 27537075.0]', 65: '[3769083.0, 3838826.0, 6518919.0, 7655380.0, 7671393.0, 9161974.0, 11933062.0, 12421582.0, 14111284.0, 15041555.0, 17038380.0, 17934524.0, 17951479.0, 17951704.0, 18736765.0, 21855631.0, 22254687.0, 22522730.0, 22525819.0, 22654614.0, 23072375.0, 23161341.0, 23682934.0, 23928270.0, 24002481.0, 25012845.0, 25464571.0, 25530090.0, 25936857.0, 26407346.0, 26861077.0, 41210539.0]', 66: '[3771617.0, 3807056.0, 8167498.0, 9489516.0, 13059819.0, 15236705.0, 17288890.0, 18106562.0, 18243976.0, 19449212.0, 19549705.0, 20360746.0, 21950670.0, 22523056.0, 22590937.0, 22822082.0, 22985088.0, 23085669.0, 23264894.0, 23454885.0, 23791789.0, 24158232.0, 24239892.0, 24257894.0, 24280874.0, 24434788.0, 24953310.0, 24990933.0, 25037706.0, 26312302.0, 26461656.0, 26569604.0, 26755930.0, 26802300.0, 26860472.0, 26891244.0, 26998345.0, 27036330.0, 27157297.0, 27377463.0]', 67: '[8223754.0, 21700957.0, 22248239.0, 24188773.0, 25199790.0, 25489601.0, 27370550.0]', 68: '[3824061.0, 10778962.0, 27157905.0]', 69: '[3885448.0, 4265687.0, 6453737.0, 15055174.0, 21588115.0, 22803210.0, 22810531.0, 22830406.0, 23778134.0, 23779509.0, 26598222.0, 27395145.0, 27536489.0]', 70: '[3817251.0, 3824297.0, 11604215.0, 13348182.0, 15295862.0, 17007082.0, 19729972.0, 19731450.0, 22867664.0, 23356034.0, 24169834.0, 25375270.0, 26970267.0, 27553681.0, 31500731.0, 31500732.0, 35705261.0]', 71: '[5931149.0, 19811894.0, 
19812444.0, 22378265.0, 22409405.0, 23400964.0, 24164668.0, 25377816.0, 25484442.0, 26737825.0, 27395052.0, 27403058.0, 27517636.0]', 72: '[3772180.0, 4094759.0, 4099701.0, 4109923.0, 21758734.0, 22489510.0, 22802791.0, 23109074.0, 23332890.0, 23945495.0, 25404671.0, 26988331.0]', 73: '[22556333.0, 23537378.0, 23653584.0, 26050881.0, 26840895.0, 26877180.0, 27462050.0, 27463470.0]', 74: '[3775845.0, 24206625.0]', 75: '[4064369.0, 4172630.0, 8512849.0, 8513675.0, 10827902.0, 22681078.0, 24186095.0, 24990003.0, 26677157.0]', 76: '[4215108.0, 5754390.0, 6381956.0, 9309964.0, 13707851.0, 22117877.0]', 77: '[10969359.0, 11059344.0, 17714515.0, 19284446.0, 22690303.0, 26320567.0, 26415947.0]', 78: '[3888446.0, 3888996.0, 14727195.0, 22113364.0, 22782837.0, 25044309.0, 25167905.0, 26670443.0]', 79: '[3887054.0, 3889614.0, 3890522.0, 9303701.0, 9484895.0, 11363415.0, 14241244.0, 15291648.0, 16966026.0, 23250732.0, 24016081.0, 24393431.0, 24563127.0, 24788233.0, 25941613.0, 26366102.0, 27392409.0]', 80: '[27415886.0]'}} within the list cited_docdb_list, however, there are ids that do not appear id docdb_family_id. What I would like to do is to detect the number of ids within cited_docdb_list which also appear in docdb_family_id. Is there a way to do so? My df is very large actually (almost 700000 observations). Please notice that the type of docdb_family_id and cited_docdb_list differs in the data. The expected outcome, for instance for the first couple of docdb_family_ids should be: docdb_family_id nb_included 3498148, 2 3512921, 1 ... where 3498148, 2 comes from the fact that the cited_docdb_list related to 3498148 cites 2 indices that appear in docdb_family_id, namely 3802281 and 3944218. In the same fashion, 3512921 cites 3800683 within cited_docdb_list. Thank you
The first idea is to parse each string into a list, convert its values to integers, intersect them with the set of docdb_family_id values, and take the size of that intersection as nb_included: import ast df['cited_docdb_list'] = df['cited_docdb_list'].apply(ast.literal_eval) sets = set(df['docdb_family_id']) df['nb_included']=[len(set(map(int,x)).intersection(sets)) for x in df['cited_docdb_list']] print (df) docdb_family_id cited_docdb_list \ 0 3498148 [3454392.0, 3489764.0, 3492286.0, 3802281.0, 3... 1 3512921 [22785397.0, 3800683.0] 2 3525647 [3508710.0, 3832248.0, 6015961.0, 9173676.0, 2... 3 3636418 [3482303.0, 3518675.0, 3688207.0, 3688953.0, 7... 4 3673165 [7917626.0, 13587294.0, 15860525.0, 16099836.0... .. ... ... 76 3886195 [4215108.0, 5754390.0, 6381956.0, 9309964.0, 1... 77 3887480 [10969359.0, 11059344.0, 17714515.0, 19284446.... 78 3890389 [3888446.0, 3888996.0, 14727195.0, 22113364.0,... 79 3892024 [3887054.0, 3889614.0, 3890522.0, 9303701.0, 9... 80 3944218 [27415886.0] nb_included 0 2 1 1 2 1 3 1 4 0 .. ... 76 0 77 0 78 0 79 0 80 0 [81 rows x 3 columns] A pure-pandas alternative uses DataFrame.explode, then Series.isin to test membership, and finally a grouped sum to count the True values: df = (df.assign(cited_docdb_list = df['cited_docdb_list'].apply(ast.literal_eval)) .explode('cited_docdb_list') .astype({'cited_docdb_list':int}) .assign(nb_included=lambda x: x['cited_docdb_list'].isin(x['docdb_family_id'])) .groupby('docdb_family_id', as_index=False)['nb_included'] .sum()) print (df) docdb_family_id nb_included 0 3498148 2 1 3512921 1 2 3525647 1 3 3636418 1 4 3673165 0 .. ... ... 76 3886195 0 77 3887480 0 78 3890389 0 79 3892024 0 80 3944218 0 [81 rows x 2 columns]
Using shift function along with max function Pandas
I am attempting to create a technical indicator ('Supertrend') using Pandas. The formula for this column is recursive. (For people familiar with Pinescript, this column will replicate the result of this Pinescript function): df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up']) The problem occurs in the true part of the np.select()because I cannot call .shift() on a string. Normally, I would make a new column that uses .shift() beforehand but since this is recursive, I have to do it all in one line. If possible I'd like to avoid using loops for speed; prefer solutions using native pandas or numpy functions. What I am looking for A way to find max function that can accomodate a .shift() call Columns that are used: def tr(high,low,close1): return max(high - low, abs(high - close1), abs(low - close1)) df['st_closeprev'] = df['Close'].shift() df['st_hl2'] = (df['High']+df['Low'])/2 df['st_tr'] = df.apply(lambda row: tr(row['High'],row['Low'],row['st_closeprev']),axis=1) df['st_atr'] = df['st_tr'].ewm(alpha = 1/pd,adjust=False,min_periods=pd).mean() df['st_up'] = df['st_hl2'] - factor * df['st_atr'] df['st_dn'] = df['st_hl2'] + factor * df['st_atr'] df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up']) Sample data obtained by the df.to_dict {'Date': {0: Timestamp('2021-01-01 09:15:00'), 1: Timestamp('2021-01-01 09:30:00'), 2: Timestamp('2021-01-01 09:45:00'), 3: Timestamp('2021-01-01 10:00:00'), 4: Timestamp('2021-01-01 10:15:00'), 5: Timestamp('2021-01-01 10:30:00'), 6: Timestamp('2021-01-01 10:45:00'), 7: Timestamp('2021-01-01 11:00:00'), 8: Timestamp('2021-01-01 11:15:00'), 9: Timestamp('2021-01-01 11:30:00'), 10: Timestamp('2021-01-01 11:45:00'), 11: Timestamp('2021-01-01 12:00:00'), 12: Timestamp('2021-01-01 12:15:00'), 13: Timestamp('2021-01-01 12:30:00'), 14: Timestamp('2021-01-01 12:45:00'), 15: 
Timestamp('2021-01-01 13:00:00'), 16: Timestamp('2021-01-01 13:15:00'), 17: Timestamp('2021-01-01 13:30:00'), 18: Timestamp('2021-01-01 13:45:00'), 19: Timestamp('2021-01-01 14:00:00'), 20: Timestamp('2021-01-01 14:15:00'), 21: Timestamp('2021-01-01 14:30:00'), 22: Timestamp('2021-01-01 14:45:00'), 23: Timestamp('2021-01-01 15:00:00'), 24: Timestamp('2021-01-01 15:15:00'), 25: Timestamp('2021-01-04 09:15:00')}, 'Open': {0: 31250.0, 1: 31376.0, 2: 31405.0, 3: 31389.4, 4: 31377.5, 5: 31347.8, 6: 31310.8, 7: 31343.4, 8: 31349.5, 9: 31349.9, 10: 31325.1, 11: 31310.9, 12: 31329.0, 13: 31376.0, 14: 31375.5, 15: 31357.4, 16: 31325.0, 17: 31341.1, 18: 31300.0, 19: 31324.5, 20: 31353.3, 21: 31350.0, 22: 31346.9, 23: 31330.0, 24: 31314.3, 25: 31450.2}, 'High': {0: 31407.0, 1: 31425.0, 2: 31411.95, 3: 31389.45, 4: 31382.0, 5: 31350.0, 6: 31354.6, 7: 31359.0, 8: 31370.0, 9: 31364.7, 10: 31350.0, 11: 31337.9, 12: 31378.9, 13: 31419.5, 14: 31377.75, 15: 31360.0, 16: 31367.15, 17: 31345.2, 18: 31340.0, 19: 31367.0, 20: 31375.0, 21: 31370.0, 22: 31350.0, 23: 31334.6, 24: 31329.6, 25: 31599.0}, 'Low': {0: 31250.0, 1: 31367.95, 2: 31352.5, 3: 31331.65, 4: 31301.4, 5: 31303.05, 6: 31310.0, 7: 31325.05, 8: 31335.35, 9: 31315.35, 10: 31281.9, 11: 31292.0, 12: 31316.25, 13: 31352.05, 14: 31335.0, 15: 31322.0, 16: 31318.25, 17: 31261.55, 18: 31283.3, 19: 31324.5, 20: 31322.0, 21: 31332.15, 22: 31324.1, 23: 31300.15, 24: 31280.0, 25: 31430.0}, 'Close': {0: 31375.0, 1: 31398.3, 2: 31386.0, 3: 31377.0, 4: 31342.3, 5: 31311.7, 6: 31345.0, 7: 31349.0, 8: 31344.2, 9: 31327.6, 10: 31311.3, 11: 31325.6, 12: 31373.0, 13: 31375.0, 14: 31357.4, 15: 31326.0, 16: 31345.9, 17: 31300.6, 18: 31324.4, 19: 31353.8, 20: 31345.6, 21: 31341.6, 22: 31332.5, 23: 31311.0, 24: 31285.0, 25: 31558.4}, 'Volume': {0: 259952, 1: 163775, 2: 105900, 3: 99725, 4: 115175, 5: 78625, 6: 67675, 7: 46575, 8: 53350, 9: 54175, 10: 96975, 11: 80925, 12: 79475, 13: 147775, 14: 38900, 15: 64925, 16: 52425, 17: 142175, 18: 81800, 
19: 74950, 20: 68550, 21: 40350, 22: 47150, 23: 119200, 24: 222875, 25: 524625}}
Change: df[['st_up','st_trendup'.shift()]].max(axis=1) to: df[['st_up','st_trendup']].assign(st_trendup = df['st_trendup'].shift()).max(axis=1)
joining/merging both index and non-index columns in a pandas multi-index
Context: I have two very large pandas dataframes to join which barely fit in memory (8GB each, millions of rows) and have the challenge of performing a performant join using combinations of both indexed and non-indexed columns. Fuzzy joining is out of scope. Variables in order of cardinality: dataset_1 has these variables: postcode, street_name, secondary_number, primary_number, unique_id dataset_2 has these variables: postcode, street_name, house_number, house_name, sub_building_name, different_unique_id postcode and street_name are shared keys, and multiindexing seems the correct choice to improve joining performance in pandas: dataset_1 = dataset_1.set_index(['postcode', 'street', "unique_id"]).sort_index() dataset_2 = dataset_2.set_index(['postcode', 'street', "different_unique_id"]).sort_index() Processing: At this stage I can compute in pandas if memory allows. If not, I would use Dask, however it can't handle multi-indexes. In the event this were possible (or unnecessary) the sorting would still need to be handled in pandas as Dask cannot manage this. If Dask were an option this is how I would convert: dd1 = dd.from_pandas(dataset_1, npartitions=1) #large left dataframe del dataset_1 #to release the memory dd2 = dd.from_pandas(dataset_2, npartitions=3) #partitioned right dataframe for performance del dataset_2 #to release the memory Problem: The challenge is performing an inner join on non-null variables using the indexes ("postcode" and "street"), alongside non-indexed columns. Combinations of the non-indexed variables will be iterated in a for loop. 
Solution Sketch: This gives an idea what I would like to do to maintain the performance gains from the indexing, but is of course not syntactically possible: output = pd.merge(df1, df2, how='inner', left_on=["postcode", "street_name", "secondary_number", "primary_number"], right_on=["postcode", "street_name", "house_name", "house_number"], left_index=[True,True,False,False], right_index=[True,True,False,False]) Summary: My understanding is that pd.join can handle non-indexed and indexed columns, whereas pd.merge cannot. As a result I'm unsure how to achieve this join in pd.join where there is a combination of both indexed and non-indexed columns. Example of intersects: {'different_unique_id': {27: '{582D0636-8DEF-8F22-E053-6C04A8C01BAC}', 41: '{D9E869FE-7B55-4C36-AC43-695B9033A13B}', 33: '{93E6821E-554E-40FD-E053-6B04A8C0C1DF}', 1: '{288DCE29-0589-E510-E050-A8C06205480E}', 48: '{3A23DDD5-A0E8-41D2-A514-5B09385C301F}', 52: '{CEB16957-F7FA-4D1B-B45F-A390214735BC}', 13: '{404A5AF3-9B20-CD2B-E050-A8C063055C7B}', 16: '{64342BFD-FD07-422C-E053-6C04A8C0FB8A}', 57: '{29A8E769-8A10-4477-9494-FF55EF5FAE4B}', 10: '{404A5AF3-0B58-CD2B-E050-A8C063055C7B}', 21: '{55BDCAE6-0C10-521D-E053-6B04A8C0DD7A}', 31: '{5C676A02-1781-4152-950C-6E5CA2CBC487}', 7: '{68FEB20B-142E-38DA-E053-6C04A8C051AE}', 45: '{8F1B26BD-673F-53DB-E053-6C04A8C03649}', 12: '{2F115F7A-8F81-4124-9FD4-FB76E742B2C1}', 36: '{344AB2D7-4B59-4AB4-8F52-75B29BE8C509}', 20: '{965B6D91-D4B6-95E4-E053-6C04A8C07729}', 56: '{59872FD9-F39D-4BB9-95F6-91E002D948B1}', 22: '{6141DFF0-973F-4FEC-A582-7F310B566031}'}, 'unique_id': {27: 10002277489, 41: 64023255, 33: 10007367447, 1: 22229221, 48: 10033235735, 52: 100062162615, 13: 50103744, 16: 10022903998, 57: 12015624, 10: 12154940, 21: 10024247587, 31: 100041193990, 7: 10008230730, 45: 10091640210, 12: 202107394, 36: 5062293, 20: 48114659, 56: 10001311242, 22: 10000443154}, 'street': {27: 'thewharf', 41: 'parkroad', 33: 'oldmillclose', 1: 'thirdavenue', 48: 'woolnersway', 52: 
'sumnerroad', 13: 'cliftongardens', 16: 'windhamroad', 57: 'westparkroad', 10: 'grangeroad', 21: 'staplersroad', 31: 'strand', 7: 'amhurstroad', 45: 'eatonroad', 12: 'northendroad', 36: 'belsizegrove', 20: 'watermillway', 56: 'orchardplace', 22: 'thurlowparkroad'}, 'postcode': {27: 'lu72la', 41: 'cf626nt', 33: 'hr40aq', 1: 'bn32pd', 48: 'sg13ae', 52: 'gu97jx', 13: 'ct202ef', 16: 'bh14rn', 57: 'ub24af', 10: 'w55bu', 21: 'po302dp', 31: 'tq148aq', 7: 'e82ag', 45: 'ch47ew', 12: 'ha90ae', 36: 'nw34tt', 20: 'sw192rw', 56: 'so143hw', 22: 'se218hp'}, 'secondary_number': {27: '76', 41: 'flat6', 33: '49', 1: 'flat10', 48: '145', 52: '31', 13: 'flat19', 16: 'flat7', 57: '76', 10: 'flat1', 21: 'flat1', 31: 'flat43', 7: 'flata', 45: '8', 12: '42', 36: 'flat9', 20: 'flat43', 56: 'flat156', 22: 'flat2'}, 'primary_number': {27: 'eastdock', 41: 'courtlands', 33: 'watkinscourt', 1: 'ascothouse', 48: 'monumentcourt', 52: 'sumnercourt', 13: '22-24', 16: '77', 57: 'osterleyviews', 10: '55-59', 21: '138', 31: 'leandercourt', 7: '130', 45: 'greenbankhall', 12: 'danescourt', 36: 'holmefieldcourt', 20: 'bennetscourtyard', 56: 'oceanaboulevard', 22: '124f'}, 'building_name': {27: 'eastdock', 41: 'courtlands', 33: 'watkinscourt', 1: 'ascothouse', 48: 'monumentcourt', 52: 'sumnercourt', 13: None, 16: None, 57: 'osterleyviews', 10: None, 21: None, 31: 'leandercourt', 7: None, 45: 'greenbankhall', 12: 'danescourt', 36: 'holmefieldcourt', 20: 'bennetscourtyard', 56: 'oceanaboulevard', 22: None}, 'building_number': {27: None, 41: None, 33: None, 1: '18-20', 48: None, 52: None, 13: '22-24', 16: '77', 57: None, 10: '55-59', 21: '138', 31: None, 7: '130', 45: None, 12: None, 36: None, 20: None, 56: None, 22: '124f'}, 'sub_building': {27: '76', 41: 'flat6', 33: '49', 1: 'flat10', 48: '145', 52: '31', 13: 'flat19', 16: 'flat7', 57: '76', 10: 'flat1', 21: 'flat1', 31: 'flat43', 7: 'flata', 45: '8', 12: '42', 36: 'flat9', 20: 'flat43', 56: 'flat156', 22: 'flat2'}}
I am using Apache OpenNLP 1.8.0 and trying to use POSTaggerTrainer for training
After referring to the documentation for version 1.8.0, I tried the CLI command given in the docs, but it doesn't seem to work, and neither does the Java code given under API. I have a text file with the following text: train-me.txt: Last_JJ September_NNP ,_, I_PRP tried_VBD to_TO find_VB out_RP the_DT address_NN of_IN an_DT old_JJ school_NN friend_NN whom_WP I_PRP had_VBD not_RB seen_VBN for_IN 15_CD years_NNS ._. I_PRP just_RB knew_VBD his_PRP$ name_NN ,_, Alan_NNP McKennedy_NNP ,_, and_CC I_PRP 'd_MD heard_VBD the_DT rumour_NN that_IN he_PRP 'd_MD moved_VBD to_TO Scotland_NNP ,_, the_DT country_NN of_IN his_PRP$ ancestors_NNS ._. dictionary.xml: <?xml version="1.0" encoding="UTF-8"?><dictionary> <entry tags="NNP"> <token>Calysta</token> </entry> </dictionary> I want to use either of these two (if possible) to train the program to tag Calysta as Calysta_NNP.
I know its kind of a late answer, but if it helps.. arjun#arjun-VPCEH26EN:~/apache-opennlp-1.8.0/bin$ ./opennlp POSTaggerTrainer -data train-me.txt -dict dictionary.xml -lang en -model en-pos-maxent-cust.bin Indexing events using cutoff of 5 Computing event counts... done. 52 events Indexing... done. Sorting and merging events... done. Reduced 52 events to 37. Done indexing. Incorporating indexed data for training... done. Number of Event Tokens: 37 Number of Outcomes: 20 Number of Predicates: 13 ...done. Computing model parameters ... Performing 100 iterations. 1: ... loglikelihood=-155.77807822480764 0.038461538461538464 2: ... loglikelihood=-130.9791219262959 0.5 3: ... loglikelihood=-115.82234962334346 0.5576923076923077 4: ... loglikelihood=-105.13170003394434 0.6730769230769231 5: ... loglikelihood=-96.9869322585347 0.6730769230769231 6: ... loglikelihood=-90.51694300405765 0.6923076923076923 7: ... loglikelihood=-85.23546058034727 0.6923076923076923 8: ... loglikelihood=-80.83562367302892 0.7307692307692307 9: ... loglikelihood=-77.1097811259408 0.7307692307692307 10: ... loglikelihood=-73.91120812658458 0.7307692307692307 11: ... loglikelihood=-71.13309894938885 0.75 12: ... loglikelihood=-68.69589846103266 0.75 13: ... loglikelihood=-66.53917914878002 0.75 14: ... loglikelihood=-64.61622830997396 0.75 15: ... loglikelihood=-62.890348665987055 0.75 16: ... loglikelihood=-61.332281582677155 0.75 17: ... loglikelihood=-59.91838269276684 0.75 18: ... loglikelihood=-58.629310291693805 0.75 19: ... loglikelihood=-57.44906823464401 0.75 20: ... loglikelihood=-56.36429724151985 0.75 21: ... loglikelihood=-55.36374258766163 0.75 22: ... loglikelihood=-54.43784870333842 0.75 23: ... loglikelihood=-53.57844629573773 0.75 24: ... loglikelihood=-52.77850781690259 0.75 25: ... loglikelihood=-52.03195408008879 0.75 26: ... loglikelihood=-51.333499646171695 0.75 27: ... loglikelihood=-50.67852796323892 0.75 28: ... loglikelihood=-50.062989611378285 0.75 29: ... 
loglikelihood=-49.48331869161687 0.75 30: ... loglikelihood=-48.93636361232364 0.75 31: ... loglikelihood=-48.419329410290345 0.75 32: ... loglikelihood=-47.92972939439551 0.75 33: ... loglikelihood=-47.465344384258486 0.75 34: ... loglikelihood=-47.02418818116749 0.75 35: ... loglikelihood=-46.604478186421446 0.75 36: ... loglikelihood=-46.20461029609541 0.75 37: ... loglikelihood=-45.82313736754338 0.75 38: ... loglikelihood=-45.458750683509976 0.75 39: ... loglikelihood=-45.11026394313063 0.75 40: ... loglikelihood=-44.77659939167084 0.75 41: ... loglikelihood=-44.45677576728319 0.75 42: ... loglikelihood=-44.14989779685863 0.75 43: ... loglikelihood=-43.855147016888836 0.75 44: ... loglikelihood=-43.571773731178716 0.75 45: ... loglikelihood=-43.299089946831224 0.75 46: ... loglikelihood=-43.03646315440174 0.75 47: ... loglikelihood=-42.78331083845189 0.75 48: ... loglikelihood=-42.53909562169248 0.75 49: ... loglikelihood=-42.30332096009808 0.7692307692307693 50: ... loglikelihood=-42.07552731829657 0.7692307692307693 51: ... loglikelihood=-41.85528876457919 0.7692307692307693 52: ... loglikelihood=-41.642209933359936 0.7692307692307693 53: ... loglikelihood=-41.43592331010347 0.7692307692307693 54: ... loglikelihood=-41.236086799846426 0.7692307692307693 55: ... loglikelihood=-41.04238154563922 0.7692307692307693 56: ... loglikelihood=-40.854509967677004 0.7692307692307693 57: ... loglikelihood=-40.67219399768791 0.7692307692307693 58: ... loglikelihood=-40.49517348640929 0.7692307692307693 59: ... loglikelihood=-40.32320476478338 0.7692307692307693 60: ... loglikelihood=-40.1560593419208 0.7692307692307693 61: ... loglikelihood=-39.99352272496435 0.7692307692307693 62: ... loglikelihood=-39.835393347789605 0.7692307692307693 63: ... loglikelihood=-39.68148159704321 0.7692307692307693 64: ... loglikelihood=-39.53160892537774 0.7692307692307693 65: ... loglikelihood=-39.38560704292392 0.7692307692307693 66: ... 
loglikelihood=-39.243317179072264 0.7692307692307693 67: ... loglikelihood=-39.10458940753585 0.7692307692307693 68: ... loglikelihood=-38.969282028454 0.7692307692307693 69: ... loglikelihood=-38.8372610019872 0.7692307692307693 70: ... loglikelihood=-38.70839942845979 0.7692307692307693 71: ... loglikelihood=-38.58257707064014 0.7692307692307693 72: ... loglikelihood=-38.45967991421811 0.7692307692307693 73: ... loglikelihood=-38.33959976295419 0.7692307692307693 74: ... loglikelihood=-38.222233865340385 0.7692307692307693 75: ... loglikelihood=-38.107484569938585 0.7692307692307693 76: ... loglikelihood=-37.995259006848066 0.7692307692307693 77: ... loglikelihood=-37.88546879301048 0.7692307692307693 78: ... loglikelihood=-37.77802975928638 0.7692307692307693 79: ... loglikelihood=-37.6728616974405 0.7692307692307693 80: ... loglikelihood=-37.56988812535212 0.7692307692307693 81: ... loglikelihood=-37.469036068928645 0.7692307692307693 82: ... loglikelihood=-37.370235859343474 0.7692307692307693 83: ... loglikelihood=-37.27342094434868 0.7692307692307693 84: ... loglikelihood=-37.178527712527796 0.7692307692307693 85: ... loglikelihood=-37.08549532945806 0.7692307692307693 86: ... loglikelihood=-36.99426558484419 0.7692307692307693 87: ... loglikelihood=-36.904782749769446 0.7692307692307693 88: ... loglikelihood=-36.81699344328549 0.7692307692307693 89: ... loglikelihood=-36.730846507630154 0.7692307692307693 90: ... loglikelihood=-36.64629289142378 0.7692307692307693 91: ... loglikelihood=-36.563285540250355 0.7692307692307693 92: ... loglikelihood=-36.48177929407976 0.7692307692307693 93: ... loglikelihood=-36.40173079103272 0.7692307692307693 94: ... loglikelihood=-36.32309837703207 0.7692307692307693 95: ... loglikelihood=-36.24584202091997 0.7692307692307693 96: ... loglikelihood=-36.16992323465651 0.7692307692307693 97: ... loglikelihood=-36.095304998244124 0.7692307692307693 98: ... loglikelihood=-36.021951689052344 0.7692307692307693 99: ... 
loglikelihood=-35.94982901524132 0.7692307692307693 100: ... loglikelihood=-35.87890395300729 0.7692307692307693 Writing pos tagger model ... done (0.086s) Wrote pos tagger model to path: /home/arjun/apache-opennlp-1.8.0/bin/en-pos-maxent-cust.bin Execution time: 0.522 seconds I used Apache OpenNLP 1.8.0. Let me know if you need any more help with the Apache OpenNLP POS Tagger.
Creating a Dropdown menu in Plotly from Pandas
I've had a look at the following link but its not very clear https://plot.ly/pandas/dropdowns/. I have the following figure generated in plotly but would like a dropdown menu (of A, B and C) to select and display the respective line only import pandas as pd import plotly plotly.offline.init_notebook_mode() import plotly.offline as py from plotly.graph_objs import * df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 
50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}}) fig = { 'data': [ { 'x': df[df['kit']==kit]['FPR'], 'y': df[df['kit']==kit]['SNS'], 'name': kit, } for kit in ['A', 'B', 'C'] ], } py.iplot(fig)
I'm not sure how to do this directly from plotly; however, you can use interact function from ipywidgets library. In your case it will be the following: from ipywidgets import interact df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 
0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}}) def plot_it(kit): fig = { 'data': [ { 'x': df[df['kit']==kit]['FPR'], 'y': df[df['kit']==kit]['SNS'], 'name': kit } ] } py.iplot(fig) interact(plot_it, kit=('A', 'B', 'C'))