Pandas Multivariate Linear Regression by Group and Saving Results as csv - pandas

I am trying to calculate linear regression of Y=C-A column, x = ['Plate X', 'Plate Y', 'Field X'] and group those values by Drum and Plate. Additional question - how to save results as a file, csv preferable.
Is pandas package is sufficient for this task or other package needed.
Thank you
There is my data set:
DF = {'A': {0: 305.03277000000003,
1: 304.42513500000001,
2: 305.119575,
3: 304.42513500000001,
4: 304.07791500000002,
5: 304.85916000000003,
6: 305.72721000000001,
7: 305.81401499999998,
8: 304.07791500000002,
9: 305.03277000000003,
10: 304.68554999999998,
11: 304.945965,
12: 303.38347499999998,
13: 304.945965,
14: 304.51193999999998,
15: 304.25152500000002,
16: 304.51193999999998,
17: 304.25152500000002,
18: 304.42513500000001,
19: 304.85916000000003,
20: 303.8175,
21: 305.119575,
22: 304.59874500000001,
23: 304.68554999999998,
24: 304.33832999999998,
25: 303.90430499999997,
26: 304.68554999999998,
27: 304.772355,
28: 304.59874500000001,
29: 304.772355,
30: 304.59874500000001,
31: 305.119575,
32: 305.37998999999996,
33: 304.59874500000001,
34: 304.42513500000001,
35: 304.33832999999998,
36: 304.51193999999998,
37: 305.46679499999999,
38: 304.59874500000001,
39: 305.29318499999999,
40: 304.85916000000003,
41: 305.29318499999999,
42: 305.119575,
43: 304.945965,
44: 305.29318499999999,
45: 304.85916000000003,
46: 305.72721000000001,
47: 306.16123500000003,
48: 305.37998999999996,
49: 305.03277000000003,
50: 305.20637999999997,
51: 304.51193999999998,
52: 308.33136000000002,
53: 305.81401499999998,
54: 305.55360000000002,
55: 306.42165,
56: 305.64040499999999,
57: 305.29318499999999,
58: 305.37998999999996,
59: 304.772355,
60: 305.37998999999996,
61: 305.72721000000001,
62: 305.90082000000001,
63: 305.64040499999999,
64: 305.81401499999998,
65: 304.85916000000003,
66: 305.20637999999997,
67: 306.42165,
68: 305.64040499999999,
69: 305.55360000000002,
70: 304.59874500000001,
71: 305.55360000000002,
72: 306.07443000000001,
73: 306.42165,
74: 305.98762499999998,
75: 306.68206499999997,
76: 305.03277000000003,
77: 305.46679499999999,
78: 306.42165,
79: 304.85916000000003,
80: 304.51193999999998,
81: 303.8175,
82: 304.51193999999998,
83: 304.16472000000005,
84: 304.51193999999998,
85: 303.73069500000003,
86: 303.29667000000001,
87: 304.68554999999998,
88: 303.73069500000003,
89: 304.42513500000001,
90: 304.51193999999998,
91: 304.16472000000005,
92: 304.945965,
93: 304.772355,
94: 304.42513500000001,
95: 304.16472000000005,
96: 305.119575,
97: 304.16472000000005,
98: 304.25152500000002,
99: 305.20637999999997},
'B': {0: 311.10912000000002,
1: 310.93551000000002,
2: 313.279245,
3: 313.19243999999998,
4: 309.11260499999997,
5: 309.0258,
6: 309.72023999999999,
7: 313.279245,
8: 311.89036499999997,
9: 311.19592499999999,
10: 308.76538500000004,
11: 309.72023999999999,
12: 312.15078,
13: 309.19941,
14: 308.50497000000001,
15: 308.33136000000002,
16: 309.89384999999999,
17: 310.848705,
18: 312.23758500000002,
19: 313.53966000000003,
20: 309.72023999999999,
21: 309.11260499999997,
22: 311.89036499999997,
23: 309.98065499999996,
24: 309.19941,
25: 310.41467999999998,
26: 311.62995000000001,
27: 311.02231499999999,
28: 310.32787500000001,
29: 310.06745999999998,
30: 311.89036499999997,
31: 311.89036499999997,
32: 309.98065499999996,
33: 312.06397500000003,
34: 306.85567500000002,
35: 309.98065499999996,
36: 311.80356,
37: 309.19941,
38: 312.41119500000002,
39: 310.848705,
40: 311.10912000000002,
41: 310.501485,
42: 313.80007499999999,
43: 308.24455499999999,
44: 312.49799999999999,
45: 313.10563500000001,
46: 313.19243999999998,
47: 309.63343500000002,
48: 311.10912000000002,
49: 310.501485,
50: 310.58828999999997,
51: 314.23410000000001,
52: 312.41119500000002,
53: 313.01882999999998,
54: 311.19592499999999,
55: 311.54314500000004,
56: 313.279245,
57: 311.54314500000004,
58: 311.45634000000001,
59: 313.19243999999998,
60: 312.15078,
61: 312.15078,
62: 313.452855,
63: 311.02231499999999,
64: 311.02231499999999,
65: 311.28272999999996,
66: 311.02231499999999,
67: 307.897335,
68: 313.19243999999998,
69: 311.97717,
70: 311.10912000000002,
71: 312.58480499999996,
72: 312.58480499999996,
73: 315.01534500000002,
74: 311.97717,
75: 313.452855,
76: 311.80356,
77: 308.67857999999995,
78: 311.71675499999998,
79: 311.36953499999998,
80: 310.501485,
81: 308.85219000000001,
82: 311.10912000000002,
83: 309.37302,
84: 307.98413999999997,
85: 311.10912000000002,
86: 311.28272999999996,
87: 310.93551000000002,
88: 310.24107000000004,
89: 307.11608999999999,
90: 307.55011500000001,
91: 308.76538500000004,
92: 310.848705,
93: 307.02928500000002,
94: 309.89384999999999,
95: 311.28272999999996,
96: 307.81052999999997,
97: 309.72023999999999,
98: 311.54314500000004,
99: 310.32787500000001},
'C': {0: 305.72721000000001,
1: 306.00498599999997,
2: 306.49109399999998,
3: 306.59526,
4: 305.48415599999998,
5: 305.24110200000001,
6: 306.28276199999999,
7: 306.97720199999998,
8: 306.80359199999998,
9: 307.081368,
10: 306.10915199999999,
11: 304.47721799999999,
12: 305.24110200000001,
13: 304.68554999999998,
14: 306.35220600000002,
15: 305.17165799999998,
16: 306.45637200000004,
17: 305.86609800000002,
18: 306.734148,
19: 306.28276199999999,
20: 305.51887799999997,
21: 308.053584,
22: 306.52581600000002,
23: 305.935542,
24: 306.56053800000001,
25: 306.10915199999999,
26: 306.56053800000001,
27: 305.79665399999999,
28: 305.761932,
29: 304.75499400000001,
30: 306.07443000000001,
31: 306.35220600000002,
32: 305.86609800000002,
33: 307.01192400000002,
34: 306.28276199999999,
35: 305.55360000000002,
36: 306.35220600000002,
37: 306.80359199999998,
38: 305.90082000000001,
39: 306.03970800000002,
40: 307.18553399999996,
41: 304.82443799999999,
42: 305.83137599999998,
43: 306.97720199999998,
44: 306.38692799999995,
45: 306.49109399999998,
46: 306.38692799999995,
47: 306.52581600000002,
48: 305.06749200000002,
49: 306.07443000000001,
50: 306.56053800000001,
51: 305.48415599999998,
52: 305.69248799999997,
53: 307.63692000000003,
54: 307.28969999999998,
55: 305.62304399999999,
56: 306.38692799999995,
57: 305.86609800000002,
58: 306.56053800000001,
59: 305.55360000000002,
60: 306.07443000000001,
61: 306.52581600000002,
62: 306.56053800000001,
63: 305.34526800000003,
64: 305.24110200000001,
65: 304.58138399999996,
66: 307.04664600000001,
67: 306.00498599999997,
68: 305.79665399999999,
69: 306.49109399999998,
70: 305.51887799999997,
71: 305.72721000000001,
72: 306.31748399999998,
73: 306.03970800000002,
74: 307.15081200000003,
75: 307.60219799999999,
76: 304.92860400000001,
77: 304.68554999999998,
78: 305.58832200000001,
79: 305.449434,
80: 306.83831400000003,
81: 306.49109399999998,
82: 306.94247999999999,
83: 304.963326,
84: 307.25497799999999,
85: 305.97026399999999,
86: 306.07443000000001,
87: 305.761932,
88: 305.90082000000001,
89: 306.31748399999998,
90: 306.69942599999996,
91: 306.07443000000001,
92: 305.449434,
93: 304.789716,
94: 304.72027200000002,
95: 306.10915199999999,
96: 305.449434,
97: 305.31054599999999,
98: 305.31054599999999,
99: 306.45637200000004},
'C-A': {0: 0.69443999999999995,
1: 1.5798510000000001,
2: 1.3715190000000002,
3: 2.1701250000000001,
4: 1.4062410000000001,
5: 0.381942,
6: 0.55555200000000005,
7: 1.163187,
8: 2.7256770000000001,
9: 2.0485980000000001,
10: 1.423602,
11: -0.46874700000000002,
12: 1.8576270000000001,
13: -0.26041500000000001,
14: 1.840266,
15: 0.92013299999999998,
16: 1.9444319999999999,
17: 1.614573,
18: 2.3090130000000002,
19: 1.423602,
20: 1.7013779999999998,
21: 2.9340090000000001,
22: 1.927071,
23: 1.249992,
24: 2.2222080000000002,
25: 2.204847,
26: 1.8749880000000001,
27: 1.0242990000000001,
28: 1.163187,
29: -0.017361000000000001,
30: 1.4756850000000001,
31: 1.232631,
32: 0.48610799999999998,
33: 2.413179,
34: 1.8576270000000001,
35: 1.2152700000000001,
36: 1.840266,
37: 1.336797,
38: 1.3020750000000001,
39: 0.74652299999999994,
40: 2.3263739999999999,
41: -0.46874700000000002,
42: 0.71180100000000002,
43: 2.031237,
44: 1.0937430000000001,
45: 1.631934,
46: 0.65971800000000003,
47: 0.36458099999999999,
48: -0.312498,
49: 1.04166,
50: 1.354158,
51: 0.97221599999999997,
52: -2.6388720000000001,
53: 1.822905,
54: 1.7361,
55: -0.79860600000000004,
56: 0.74652299999999994,
57: 0.57291300000000001,
58: 1.1805479999999999,
59: 0.78124499999999997,
60: 0.69443999999999995,
61: 0.79860600000000004,
62: 0.65971800000000003,
63: -0.29513699999999998,
64: -0.57291300000000001,
65: -0.27777600000000002,
66: 1.840266,
67: -0.41666400000000003,
68: 0.156249,
69: 0.93749400000000005,
70: 0.92013299999999998,
71: 0.17360999999999999,
72: 0.24305399999999999,
73: -0.381942,
74: 1.163187,
75: 0.92013299999999998,
76: -0.10416600000000001,
77: -0.78124499999999997,
78: -0.83332800000000007,
79: 0.59027399999999997,
80: 2.3263739999999999,
81: 2.673594,
82: 2.4305400000000001,
83: 0.79860600000000004,
84: 2.7430380000000003,
85: 2.2395689999999999,
86: 2.7777599999999998,
87: 1.0763819999999999,
88: 2.1701250000000001,
89: 1.8923490000000001,
90: 2.1874860000000003,
91: 1.9097099999999998,
92: 0.50346899999999994,
93: 0.017361000000000001,
94: 0.29513699999999998,
95: 1.9444319999999999,
96: 0.32985900000000001,
97: 1.145826,
98: 1.059021,
99: 1.249992},
'Drum': {0: 'LAAA',
1: 'LAAA',
2: 'LAAA',
3: 'LAAA',
4: 'LAAA',
5: 'LAAA',
6: 'LAAA',
7: 'LAAA',
8: 'LAAA',
9: 'LAAA',
10: 'LAAA',
11: 'LAAA',
12: 'LAAA',
13: 'LAAA',
14: 'LAAA',
15: 'LAAA',
16: 'LAAA',
17: 'LAAA',
18: 'LAAA',
19: 'LAAA',
20: 'LAAA',
21: 'LAAA',
22: 'LAAA',
23: 'LAAA',
24: 'LAAA',
25: 'LAAA',
26: 'LAAA',
27: 'LAAA',
28: 'LAAA',
29: 'LAAA',
30: 'LAAA',
31: 'LAAA',
32: 'LAAA',
33: 'LAAA',
34: 'LAAA',
35: 'LAAA',
36: 'LAAA',
37: 'LAAA',
38: 'LAAA',
39: 'LAAA',
40: 'LAAA',
41: 'LAAA',
42: 'LAAA',
43: 'LAAA',
44: 'LAAA',
45: 'LAAA',
46: 'LAAA',
47: 'LAAA',
48: 'LAAA',
49: 'LAAA',
50: 'LAAA',
51: 'LAAA',
52: 'LAAA',
53: 'LAAA',
54: 'LAAA',
55: 'LAAA',
56: 'LAAA',
57: 'LAAA',
58: 'LAAA',
59: 'LAAA',
60: 'LAAA',
61: 'LAAA',
62: 'LAAA',
63: 'LAAA',
64: 'LAAA',
65: 'LAAA',
66: 'LAAA',
67: 'LAAA',
68: 'LAAA',
69: 'LAAA',
70: 'LAAA',
71: 'LAAA',
72: 'LAAA',
73: 'LAAA',
74: 'LAAA',
75: 'LAAA',
76: 'LAAA',
77: 'LAAA',
78: 'LAAA',
79: 'LAAA',
80: 'LAAA',
81: 'LAAA',
82: 'LAAA',
83: 'LAAA',
84: 'LAAA',
85: 'LAAA',
86: 'LAAA',
87: 'LAAA',
88: 'LAAA',
89: 'LAAA',
90: 'LAAA',
91: 'LAAA',
92: 'LAAA',
93: 'LAAA',
94: 'LAAA',
95: 'LAAA',
96: 'LAAA',
97: 'LAAA',
98: 'LAAA',
99: 'LAAA'},
'FIELD X': {0: 4.7949800000000007,
1: -5.5198839999999993,
2: 4.7949800000000007,
3: 4.7949800000000007,
4: -5.5198839999999993,
5: 4.7949800000000007,
6: -5.5198839999999993,
7: 4.7949800000000007,
8: 4.7949800000000007,
9: -5.5198839999999993,
10: -5.5198839999999993,
11: 4.7949800000000007,
12: 4.7949800000000007,
13: -5.5198839999999993,
14: 4.7949800000000007,
15: -5.5198839999999993,
16: 4.7949800000000007,
17: -5.5198839999999993,
18: 4.7949800000000007,
19: 4.7949800000000007,
20: -5.5198839999999993,
21: 4.7949800000000007,
22: -5.5198839999999993,
23: 4.7949800000000007,
24: 4.7949800000000007,
25: -5.5198839999999993,
26: 4.7949800000000007,
27: -5.5198839999999993,
28: -5.5198839999999993,
29: 4.7949800000000007,
30: -5.5198839999999993,
31: 4.7949800000000007,
32: 4.7949800000000007,
33: -5.5198839999999993,
34: 4.7949800000000007,
35: -5.5198839999999993,
36: 4.7949800000000007,
37: -5.5198839999999993,
38: 4.7949800000000007,
39: -5.5198839999999993,
40: 4.7949800000000007,
41: -5.5198839999999993,
42: 4.7949800000000007,
43: -5.5198839999999993,
44: 4.7949800000000007,
45: -5.5198839999999993,
46: 4.7949800000000007,
47: -5.5198839999999993,
48: 4.7949800000000007,
49: -5.5198839999999993,
50: -5.5198839999999993,
51: 4.7949800000000007,
52: -5.5198839999999993,
53: 4.7949800000000007,
54: 4.7949800000000007,
55: -5.5198839999999993,
56: 4.7949800000000007,
57: -5.5198839999999993,
58: 4.7949800000000007,
59: -5.5198839999999993,
60: 4.7949800000000007,
61: 4.7949800000000007,
62: -5.5198839999999993,
63: 4.7949800000000007,
64: -5.5198839999999993,
65: 4.7949800000000007,
66: 4.7949800000000007,
67: -5.5198839999999993,
68: 4.7949800000000007,
69: -5.5198839999999993,
70: -5.5198839999999993,
71: 4.7949800000000007,
72: -5.5198839999999993,
73: 4.7949800000000007,
74: -5.5198839999999993,
75: 4.7949800000000007,
76: -5.5198839999999993,
77: -5.5198839999999993,
78: 4.7949800000000007,
79: -5.5198839999999993,
80: 4.7949800000000007,
81: -5.5198839999999993,
82: 4.7949800000000007,
83: 4.7949800000000007,
84: -5.5198839999999993,
85: 4.7949800000000007,
86: -5.5198839999999993,
87: 4.7949800000000007,
88: 4.7949800000000007,
89: -5.5198839999999993,
90: -5.5198839999999993,
91: 4.7949800000000007,
92: 4.7949800000000007,
93: -5.5198839999999993,
94: 4.7949800000000007,
95: -5.5198839999999993,
96: 4.7949800000000007,
97: -5.5198839999999993,
98: 4.7949800000000007,
99: 4.7949800000000007},
'FIELD Y': {0: 1.8893500000000001,
1: 1.8893500000000001,
2: 1.8893500000000001,
3: 1.8893500000000001,
4: 1.8893500000000001,
5: 1.8893500000000001,
6: 1.8893500000000001,
7: 1.8893500000000001,
8: 1.8893500000000001,
9: 1.8893500000000001,
10: 1.8893500000000001,
11: 1.8893500000000001,
12: 1.8893500000000001,
13: 1.8893500000000001,
14: 1.8893500000000001,
15: 1.8893500000000001,
16: 1.8893500000000001,
17: 1.8893500000000001,
18: 1.8893500000000001,
19: 1.8893500000000001,
20: 1.8893500000000001,
21: 1.8893500000000001,
22: 1.8893500000000001,
23: 1.8893500000000001,
24: 1.8893500000000001,
25: 1.8893500000000001,
26: 1.8893500000000001,
27: 1.8893500000000001,
28: 1.8893500000000001,
29: 1.8893500000000001,
30: 1.8893500000000001,
31: 1.8893500000000001,
32: 1.8893500000000001,
33: 1.8893500000000001,
34: 1.8893500000000001,
35: 1.8893500000000001,
36: 1.8893500000000001,
37: 1.8893500000000001,
38: 1.8893500000000001,
39: 1.8893500000000001,
40: 1.8893500000000001,
41: 1.8893500000000001,
42: 1.8893500000000001,
43: 1.8893500000000001,
44: 1.8893500000000001,
45: 1.8893500000000001,
46: 1.8893500000000001,
47: 1.8893500000000001,
48: 1.8893500000000001,
49: 1.8893500000000001,
50: 1.8893500000000001,
51: 1.8893500000000001,
52: 1.8893500000000001,
53: 1.8893500000000001,
54: 1.8893500000000001,
55: 1.8893500000000001,
56: 1.8893500000000001,
57: 1.8893500000000001,
58: 1.8893500000000001,
59: 1.8893500000000001,
60: 1.8893500000000001,
61: 1.8893500000000001,
62: 1.8893500000000001,
63: 1.8893500000000001,
64: 1.8893500000000001,
65: 1.8893500000000001,
66: 1.8893500000000001,
67: 1.8893500000000001,
68: 1.8893500000000001,
69: 1.8893500000000001,
70: 1.8893500000000001,
71: 1.8893500000000001,
72: 1.8893500000000001,
73: 1.8893500000000001,
74: 1.8893500000000001,
75: 1.8893500000000001,
76: 1.8893500000000001,
77: 1.8893500000000001,
78: 1.8893500000000001,
79: 1.8893500000000001,
80: 1.8893500000000001,
81: 1.8893500000000001,
82: 1.8893500000000001,
83: 1.8893500000000001,
84: 1.8893500000000001,
85: 1.8893500000000001,
86: 1.8893500000000001,
87: 1.8893500000000001,
88: 1.8893500000000001,
89: 1.8893500000000001,
90: 1.8893500000000001,
91: 1.8893500000000001,
92: 1.8893500000000001,
93: 1.8893500000000001,
94: 1.8893500000000001,
95: 1.8893500000000001,
96: 1.8893500000000001,
97: 1.8893500000000001,
98: 1.8893500000000001,
99: 1.8893500000000001},
'Plate': {0: 72,
1: 72,
2: 72,
3: 72,
4: 72,
5: 72,
6: 72,
7: 72,
8: 72,
9: 72,
10: 72,
11: 72,
12: 72,
13: 72,
14: 72,
15: 72,
16: 72,
17: 72,
18: 72,
19: 72,
20: 72,
21: 72,
22: 72,
23: 72,
24: 72,
25: 72,
26: 72,
27: 72,
28: 72,
29: 72,
30: 72,
31: 72,
32: 72,
33: 72,
34: 72,
35: 72,
36: 72,
37: 72,
38: 72,
39: 72,
40: 72,
41: 72,
42: 72,
43: 72,
44: 72,
45: 72,
46: 72,
47: 72,
48: 72,
49: 72,
50: 72,
51: 72,
52: 72,
53: 72,
54: 72,
55: 72,
56: 72,
57: 72,
58: 72,
59: 72,
60: 72,
61: 72,
62: 72,
63: 72,
64: 72,
65: 72,
66: 72,
67: 72,
68: 72,
69: 72,
70: 72,
71: 72,
72: 72,
73: 72,
74: 72,
75: 72,
76: 72,
77: 72,
78: 72,
79: 72,
80: 131,
81: 131,
82: 131,
83: 131,
84: 131,
85: 131,
86: 131,
87: 131,
88: 131,
89: 131,
90: 131,
91: 131,
92: 131,
93: 131,
94: 131,
95: 131,
96: 131,
97: 131,
98: 131,
99: 131},
'Plate X': {0: -134.13406000000001,
1: -134.13406000000001,
2: -134.13406000000001,
3: -113.50433200000001,
4: -113.50433200000001,
5: -113.50433200000001,
6: -113.50433200000001,
7: -113.50433200000001,
8: -92.874604000000005,
9: -92.874604000000005,
10: -92.874604000000005,
11: -92.874604000000005,
12: -72.244876000000005,
13: -72.244876000000005,
14: -72.244876000000005,
15: -72.244876000000005,
16: -72.244876000000005,
17: -72.244876000000005,
18: -72.244876000000005,
19: -51.615147999999998,
20: -51.615147999999998,
21: -51.615147999999998,
22: -51.615147999999998,
23: -51.615147999999998,
24: -30.985420000000001,
25: -30.985420000000001,
26: -30.985420000000001,
27: -30.985420000000001,
28: -30.985420000000001,
29: -30.985420000000001,
30: -30.985420000000001,
31: -30.985420000000001,
32: -10.355691999999999,
33: -10.355691999999999,
34: -10.355691999999999,
35: -10.355691999999999,
36: -10.355691999999999,
37: -10.355691999999999,
38: -10.355691999999999,
39: 10.274036000000001,
40: 10.274036000000001,
41: 10.274036000000001,
42: 10.274036000000001,
43: 10.274036000000001,
44: 10.274036000000001,
45: 10.274036000000001,
46: 30.903764000000002,
47: 30.903764000000002,
48: 30.903764000000002,
49: 30.903764000000002,
50: 30.903764000000002,
51: 30.903764000000002,
52: 30.903764000000002,
53: 30.903764000000002,
54: 51.533491999999995,
55: 51.533491999999995,
56: 51.533491999999995,
57: 51.533491999999995,
58: 51.533491999999995,
59: 51.533491999999995,
60: 51.533491999999995,
61: 72.163219999999995,
62: 72.163219999999995,
63: 72.163219999999995,
64: 72.163219999999995,
65: 72.163219999999995,
66: 72.163219999999995,
67: 92.792947999999996,
68: 92.792947999999996,
69: 92.792947999999996,
70: 92.792947999999996,
71: 92.792947999999996,
72: 113.422676,
73: 113.422676,
74: 113.422676,
75: 113.422676,
76: 113.422676,
77: 134.052404,
78: 134.052404,
79: 134.052404,
80: -134.13406000000001,
81: -134.13406000000001,
82: -134.13406000000001,
83: -113.50433200000001,
84: -113.50433200000001,
85: -113.50433200000001,
86: -113.50433200000001,
87: -113.50433200000001,
88: -92.874604000000005,
89: -92.874604000000005,
90: -92.874604000000005,
91: -92.874604000000005,
92: -72.244876000000005,
93: -72.244876000000005,
94: -72.244876000000005,
95: -72.244876000000005,
96: -72.244876000000005,
97: -72.244876000000005,
98: -72.244876000000005,
99: -51.615147999999998},
'Plate Y': {0: -27.0123,
1: 0.039899999999999998,
2: 27.092099999999999,
3: -81.116699999999994,
4: -54.064500000000002,
5: 0.039899999999999998,
6: 54.144300000000001,
7: 81.1965,
8: -54.064500000000002,
9: -27.0123,
10: 27.092099999999999,
11: 54.144300000000001,
12: -108.16889999999999,
13: -81.116699999999994,
14: -27.0123,
15: 0.039899999999999998,
16: 27.092099999999999,
17: 81.1965,
18: 108.2487,
19: -81.116699999999994,
20: -54.064500000000002,
21: 0.039899999999999998,
22: 54.144300000000001,
23: 81.1965,
24: -135.22110000000001,
25: -108.16889999999999,
26: -54.064500000000002,
27: -27.0123,
28: 27.092099999999999,
29: 54.144300000000001,
30: 108.2487,
31: 135.30090000000001,
32: -108.16889999999999,
33: -81.116699999999994,
34: -27.0123,
35: 0.039899999999999998,
36: 27.092099999999999,
37: 81.1965,
38: 108.2487,
39: -135.22110000000001,
40: -81.116699999999994,
41: -54.064500000000002,
42: 0.039899999999999998,
43: 54.144300000000001,
44: 81.1965,
45: 135.30090000000001,
46: -135.22110000000001,
47: -108.16889999999999,
48: -54.064500000000002,
49: -27.0123,
50: 27.092099999999999,
51: 54.144300000000001,
52: 108.2487,
53: 135.30090000000001,
54: -108.16889999999999,
55: -81.116699999999994,
56: -27.0123,
57: 0.039899999999999998,
58: 27.092099999999999,
59: 81.1965,
60: 108.2487,
61: -81.116699999999994,
62: -54.064500000000002,
63: 0.039899999999999998,
64: 54.144300000000001,
65: 81.1965,
66: 108.2487,
67: -108.16889999999999,
68: -54.064500000000002,
69: -27.0123,
70: 27.092099999999999,
71: 54.144300000000001,
72: -81.116699999999994,
73: -27.0123,
74: 0.039899999999999998,
75: 27.092099999999999,
76: 81.1965,
77: -54.064500000000002,
78: 0.039899999999999998,
79: 54.144300000000001,
80: -27.0123,
81: 0.039899999999999998,
82: 27.092099999999999,
83: -81.116699999999994,
84: -54.064500000000002,
85: 0.039899999999999998,
86: 54.144300000000001,
87: 81.1965,
88: -54.064500000000002,
89: -27.0123,
90: 27.092099999999999,
91: 54.144300000000001,
92: -108.16889999999999,
93: -81.116699999999994,
94: -27.0123,
95: 0.039899999999999998,
96: 27.092099999999999,
97: 81.1965,
98: 108.2487,
99: -81.116699999999994},
'Unnamed: 0': {0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
12: 12,
13: 13,
14: 14,
15: 15,
16: 16,
17: 17,
18: 18,
19: 19,
20: 20,
21: 21,
22: 22,
23: 23,
24: 24,
25: 25,
26: 26,
27: 27,
28: 28,
29: 29,
30: 30,
31: 31,
32: 32,
33: 33,
34: 34,
35: 35,
36: 36,
37: 37,
38: 38,
39: 39,
40: 40,
41: 41,
42: 42,
43: 43,
44: 44,
45: 45,
46: 46,
47: 47,
48: 48,
49: 49,
50: 50,
51: 51,
52: 52,
53: 53,
54: 54,
55: 55,
56: 56,
57: 57,
58: 58,
59: 59,
60: 60,
61: 61,
62: 62,
63: 63,
64: 64,
65: 65,
66: 66,
67: 67,
68: 68,
69: 69,
70: 70,
71: 71,
72: 72,
73: 73,
74: 74,
75: 75,
76: 76,
77: 77,
78: 78,
79: 79,
80: 80,
81: 81,
82: 82,
83: 83,
84: 84,
85: 85,
86: 86,
87: 87,
88: 88,
89: 89,
90: 90,
91: 91,
92: 92,
93: 93,
94: 94,
95: 95,
96: 96,
97: 97,
98: 98,
99: 99}}

From your question it doesnt sound like you want a multivariate regression (i.e. multiple Y's). If you're just predicting a single Y from multiple X's, you can do it like this with pandas, then save the results to a txt file:
import pandas as pd
df = pd.DataFrame(DF)
res = pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']])
file = open("C:/Users/Simon/Desktop/results.txt", "w")
file.write(str(res))
file.close()
You mentioned in the question that you want to group the analyses by Drum and Plate. However, every value is the same for the Drum rows. If you want to group by Plate, however, and then run OLS on each subgroup, you can do something like this:
import pandas as pd
df = pd.DataFrame(DF)
results = []
def ols_res(df):
results.append( pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']]))
df.groupby('Plate').apply(lambda newdf: ols_res(newdf))
file = open("C:/Users/Simon/Desktop/results.txt", "w")
for el in results:
file.write(str(el))
file.close()
If you want to also group by Drum, and note which drum/plate combination each analysis is for, you can do something like this and just add some extra txt to the results file:
import pandas as pd
df = pd.DataFrame(DF)
results = []
def ols_res(df):
curCombo = "plate:" + str(df["Plate"].mean()) + ", drum:" + str(df["Drum"].unique())
regression_results = pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']])
results.append([curCombo, regression_results])
df.groupby(['Plate', 'Drum']).apply(lambda newdf: ols_res(newdf))
file = open("C:/Users/Simon/Desktop/results.txt", "w")
for el in results:
file.write(str(el))
file.write("\n\n")
file.close()

Related

Number of instances in a list variable pandas

in my database I have an id (docdb_family_id) and a list of ids (cited_docdb_list) as follows:
{'docdb_family_id': {0: 3498148,
1: 3512921,
2: 3525647,
3: 3636418,
4: 3673165,
5: 3680127,
6: 3688953,
7: 3689983,
8: 3700898,
9: 3768731,
10: 3770463,
11: 3771404,
12: 3771425,
13: 3771495,
14: 3771604,
15: 3772274,
16: 3772510,
17: 3772940,
18: 3775109,
19: 3779413,
20: 3783583,
21: 3784332,
22: 3784469,
23: 3787179,
24: 3787982,
25: 3790639,
26: 3790670,
27: 3792458,
28: 3795015,
29: 3799670,
30: 3800683,
31: 3802132,
32: 3802281,
33: 3803326,
34: 3803728,
35: 3808684,
36: 3809416,
37: 3810114,
38: 3811389,
39: 3812435,
40: 3813073,
41: 3813312,
42: 3815934,
43: 3816821,
44: 3816927,
45: 3817424,
46: 3818542,
47: 3818766,
48: 3819057,
49: 3819335,
50: 3820633,
51: 3820694,
52: 3821540,
53: 3821838,
54: 3822049,
55: 3822089,
56: 3823057,
57: 3823114,
58: 3824187,
59: 3824375,
60: 3825785,
61: 3826171,
62: 3826211,
63: 3827560,
64: 3828464,
65: 3829519,
66: 3829990,
67: 3831455,
68: 3831510,
69: 3831784,
70: 3831999,
71: 3832248,
72: 3832987,
73: 3834046,
74: 3834444,
75: 3835251,
76: 3886195,
77: 3887480,
78: 3890389,
79: 3892024,
80: 3944218},
'cited_docdb_list': {0: '[3454392.0, 3489764.0, 3492286.0, 3802281.0, 3944218.0, 4161113.0, 6055754.0, 4167218.0, 6245259.0, 6310327.0, 6339325.0, 7865817.0, 10818295.0, 21820994.0, 25257112.0, 25333370.0, 25421470.0]',
1: '[22785397.0, 3800683.0]',
2: '[3508710.0, 3832248.0, 6015961.0, 9173676.0, 22615010.0]',
3: '[3482303.0, 3518675.0, 3688207.0, 3688953.0, 7856041.0, 9893906.0, 9911676.0, 21740142.0, 22095959.0, 22224845.0, 22455261.0, 22522023.0, 23039462.0, 23149018.0, 23248627.0, 25608484.0, 26145960.0, 26246393.0, 27122358.0, 27215945.0, 27267946.0, 27368911.0, 27535943.0, 27569239.0, 27759996.0, 34107815.0, 35219296.0, 46248356.0]',
4: '[7917626.0, 13587294.0, 15860525.0, 16099836.0, 18349663.0, 18831836.0, 24223941.0, 26558149.0]',
5: '[3680147.0, 3680169.0, 6442447.0, 8168860.0, 8170479.0, 8178540.0, 8178541.0, 10655404.0, 10764890.0, 10765687.0, 11600956.0, 14593411.0, 22296890.0, 22471622.0, 24169239.0, 24966171.0, 25033444.0, 25166841.0, 25372199.0, 25459000.0, 25533862.0, 25918313.0, 26371384.0, 26439834.0, 27274967.0, 27294655.0, 27523014.0]',
6: '[5459370.0, 16645542.0, 17462457.0, 21959571.0, 22010115.0, 22296144.0, 26927437.0, 33041169.0, 33101777.0, 34066530.0]',
7: '[7650618.0, 7806400.0, 7835575.0, 7857812.0, 8210353.0, 8232323.0, 8239494.0, 10024300.0, 11566936.0, 11637978.0, 11942149.0, 12192469.0, 12437164.0, 12474858.0, 12862377.0, 13357403.0, 13391145.0, 13884195.0, 14268316.0, 14780600.0, 14837681.0, 14959673.0, 15493334.0, 15660109.0, 15690908.0, 15706187.0, 15740492.0, 16185014.0, 16286275.0, 16301821.0, 16400795.0, 16599264.0, 16867936.0, 17017842.0, 17303135.0, 18156945.0, 18168645.0, 18351330.0, 18357701.0, 18361853.0, 18553020.0, 18665747.0, 22042028.0, 22509938.0, 22752953.0, 22752985.0, 22955054.0, 23605846.0, 23635250.0, 24042617.0, 24281660.0, 24426092.0, 24470177.0, 25217414.0, 25342266.0, 25399276.0, 25481652.0, 26026958.0, 26034429.0, 26150729.0, 26427482.0, 26488815.0, 26500234.0, 26537700.0, 26644976.0, 26692209.0, 26785282.0, 27339916.0, 27370666.0, 27372394.0, 27524906.0, 27563165.0, 29229947.0, 49274340.0]',
8: '[3764296.0, 3770459.0, 3773222.0, 3811210.0, 3825785.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11894269.0, 12556976.0, 12589238.0, 12666170.0, 12673679.0, 12702964.0, 13630878.0, 14026520.0, 14271281.0, 14325872.0, 14416179.0, 15383496.0, 15479503.0, 15920227.0, 16127226.0, 16222285.0, 16339588.0, 16871054.0, 16912938.0, 16912954.0, 16913656.0, 17401011.0, 17461197.0, 17474177.0, 17663812.0, 17724327.0, 18063449.0, 18227455.0, 18250669.0, 18386252.0, 18426307.0, 18587018.0, 18654484.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22067617.0, 22089128.0, 22098429.0, 22223747.0, 22276463.0, 22298327.0, 22341037.0, 22385483.0, 22395684.0, 22676560.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23402016.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24251602.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24770649.0, 24785182.0, 24839047.0, 24962082.0, 25028009.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26438189.0, 26450647.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26748272.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26990011.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27311622.0, 27315336.0, 27340467.0, 27569697.0, 37944191.0, 46149961.0, 46255262.0]',
9: '[8583594.0, 9119276.0, 21793982.0, 22133036.0, 24149220.0, 25776190.0, 26736757.0]',
10: '[10568655.0, 13302684.0, 19844775.0, 22493955.0, 26714695.0, 26997884.0]',
11: '[4344006.0, 24838031.0, 25098959.0, 25395637.0, 27025593.0]',
12: '[25642630.0, 25642846.0, 25642930.0, 26279148.0, 26287348.0]',
13: '[10451245.0, 10564358.0, 22491246.0, 24064440.0, 24279325.0, 24519613.0, 24651262.0, 25072503.0, 26461666.0, 26692304.0]',
14: '[4351264.0, 4384434.0, 6117960.0, 9116940.0, 10999954.0, 22148709.0, 22562211.0, 23862977.0, 24037344.0, 24361917.0, 24432647.0, 25076138.0, 26840072.0, 27429215.0]',
15: '[3692248.0, 6053171.0, 6226485.0, 12362875.0, 27371744.0]',
16: '[5933264.0, 6125219.0, 6247996.0, 10521070.0, 13063586.0, 15774983.0, 16803481.0, 16904934.0, 22065174.0, 27127184.0, 27496706.0, 27624793.0]',
17: '[3526456.0, 6170998.0, 6335295.0, 10505184.0, 11549684.0, 14422646.0, 15088415.0, 17645959.0, 22169836.0, 22901756.0, 22994874.0, 22994878.0, 23172874.0, 23925148.0, 25244507.0, 27389063.0]',
18: '[6350760.0, 20369026.0, 24216636.0, 26762272.0, 26927655.0, 27126594.0, 27371255.0]',
19: '[3775878.0, 6008063.0, 12812693.0, 13575794.0, 14790639.0, 22013262.0, 24622370.0, 26901485.0, 26985941.0, 27076644.0, 27112632.0]',
20: '[3775488.0, 10948289.0, 10952971.0, 10952974.0, 11367322.0, 12710129.0, 15469131.0, 22577881.0, 25644554.0, 26467182.0, 26933783.0, 27401801.0]',
21: '[6134715.0, 6350620.0, 15983939.0, 16269143.0, 17680987.0, 23994234.0, 24992672.0, 26268730.0, 26367621.0, 26629308.0, 26787837.0, 26988835.0, 27365620.0, 27455735.0, 27476152.0, 41508342.0]',
22: '[3690998.0, 3779413.0, 8103745.0, 10528617.0, 10533016.0, 14026520.0, 17474177.0, 21959397.0, 22069056.0, 23038428.0, 23077293.0, 24078130.0, 24160889.0, 25618055.0, 26462451.0, 27407332.0, 27569697.0]',
23: '[6512805.0, 8105738.0, 10680104.0, 10719170.0, 18290174.0, 22237701.0, 22290947.0, 23695912.0, 23765282.0, 24565635.0, 26289399.0, 27358491.0, 27420192.0]',
24: '[6462400.0, 16101703.0, 24045826.0, 25612324.0, 26283893.0, 26434155.0]',
25: '[8208100.0, 23566456.0, 23702554.0, 25266985.0, 26142859.0]',
26: '[3771632.0, 14240231.0, 15623240.0, 22486268.0, 23605938.0, 27170740.0]',
27: '[3798105.0, 46299235.0, 46299236.0, 46299237.0, 46299238.0, 46299740.0, 46299800.0]',
28: '[2631556.0, 2944019.0, 10790311.0, 13793711.0, 18470587.0, 21851951.0, 21924559.0, 23889759.0, 23927439.0, 23963011.0, 24766696.0, 26713651.0, 26990589.0, 27287227.0]',
29: '[3796218.0, 24589826.0, 25624390.0, 25765848.0]',
30: '[3772972.0, 6025591.0, 7764892.0, 12805981.0, 15547363.0, 16262273.0, 21905352.0, 22082762.0, 23922610.0, 23984212.0, 24257317.0, 25315731.0, 25402356.0, 25518280.0, 26719186.0, 26734227.0, 26940453.0, 26979759.0, 27025821.0, 27025822.0]',
31: '[3779080.0, 3794389.0, 9425562.0, 10768435.0, 22860582.0, 25471727.0, 25617513.0, 25620315.0, 25644721.0, 26092132.0, 27153345.0]',
32: '[2634854.0, 3700806.0, 3802276.0, 3802292.0, 3802325.0, 3802326.0, 3802327.0, 3802332.0, 3802333.0, 3802334.0, 3802337.0, 3802338.0, 3802339.0, 3802354.0, 3802356.0, 3805158.0, 3805178.0, 3805242.0, 3806854.0, 3808228.0, 3808232.0, 3808236.0, 3810760.0, 4258298.0, 6062612.0, 6161522.0, 6180029.0, 6243195.0, 6328004.0, 6352957.0, 6397822.0, 6415485.0, 6456158.0, 6476429.0, 6495895.0, 7588639.0, 9099878.0, 9119945.0, 9447476.0, 9454581.0, 9460842.0, 10036436.0, 10089783.0, 10642403.0, 10676758.0, 10702950.0, 10729821.0, 10746269.0, 11194385.0, 11411510.0, 11592343.0, 12638122.0, 12808119.0, 13792188.0, 13869248.0, 13880272.0, 14224791.0, 14363363.0, 14475114.0, 14555145.0, 14654996.0, 14659718.0, 14905880.0, 15009474.0, 15208979.0, 15365386.0, 15418108.0, 15427440.0, 15532726.0, 15759142.0, 15839949.0, 16148732.0, 16454470.0, 16472116.0, 16557241.0, 16567151.0, 16574330.0, 16670501.0, 16826733.0, 16866056.0, 16917358.0, 16952937.0, 17009237.0, 17042089.0, 17152410.0, 17167043.0, 17167057.0, 17176980.0, 17177751.0, 17203313.0, 17214040.0, 17359106.0, 17384372.0, 17390431.0, 17398779.0, 17419690.0, 17521757.0, 17541035.0, 17548222.0, 17692283.0, 17709222.0, 17752106.0, 17787836.0, 17980830.0, 18032898.0, 18091978.0, 18108188.0, 18157469.0, 18183177.0, 18202974.0, 18210551.0, 18356218.0, 18513671.0, 20358277.0, 21694022.0, 21760302.0, 21839477.0, 22005188.0, 22196129.0, 22231670.0, 22241704.0, 22321076.0, 22407725.0, 22574957.0, 22624317.0, 22688378.0, 22819977.0, 22837041.0, 22856540.0, 22891528.0, 22899520.0, 22911089.0, 22957363.0, 22978599.0, 23009341.0, 23016791.0, 23017033.0, 23194812.0, 23238114.0, 23242315.0, 23372955.0, 23403394.0, 23583171.0, 23717292.0, 23818247.0, 23822065.0, 23967128.0, 24023429.0, 24035021.0, 24041033.0, 24056428.0, 24092174.0, 24102216.0, 24115524.0, 24258574.0, 24305268.0, 24384033.0, 24407235.0, 24437414.0, 24440441.0, 24511068.0, 24607773.0, 24618564.0, 24640870.0, 24695776.0, 24712750.0, 24771021.0, 24777130.0, 24782249.0, 24802597.0, 24824797.0, 24857748.0, 24902244.0, 24921608.0, 24928011.0, 24981047.0, 24992362.0, 25006081.0, 25056097.0, 25079341.0, 25079896.0, 25098400.0, 25128528.0, 25157096.0, 25175720.0, 25184562.0, 25211651.0, 25273616.0, 25325219.0, 25395409.0, 25430909.0, 25431399.0, 25441093.0, 25458263.0, 25459754.0, 25478333.0, 25511171.0, 25540179.0, 25644902.0, 25645209.0, 25645479.0, 25645484.0, 25645485.0, 25645493.0, 25645494.0, 25645495.0, 25645496.0, 25645497.0, 25645498.0, 25645507.0, 25645510.0, 25645511.0, 25645513.0, 25645515.0, 25645516.0, 25645517.0, 25645524.0, 25645526.0, 25645531.0, 25645539.0, 25645541.0, 25645542.0, 25645545.0, 25645546.0, 25645548.0, 26138811.0, 26227739.0, 26352404.0, 26435079.0, 26437848.0, 26443181.0, 26495400.0, 26535740.0, 26564673.0, 26687357.0, 26688326.0, 26719816.0, 26767248.0, 26792309.0, 26883761.0, 27005599.0, 27048622.0, 27054476.0, 27157854.0, 27158025.0, 27204375.0, 27278808.0, 27279445.0, 27288524.0, 27308865.0, 27324977.0, 27325474.0, 27329746.0, 27339109.0, 27376149.0, 27467592.0, 27522909.0, 27526374.0, 27530134.0, 27530208.0, 27542962.0, 27550891.0, 27551736.0, 27552627.0, 27554184.0, 27554356.0, 27557355.0, 27577752.0, 27578291.0, 28455314.0, 29248275.0, 29999199.0, 31994773.0, 32302805.0, 32324813.0, 34221894.0, 34753905.0, 36808782.0, 36954792.0, 38226628.0, 38622001.0, 38622009.0, 46253718.0, 46302623.0, 46302626.0, 46330220.0, 56289952.0]',
33: '[9193201.0, 9690456.0, 11262890.0, 11857463.0, 20399558.0, 22182248.0, 23000715.0, 23242310.0, 23324343.0, 23849738.0, 24920698.0, 26305246.0]',
34: '[2103060.0, 3773965.0, 3774544.0, 3775695.0, 3775872.0, 3776256.0, 3786612.0, 3791581.0, 5870313.0, 5916275.0, 6021141.0, 6199234.0, 6245542.0, 6295893.0, 6295894.0, 6295895.0, 6296520.0, 6365302.0, 6421653.0, 6453213.0, 6470668.0, 6470669.0, 6505848.0, 7762300.0, 7996364.0, 8204435.0, 8504791.0, 8516769.0, 8537466.0, 9978587.0, 10525500.0, 10532630.0, 11421697.0, 11861168.0, 11938229.0, 12631519.0, 14831183.0, 15028144.0, 19729781.0, 19865575.0, 20357413.0, 21762166.0, 21916786.0, 22585241.0, 22736795.0, 22800842.0, 22821355.0, 23120569.0, 23397799.0, 23436004.0, 23481575.0, 23518025.0, 23722477.0, 23740173.0, 23790685.0, 23790691.0, 23790693.0, 23844609.0, 23967824.0, 24169834.0, 24225931.0, 24575089.0, 24686268.0, 24701256.0, 24701581.0, 24738797.0, 24962380.0, 25062108.0, 25145546.0, 25220031.0, 25326521.0, 25341958.0, 25350944.0, 25375270.0, 25532312.0, 25636025.0, 25671453.0, 25782505.0, 25782589.0, 26158327.0, 26516437.0, 26877119.0, 26950677.0, 27100111.0, 27157416.0, 27167473.0, 27286248.0, 27339086.0, 27339905.0, 27356707.0, 27404057.0, 27414896.0, 27461178.0, 27462950.0, 27464289.0, 27477792.0, 27490121.0, 41667474.0]',
35: '[3775287.0, 24656178.0, 25590998.0, 26752872.0, 27104052.0, 27111638.0, 27154855.0, 27449240.0, 27505577.0]',
36: '[10966704.0, 14429073.0, 14796404.0, 24388079.0, 25634499.0, 55024694.0]',
37: '[3810974.0, 6485046.0, 8220639.0, 10710317.0, 24372965.0, 25336013.0, 26139248.0, 30115768.0, 31188433.0, 34102684.0, 35502814.0, 41505355.0, 44170427.0, 46325309.0]',
38: '[3087303.0, 4124422.0, 20979317.0, 21870465.0, 23941444.0, 25013107.0, 25326934.0, 25638943.0, 26674623.0, 27041345.0, 27357929.0, 27505577.0]',
39: '[3796218.0, 3799670.0, 13202074.0, 16015369.0, 18376479.0, 21761811.0, 22420460.0, 25064869.0, 25362187.0, 25420991.0, 25645622.0]',
40: '[6383399.0, 11571184.0, 16203469.0, 19328209.0, 19338037.0, 23609959.0, 23669719.0, 24172105.0, 24533474.0, 25545404.0, 27031913.0, 27475424.0]',
41: '[3790030.0, 10844179.0, 17904788.0, 25518619.0, 25644273.0, 26230725.0, 27107515.0, 27358315.0]',
42: '[3765777.0, 5219438.0, 6509530.0, 9401909.0, 10606015.0, 11550806.0, 12762794.0, 13827315.0, 14042779.0, 15264928.0, 15458075.0, 15925094.0, 16128449.0, 17054858.0, 18055051.0, 18471454.0, 21862046.0, 22293413.0, 22679682.0, 24127226.0, 24176606.0, 24248291.0, 24679083.0, 25083983.0, 25400937.0, 26366826.0, 26985312.0]',
43: '[3775287.0, 3776915.0, 21721135.0, 22104735.0, 22570362.0, 25326934.0, 25584184.0, 25586333.0, 25638943.0, 26759870.0]',
44: '[4173143.0, 7807763.0, 13522010.0, 13654473.0, 13927771.0, 15719616.0, 16249907.0, 16525019.0, 21694632.0, 22093627.0, 22464844.0, 22964817.0, 23061734.0, 23211210.0, 23691361.0, 23831988.0, 23938149.0, 24244391.0, 24684633.0, 25241119.0, 25530551.0, 26801599.0, 27370214.0, 27539801.0, 27556890.0, 46249740.0]',
45: '[7830781.0, 7843024.0, 7852695.0, 8237386.0, 9444575.0, 10762585.0, 21739343.0, 21899596.0, 22200593.0, 23421862.0, 24138149.0, 25127817.0, 26792398.0, 33378328.0]',
46: '[24175325.0, 26752769.0, 26865384.0]',
47: '[3808127.0, 22989911.0, 22991587.0, 24661354.0, 25009434.0]',
48: '[3801540.0, 5986989.0, 7758470.0, 13433718.0, 13869888.0, 13870030.0, 13870091.0, 15253727.0, 15460683.0, 15581976.0, 15640684.0, 16014121.0, 17269442.0, 17330959.0, 18272758.0, 18289278.0, 19819299.0, 22635021.0, 22763032.0, 24234146.0, 25270151.0, 25330011.0, 26481016.0, 26873860.0, 30798811.0]',
49: '[4161793.0, 21787085.0, 22034688.0, 23282114.0, 24428824.0, 25016295.0]',
50: '[3793081.0, 3803264.0, 4207952.0, 11470889.0, 11669056.0, 12523378.0, 12636851.0, 12730154.0, 15584724.0, 16344287.0, 17109625.0, 17721742.0, 17745772.0, 17910462.0, 18186065.0, 18210837.0, 18223914.0, 21639272.0, 22927223.0, 26708844.0, 27047225.0, 27290433.0, 27308607.0, 27314463.0, 27584488.0, 60520854.0]',
51: '[26150927.0, 27292634.0]',
52: '[2092705.0, 2855690.0, 3448135.0, 3808851.0, 4531792.0, 7778731.0, 12783185.0, 17298876.0, 20135092.0, 20175428.0, 20913824.0, 21599292.0, 22046526.0, 22607332.0, 22691016.0, 22787233.0, 22930717.0, 23249413.0, 23308386.0, 23380573.0, 23824923.0, 23929977.0, 23970974.0, 24197297.0, 24485989.0, 25130652.0, 26732210.0, 26735928.0, 26743678.0, 26786285.0, 27584461.0, 29547928.0, 31990350.0, 78669067.0]',
53: '[22113349.0, 26695070.0, 27119373.0, 27493256.0]',
54: '[3777847.0, 3790007.0, 21871161.0, 22030506.0, 22031745.0, 22176213.0, 22401126.0, 23088391.0, 25613851.0, 25646253.0, 26671540.0, 26863907.0, 26903057.0, 27397174.0, 39338541.0]',
55: '[3802927.0, 3823288.0, 4984890.0, 4989432.0, 5073611.0, 5082137.0, 6061217.0, 6348178.0, 6423623.0, 10965588.0, 15797375.0, 18127308.0, 18175653.0, 18289498.0, 18849747.0, 21800742.0, 22397195.0, 23221251.0, 23468869.0, 23690813.0, 24191813.0, 24284509.0, 24708045.0, 24855719.0, 25014176.0, 25360346.0, 26846684.0, 27033183.0, 27275736.0, 27331606.0, 27490188.0, 27535521.0, 27568184.0, 27574439.0, 27578281.0, 27578284.0, 27650233.0, 34549244.0, 34746656.0, 35542271.0, 35736297.0, 36587440.0, 37433822.0, 37967362.0, 38022911.0, 38066849.0, 39925109.0, 46251516.0, 46252778.0, 46252929.0]',
56: '[1343281.0, 1345715.0, 3512210.0, 3783167.0, 4382571.0, 5813114.0, 7093752.0, 8235578.0, 8518638.0, 8783563.0, 8850107.0, 9121566.0, 9923753.0, 9955607.0, 10692798.0, 12383956.0, 12776229.0, 12886199.0, 12969910.0, 14707530.0, 14889080.0, 15072156.0, 19041276.0, 20298361.0, 21688702.0, 21900949.0, 21937269.0, 22104118.0, 22153767.0, 22186346.0, 22826706.0, 22855741.0, 22953235.0, 23004360.0, 23134063.0, 23354534.0, 23591524.0, 24305737.0, 24462242.0, 24489942.0, 24592901.0, 24641378.0, 25198004.0, 25253475.0, 25275454.0, 25432521.0, 25488956.0, 25643518.0, 26068855.0, 26166520.0, 26320235.0, 26328728.0, 26331139.0, 26428311.0, 26693295.0, 26791936.0, 26793455.0, 26961378.0, 26972264.0, 27059428.0, 27157985.0, 27313342.0, 27379089.0, 27395407.0, 27399829.0, 27424041.0, 27424409.0, 27517571.0, 27547373.0, 27584206.0, 28676052.0, 29709654.0, 29765036.0, 30774464.0, 32030450.0, 33159613.0, 33476757.0, 34135377.0, 34193337.0, 34958524.0, 36144355.0, 36567630.0, 36950563.0, 36971922.0, 37494273.0, 37855421.0, 37911312.0, 37989420.0, 38051788.0, 38218330.0, 38345747.0, 38420621.0, 38624732.0, 38823526.0, 38876900.0, 38962587.0, 39101659.0, 39226884.0, 39271180.0, 39387557.0, 39439714.0, 39561752.0, 39643971.0, 39673143.0, 39688790.0, 39748498.0, 39758481.0, 39789493.0, 39832372.0, 40003041.0, 40227969.0, 40380014.0, 40511531.0, 40565551.0, 40567797.0, 40624345.0, 40667466.0, 40824391.0, 40944227.0, 41129307.0, 41210096.0, 41277879.0, 41398494.0, 42073897.0, 42310155.0, 42546349.0, 42727821.0, 42826416.0, 42993293.0, 43014521.0, 43062470.0, 43220481.0, 43223027.0, 43301173.0, 43357321.0, 43478228.0, 43823348.0, 43876770.0, 44319684.0, 44369791.0, 44486085.0, 44531864.0, 45035300.0, 45066335.0, 45493803.0, 45495953.0, 45559863.0, 45925310.0, 45927155.0, 46300493.0, 46328187.0, 46798573.0, 46928022.0, 47018208.0, 47219813.0, 47296708.0, 47882498.0, 48535050.0, 48613244.0, 48692634.0, 49624233.0, 50184623.0, 50773492.0, 50775319.0, 51263061.0, 51581192.0, 51581222.0, 51842981.0, 52278746.0, 52280706.0, 52466999.0, 52544493.0, 52779208.0, 53403873.0, 54287989.0, 54782889.0, 54929359.0, 55019821.0, 55646830.0, 57249384.0, 57249913.0, 57325991.0, 59743243.0]',
57: '[3796196.0, 21858396.0, 25495565.0]',
58: '[3813145.0, 4154951.0, 6018005.0, 6040632.0, 6179742.0, 6395409.0, 6481277.0, 9158815.0, 9288505.0, 10699030.0, 13165538.0, 13755942.0, 14985984.0, 15515377.0, 15951653.0, 21965800.0, 22532548.0, 23301780.0, 23973288.0, 24550262.0, 24731087.0, 24876009.0, 25480283.0, 25489069.0, 26724897.0, 27296379.0, 27358904.0, 27410676.0, 46252098.0]',
59: '[3781927.0, 3789640.0, 3813305.0, 10731687.0, 11027021.0, 20414469.0, 23714925.0, 32595626.0, 33029875.0]',
60: '[3700898.0, 3764296.0, 3770459.0, 3773222.0, 3811210.0, 5987130.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7812486.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11004384.0, 11509383.0, 12543065.0, 12556976.0, 12589238.0, 12653339.0, 12666170.0, 12673679.0, 12702964.0, 14026520.0, 14266412.0, 14271281.0, 14325872.0, 14416179.0, 14516479.0, 14785130.0, 15044247.0, 15383496.0, 16127226.0, 16222285.0, 16960430.0, 17266862.0, 17401011.0, 17461197.0, 17474177.0, 17724327.0, 18063449.0, 18250669.0, 18265166.0, 18426307.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22026693.0, 22067617.0, 22089128.0, 22098429.0, 22164670.0, 22223747.0, 22244680.0, 22276463.0, 22298327.0, 22341037.0, 22385483.0, 22395684.0, 22439618.0, 22676560.0, 22718956.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23218996.0, 23402016.0, 23423296.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 23994110.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24741062.0, 24770649.0, 24785182.0, 24828348.0, 24839047.0, 24962082.0, 25028009.0, 25031599.0, 25341468.0, 25342918.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25501373.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26228525.0, 26438189.0, 26450647.0, 26451566.0, 26470665.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26735162.0, 26748272.0, 26754314.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26989589.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27218208.0, 27293276.0, 27311622.0, 27316775.0, 27340467.0, 27569697.0, 31501140.0, 34800104.0, 37944191.0, 46149961.0, 46255262.0]',
61: '[3815245.0, 3817049.0, 4133414.0, 4237390.0, 6139410.0, 6302055.0, 6327475.0, 6359463.0, 7761745.0, 10634188.0, 10656776.0, 10799990.0, 11834232.0, 16311228.0, 16686050.0, 17340430.0, 21736076.0, 21792800.0, 22060322.0, 22083057.0, 22105805.0, 22177967.0, 22267098.0, 22415413.0, 22587189.0, 22605414.0, 22605428.0, 22626741.0, 22915051.0, 22915132.0, 22915137.0, 22916043.0, 23096413.0, 23212725.0, 23567105.0, 23567123.0, 23591762.0, 23793319.0, 23812585.0, 24102064.0, 24464348.0, 24622307.0, 25253365.0, 25352342.0, 25353269.0, 25427184.0, 25545290.0, 25671035.0, 26295357.0, 26368255.0, 26595469.0, 26726319.0, 26743135.0, 26822697.0, 26997208.0, 26997210.0, 27015502.0, 27015504.0, 27035582.0, 27038209.0, 27056966.0, 27062452.0, 27081705.0, 27383119.0, 27494547.0, 27547324.0]',
62: '[8193903.0, 8212273.0, 9247849.0, 9463029.0, 10512343.0, 11040434.0, 19848880.0, 21871975.0, 22614354.0, 25182231.0, 25355514.0, 27116547.0]',
63: '[3814657.0, 3816821.0, 3818830.0, 9372780.0, 22791620.0, 22805152.0, 23283422.0, 25248920.0, 25586333.0, 27020756.0, 27125092.0, 27145399.0, 27435241.0, 27449240.0, 27582841.0]',
64: '[3775561.0, 3778209.0, 3780242.0, 3783251.0, 3784665.0, 3788774.0, 3798212.0, 3811858.0, 3812283.0, 21830878.0, 21921748.0, 21993829.0, 22457245.0, 22460889.0, 23262728.0, 23400964.0, 23566456.0, 24092138.0, 24403780.0, 25289929.0, 25369658.0, 25618677.0, 25619320.0, 25629177.0, 25634619.0, 25645458.0, 26901477.0, 27038338.0, 27156461.0, 27158001.0, 27372667.0, 27391046.0, 27503418.0, 27537075.0]',
65: '[3769083.0, 3838826.0, 6518919.0, 7655380.0, 7671393.0, 9161974.0, 11933062.0, 12421582.0, 14111284.0, 15041555.0, 17038380.0, 17934524.0, 17951479.0, 17951704.0, 18736765.0, 21855631.0, 22254687.0, 22522730.0, 22525819.0, 22654614.0, 23072375.0, 23161341.0, 23682934.0, 23928270.0, 24002481.0, 25012845.0, 25464571.0, 25530090.0, 25936857.0, 26407346.0, 26861077.0, 41210539.0]',
66: '[3771617.0, 3807056.0, 8167498.0, 9489516.0, 13059819.0, 15236705.0, 17288890.0, 18106562.0, 18243976.0, 19449212.0, 19549705.0, 20360746.0, 21950670.0, 22523056.0, 22590937.0, 22822082.0, 22985088.0, 23085669.0, 23264894.0, 23454885.0, 23791789.0, 24158232.0, 24239892.0, 24257894.0, 24280874.0, 24434788.0, 24953310.0, 24990933.0, 25037706.0, 26312302.0, 26461656.0, 26569604.0, 26755930.0, 26802300.0, 26860472.0, 26891244.0, 26998345.0, 27036330.0, 27157297.0, 27377463.0]',
67: '[8223754.0, 21700957.0, 22248239.0, 24188773.0, 25199790.0, 25489601.0, 27370550.0]',
68: '[3824061.0, 10778962.0, 27157905.0]',
69: '[3885448.0, 4265687.0, 6453737.0, 15055174.0, 21588115.0, 22803210.0, 22810531.0, 22830406.0, 23778134.0, 23779509.0, 26598222.0, 27395145.0, 27536489.0]',
70: '[3817251.0, 3824297.0, 11604215.0, 13348182.0, 15295862.0, 17007082.0, 19729972.0, 19731450.0, 22867664.0, 23356034.0, 24169834.0, 25375270.0, 26970267.0, 27553681.0, 31500731.0, 31500732.0, 35705261.0]',
71: '[5931149.0, 19811894.0, 19812444.0, 22378265.0, 22409405.0, 23400964.0, 24164668.0, 25377816.0, 25484442.0, 26737825.0, 27395052.0, 27403058.0, 27517636.0]',
72: '[3772180.0, 4094759.0, 4099701.0, 4109923.0, 21758734.0, 22489510.0, 22802791.0, 23109074.0, 23332890.0, 23945495.0, 25404671.0, 26988331.0]',
73: '[22556333.0, 23537378.0, 23653584.0, 26050881.0, 26840895.0, 26877180.0, 27462050.0, 27463470.0]',
74: '[3775845.0, 24206625.0]',
75: '[4064369.0, 4172630.0, 8512849.0, 8513675.0, 10827902.0, 22681078.0, 24186095.0, 24990003.0, 26677157.0]',
76: '[4215108.0, 5754390.0, 6381956.0, 9309964.0, 13707851.0, 22117877.0]',
77: '[10969359.0, 11059344.0, 17714515.0, 19284446.0, 22690303.0, 26320567.0, 26415947.0]',
78: '[3888446.0, 3888996.0, 14727195.0, 22113364.0, 22782837.0, 25044309.0, 25167905.0, 26670443.0]',
79: '[3887054.0, 3889614.0, 3890522.0, 9303701.0, 9484895.0, 11363415.0, 14241244.0, 15291648.0, 16966026.0, 23250732.0, 24016081.0, 24393431.0, 24563127.0, 24788233.0, 25941613.0, 26366102.0, 27392409.0]',
80: '[27415886.0]'}}
within the list cited_docdb_list, however, there are ids that do not appear id docdb_family_id. What I would like to do is to detect the number of ids within cited_docdb_list which also appear in docdb_family_id. Is there a way to do so? My df is very large actually (almost 700000 observations). Please notice that the type of docdb_family_id and cited_docdb_list differs in the data.
The expected outcome, for instance for the first couple of docdb_family_ids should be:
docdb_family_id nb_included
3498148, 2
3512921, 1
...
where 3498148, 2 comes from the fact that the cited_docdb_list related to 3498148 cites 2 indices that appear in docdb_family_id, namely 3802281 and 3944218. In the same fashion, 3512921 cites 3800683 within cited_docdb_list.
Thank you
First idea is test intersection of sets with converted lists of strings to list of integers and get length of sets for nb_included:
import ast
df['cited_docdb_list'] = df['cited_docdb_list'].apply(ast.literal_eval)
sets = set(df['docdb_family_id'])
df['nb_included']=[len(set(map(int,x)).intersection(sets)) for x in df['cited_docdb_list']]
print (df)
docdb_family_id cited_docdb_list \
0 3498148 [3454392.0, 3489764.0, 3492286.0, 3802281.0, 3...
1 3512921 [22785397.0, 3800683.0]
2 3525647 [3508710.0, 3832248.0, 6015961.0, 9173676.0, 2...
3 3636418 [3482303.0, 3518675.0, 3688207.0, 3688953.0, 7...
4 3673165 [7917626.0, 13587294.0, 15860525.0, 16099836.0...
.. ... ...
76 3886195 [4215108.0, 5754390.0, 6381956.0, 9309964.0, 1...
77 3887480 [10969359.0, 11059344.0, 17714515.0, 19284446....
78 3890389 [3888446.0, 3888996.0, 14727195.0, 22113364.0,...
79 3892024 [3887054.0, 3889614.0, 3890522.0, 9303701.0, 9...
80 3944218 [27415886.0]
nb_included
0 2
1 1
2 1
3 1
4 0
.. ...
76 0
77 0
78 0
79 0
80 0
[81 rows x 3 columns]
Pandas solution with DataFrame.explode and Series.isin for test membership, last for count Trues aggregate sum:
df = (df.assign(cited_docdb_list = df['cited_docdb_list'].apply(ast.literal_eval))
.explode('cited_docdb_list')
.astype({'cited_docdb_list':int})
.assign(nb_included=lambda x: x['cited_docdb_list'].isin(x['docdb_family_id']))
.groupby('docdb_family_id', as_index=False)['nb_included']
.sum())
print (df)
docdb_family_id nb_included
0 3498148 2
1 3512921 1
2 3525647 1
3 3636418 1
4 3673165 0
.. ... ...
76 3886195 0
77 3887480 0
78 3890389 0
79 3892024 0
80 3944218 0
[81 rows x 2 columns]

Using shift function along with max function Pandas

I am attempting to create a technical indicator ('Supertrend') using Pandas. The formula for this column is recursive.
(For people familiar with Pinescript, this column will replicate the result of this Pinescript function):
df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up'])
The problem occurs in the true part of the np.select()because I cannot call .shift() on a string.
Normally, I would make a new column that uses .shift() beforehand but since this is recursive, I have to do it all in one line.
If possible I'd like to avoid using loops for speed; prefer solutions using native pandas or numpy functions.
What I am looking for
A way to find max function that can accomodate a .shift() call
Columns that are used:
def tr(high,low,close1):
return max(high - low, abs(high - close1), abs(low - close1))
df['st_closeprev'] = df['Close'].shift()
df['st_hl2'] = (df['High']+df['Low'])/2
df['st_tr'] = df.apply(lambda row: tr(row['High'],row['Low'],row['st_closeprev']),axis=1)
df['st_atr'] = df['st_tr'].ewm(alpha = 1/pd,adjust=False,min_periods=pd).mean()
df['st_up'] = df['st_hl2'] - factor * df['st_atr']
df['st_dn'] = df['st_hl2'] + factor * df['st_atr']
df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up'])
Sample data obtained by the df.to_dict
{'Date': {0: Timestamp('2021-01-01 09:15:00'),
1: Timestamp('2021-01-01 09:30:00'),
2: Timestamp('2021-01-01 09:45:00'),
3: Timestamp('2021-01-01 10:00:00'),
4: Timestamp('2021-01-01 10:15:00'),
5: Timestamp('2021-01-01 10:30:00'),
6: Timestamp('2021-01-01 10:45:00'),
7: Timestamp('2021-01-01 11:00:00'),
8: Timestamp('2021-01-01 11:15:00'),
9: Timestamp('2021-01-01 11:30:00'),
10: Timestamp('2021-01-01 11:45:00'),
11: Timestamp('2021-01-01 12:00:00'),
12: Timestamp('2021-01-01 12:15:00'),
13: Timestamp('2021-01-01 12:30:00'),
14: Timestamp('2021-01-01 12:45:00'),
15: Timestamp('2021-01-01 13:00:00'),
16: Timestamp('2021-01-01 13:15:00'),
17: Timestamp('2021-01-01 13:30:00'),
18: Timestamp('2021-01-01 13:45:00'),
19: Timestamp('2021-01-01 14:00:00'),
20: Timestamp('2021-01-01 14:15:00'),
21: Timestamp('2021-01-01 14:30:00'),
22: Timestamp('2021-01-01 14:45:00'),
23: Timestamp('2021-01-01 15:00:00'),
24: Timestamp('2021-01-01 15:15:00'),
25: Timestamp('2021-01-04 09:15:00')},
'Open': {0: 31250.0,
1: 31376.0,
2: 31405.0,
3: 31389.4,
4: 31377.5,
5: 31347.8,
6: 31310.8,
7: 31343.4,
8: 31349.5,
9: 31349.9,
10: 31325.1,
11: 31310.9,
12: 31329.0,
13: 31376.0,
14: 31375.5,
15: 31357.4,
16: 31325.0,
17: 31341.1,
18: 31300.0,
19: 31324.5,
20: 31353.3,
21: 31350.0,
22: 31346.9,
23: 31330.0,
24: 31314.3,
25: 31450.2},
'High': {0: 31407.0,
1: 31425.0,
2: 31411.95,
3: 31389.45,
4: 31382.0,
5: 31350.0,
6: 31354.6,
7: 31359.0,
8: 31370.0,
9: 31364.7,
10: 31350.0,
11: 31337.9,
12: 31378.9,
13: 31419.5,
14: 31377.75,
15: 31360.0,
16: 31367.15,
17: 31345.2,
18: 31340.0,
19: 31367.0,
20: 31375.0,
21: 31370.0,
22: 31350.0,
23: 31334.6,
24: 31329.6,
25: 31599.0},
'Low': {0: 31250.0,
1: 31367.95,
2: 31352.5,
3: 31331.65,
4: 31301.4,
5: 31303.05,
6: 31310.0,
7: 31325.05,
8: 31335.35,
9: 31315.35,
10: 31281.9,
11: 31292.0,
12: 31316.25,
13: 31352.05,
14: 31335.0,
15: 31322.0,
16: 31318.25,
17: 31261.55,
18: 31283.3,
19: 31324.5,
20: 31322.0,
21: 31332.15,
22: 31324.1,
23: 31300.15,
24: 31280.0,
25: 31430.0},
'Close': {0: 31375.0,
1: 31398.3,
2: 31386.0,
3: 31377.0,
4: 31342.3,
5: 31311.7,
6: 31345.0,
7: 31349.0,
8: 31344.2,
9: 31327.6,
10: 31311.3,
11: 31325.6,
12: 31373.0,
13: 31375.0,
14: 31357.4,
15: 31326.0,
16: 31345.9,
17: 31300.6,
18: 31324.4,
19: 31353.8,
20: 31345.6,
21: 31341.6,
22: 31332.5,
23: 31311.0,
24: 31285.0,
25: 31558.4},
'Volume': {0: 259952,
1: 163775,
2: 105900,
3: 99725,
4: 115175,
5: 78625,
6: 67675,
7: 46575,
8: 53350,
9: 54175,
10: 96975,
11: 80925,
12: 79475,
13: 147775,
14: 38900,
15: 64925,
16: 52425,
17: 142175,
18: 81800,
19: 74950,
20: 68550,
21: 40350,
22: 47150,
23: 119200,
24: 222875,
25: 524625}}
Change:
df[['st_up','st_trendup'.shift()]].max(axis=1)
to:
df[['st_up','st_trendup']].assign(st_trendup = df['st_trendup'].shift()).max(axis=1)

joining/merging both index and non-index columns in a pandas multi-index

Context:
I have two very large pandas dataframes to join which barely fit in memory (8GB each, millions of rows) and have the challenge of performing a performant join using combinations of both indexed and non-indexed columns. Fuzzy joining is out of scope.
Variables in order of cardinality:
dataset_1 has these variables:
postcode, street_name, secondary_number, primary_number, unique_id
dataset_2 has these variables:
postcode, street_name, house_number, house_name, sub_building_name, different_unique_id
postcode and street_name are shared keys, and multiindexing seems the correct choice to improve joining performance in pandas:
dataset_1 = dataset_1.set_index(['postcode', 'street', "unique_id"]).sort_index()
dataset_2 = dataset_2.set_index(['postcode', 'street', "different_unique_id"]).sort_index()
Processing:
At this stage I can compute in pandas if memory allows. If not, I would use Dask, however it can't handle multi-indexes. In the event this were possible (or unnecessary) the sorting would still need to be handled in pandas as Dask cannot manage this. If Dask were an option this is how I would convert:
dd1 = dd.from_pandas(dataset_1, npartitions=1) #large left dataframe
del dataset_1 #to release the memory
dd2 = dd.from_pandas(dataset_2, npartitions=3) #partitioned right dataframe for performance
del dataset_2 #to release the memory
Problem:
The challenge is performing an inner join on non-null variables using the indexes ("postcode" and "street"), alongside non-indexed columns. Combinations of the non-indexed variables will be iterated in a for loop.
Solution Sketch:
This gives an idea what I would like to do to maintain the performance gains from the indexing, but is of course not syntactically possible:
output = pd.merge(df1, df2, how='inner', left_on=["postcode", "street_name", "secondary_number", "primary_number"], right_on=["postcode", "street_name", "house_name", "house_number"], left_index=[True,True,False,False], right_index=[True,True,False,False])
Summary:
My understanding is that pd.join can handle non-indexed and indexed columns, whereas pd.merge cannot. As a result I'm unsure how to achieve this join in pd.join where there is a combination of both indexed and non-indexed columns.
Example of intersects:
{'different_unique_id': {27: '{582D0636-8DEF-8F22-E053-6C04A8C01BAC}',
41: '{D9E869FE-7B55-4C36-AC43-695B9033A13B}',
33: '{93E6821E-554E-40FD-E053-6B04A8C0C1DF}',
1: '{288DCE29-0589-E510-E050-A8C06205480E}',
48: '{3A23DDD5-A0E8-41D2-A514-5B09385C301F}',
52: '{CEB16957-F7FA-4D1B-B45F-A390214735BC}',
13: '{404A5AF3-9B20-CD2B-E050-A8C063055C7B}',
16: '{64342BFD-FD07-422C-E053-6C04A8C0FB8A}',
57: '{29A8E769-8A10-4477-9494-FF55EF5FAE4B}',
10: '{404A5AF3-0B58-CD2B-E050-A8C063055C7B}',
21: '{55BDCAE6-0C10-521D-E053-6B04A8C0DD7A}',
31: '{5C676A02-1781-4152-950C-6E5CA2CBC487}',
7: '{68FEB20B-142E-38DA-E053-6C04A8C051AE}',
45: '{8F1B26BD-673F-53DB-E053-6C04A8C03649}',
12: '{2F115F7A-8F81-4124-9FD4-FB76E742B2C1}',
36: '{344AB2D7-4B59-4AB4-8F52-75B29BE8C509}',
20: '{965B6D91-D4B6-95E4-E053-6C04A8C07729}',
56: '{59872FD9-F39D-4BB9-95F6-91E002D948B1}',
22: '{6141DFF0-973F-4FEC-A582-7F310B566031}'},
'unique_id': {27: 10002277489,
41: 64023255,
33: 10007367447,
1: 22229221,
48: 10033235735,
52: 100062162615,
13: 50103744,
16: 10022903998,
57: 12015624,
10: 12154940,
21: 10024247587,
31: 100041193990,
7: 10008230730,
45: 10091640210,
12: 202107394,
36: 5062293,
20: 48114659,
56: 10001311242,
22: 10000443154},
'street': {27: 'thewharf',
41: 'parkroad',
33: 'oldmillclose',
1: 'thirdavenue',
48: 'woolnersway',
52: 'sumnerroad',
13: 'cliftongardens',
16: 'windhamroad',
57: 'westparkroad',
10: 'grangeroad',
21: 'staplersroad',
31: 'strand',
7: 'amhurstroad',
45: 'eatonroad',
12: 'northendroad',
36: 'belsizegrove',
20: 'watermillway',
56: 'orchardplace',
22: 'thurlowparkroad'},
'postcode': {27: 'lu72la',
41: 'cf626nt',
33: 'hr40aq',
1: 'bn32pd',
48: 'sg13ae',
52: 'gu97jx',
13: 'ct202ef',
16: 'bh14rn',
57: 'ub24af',
10: 'w55bu',
21: 'po302dp',
31: 'tq148aq',
7: 'e82ag',
45: 'ch47ew',
12: 'ha90ae',
36: 'nw34tt',
20: 'sw192rw',
56: 'so143hw',
22: 'se218hp'},
'secondary_number': {27: '76',
41: 'flat6',
33: '49',
1: 'flat10',
48: '145',
52: '31',
13: 'flat19',
16: 'flat7',
57: '76',
10: 'flat1',
21: 'flat1',
31: 'flat43',
7: 'flata',
45: '8',
12: '42',
36: 'flat9',
20: 'flat43',
56: 'flat156',
22: 'flat2'},
'primary_number': {27: 'eastdock',
41: 'courtlands',
33: 'watkinscourt',
1: 'ascothouse',
48: 'monumentcourt',
52: 'sumnercourt',
13: '22-24',
16: '77',
57: 'osterleyviews',
10: '55-59',
21: '138',
31: 'leandercourt',
7: '130',
45: 'greenbankhall',
12: 'danescourt',
36: 'holmefieldcourt',
20: 'bennetscourtyard',
56: 'oceanaboulevard',
22: '124f'},
'building_name': {27: 'eastdock',
41: 'courtlands',
33: 'watkinscourt',
1: 'ascothouse',
48: 'monumentcourt',
52: 'sumnercourt',
13: None,
16: None,
57: 'osterleyviews',
10: None,
21: None,
31: 'leandercourt',
7: None,
45: 'greenbankhall',
12: 'danescourt',
36: 'holmefieldcourt',
20: 'bennetscourtyard',
56: 'oceanaboulevard',
22: None},
'building_number': {27: None,
41: None,
33: None,
1: '18-20',
48: None,
52: None,
13: '22-24',
16: '77',
57: None,
10: '55-59',
21: '138',
31: None,
7: '130',
45: None,
12: None,
36: None,
20: None,
56: None,
22: '124f'},
'sub_building': {27: '76',
41: 'flat6',
33: '49',
1: 'flat10',
48: '145',
52: '31',
13: 'flat19',
16: 'flat7',
57: '76',
10: 'flat1',
21: 'flat1',
31: 'flat43',
7: 'flata',
45: '8',
12: '42',
36: 'flat9',
20: 'flat43',
56: 'flat156',
22: 'flat2'}}

I am using Apache opennlp 1.8.0, I am trying to use POSTaggerTrainer for training

After referring to the documentation of version 1.8.0 I tried the CLI command given in the doc, it doesn't seem to be working, nor is the Java code given under API. I have a text file with the following text:
train-me.txt
Last_JJ September_NNP ,_, I_PRP tried_VBD to_TO find_VB out_RP the_DT
address_NN of_IN an_DT old_JJ school_NN friend_NN whom_WP I_PRP
had_VBD not_RB seen_VBN for_IN 15_CD years_NNS ._. I_PRP just_RB
knew_VBD his_PRP$ name_NN ,_, Alan_NNP McKennedy_NNP ,_, and_CC I_PRP
'd_MD heard_VBD the_DT rumour_NN that_IN he_PRP 'd_MD moved_VBD to_TO
Scotland_NNP ,_, the_DT country_NN of_IN his_PRP$ ancestors_NNS ._.
dictionary.xml
<?xml version="1.0" encoding="UTF-8"?><dictionary>
<entry tags="NNP">
<token>Calysta</token>
</entry>
</dictionary>
I want to use either of these two (if possible) to train the program to tag Calysta as Calysta_NNP
I know its kind of a late answer, but if it helps..
arjun#arjun-VPCEH26EN:~/apache-opennlp-1.8.0/bin$ ./opennlp POSTaggerTrainer -data train-me.txt -dict dictionary.xml -lang en -model en-pos-maxent-cust.bin
Indexing events using cutoff of 5
Computing event counts... done. 52 events
Indexing... done.
Sorting and merging events... done. Reduced 52 events to 37.
Done indexing.
Incorporating indexed data for training...
done.
Number of Event Tokens: 37
Number of Outcomes: 20
Number of Predicates: 13
...done.
Computing model parameters ...
Performing 100 iterations.
1: ... loglikelihood=-155.77807822480764 0.038461538461538464
2: ... loglikelihood=-130.9791219262959 0.5
3: ... loglikelihood=-115.82234962334346 0.5576923076923077
4: ... loglikelihood=-105.13170003394434 0.6730769230769231
5: ... loglikelihood=-96.9869322585347 0.6730769230769231
6: ... loglikelihood=-90.51694300405765 0.6923076923076923
7: ... loglikelihood=-85.23546058034727 0.6923076923076923
8: ... loglikelihood=-80.83562367302892 0.7307692307692307
9: ... loglikelihood=-77.1097811259408 0.7307692307692307
10: ... loglikelihood=-73.91120812658458 0.7307692307692307
11: ... loglikelihood=-71.13309894938885 0.75
12: ... loglikelihood=-68.69589846103266 0.75
13: ... loglikelihood=-66.53917914878002 0.75
14: ... loglikelihood=-64.61622830997396 0.75
15: ... loglikelihood=-62.890348665987055 0.75
16: ... loglikelihood=-61.332281582677155 0.75
17: ... loglikelihood=-59.91838269276684 0.75
18: ... loglikelihood=-58.629310291693805 0.75
19: ... loglikelihood=-57.44906823464401 0.75
20: ... loglikelihood=-56.36429724151985 0.75
21: ... loglikelihood=-55.36374258766163 0.75
22: ... loglikelihood=-54.43784870333842 0.75
23: ... loglikelihood=-53.57844629573773 0.75
24: ... loglikelihood=-52.77850781690259 0.75
25: ... loglikelihood=-52.03195408008879 0.75
26: ... loglikelihood=-51.333499646171695 0.75
27: ... loglikelihood=-50.67852796323892 0.75
28: ... loglikelihood=-50.062989611378285 0.75
29: ... loglikelihood=-49.48331869161687 0.75
30: ... loglikelihood=-48.93636361232364 0.75
31: ... loglikelihood=-48.419329410290345 0.75
32: ... loglikelihood=-47.92972939439551 0.75
33: ... loglikelihood=-47.465344384258486 0.75
34: ... loglikelihood=-47.02418818116749 0.75
35: ... loglikelihood=-46.604478186421446 0.75
36: ... loglikelihood=-46.20461029609541 0.75
37: ... loglikelihood=-45.82313736754338 0.75
38: ... loglikelihood=-45.458750683509976 0.75
39: ... loglikelihood=-45.11026394313063 0.75
40: ... loglikelihood=-44.77659939167084 0.75
41: ... loglikelihood=-44.45677576728319 0.75
42: ... loglikelihood=-44.14989779685863 0.75
43: ... loglikelihood=-43.855147016888836 0.75
44: ... loglikelihood=-43.571773731178716 0.75
45: ... loglikelihood=-43.299089946831224 0.75
46: ... loglikelihood=-43.03646315440174 0.75
47: ... loglikelihood=-42.78331083845189 0.75
48: ... loglikelihood=-42.53909562169248 0.75
49: ... loglikelihood=-42.30332096009808 0.7692307692307693
50: ... loglikelihood=-42.07552731829657 0.7692307692307693
51: ... loglikelihood=-41.85528876457919 0.7692307692307693
52: ... loglikelihood=-41.642209933359936 0.7692307692307693
53: ... loglikelihood=-41.43592331010347 0.7692307692307693
54: ... loglikelihood=-41.236086799846426 0.7692307692307693
55: ... loglikelihood=-41.04238154563922 0.7692307692307693
56: ... loglikelihood=-40.854509967677004 0.7692307692307693
57: ... loglikelihood=-40.67219399768791 0.7692307692307693
58: ... loglikelihood=-40.49517348640929 0.7692307692307693
59: ... loglikelihood=-40.32320476478338 0.7692307692307693
60: ... loglikelihood=-40.1560593419208 0.7692307692307693
61: ... loglikelihood=-39.99352272496435 0.7692307692307693
62: ... loglikelihood=-39.835393347789605 0.7692307692307693
63: ... loglikelihood=-39.68148159704321 0.7692307692307693
64: ... loglikelihood=-39.53160892537774 0.7692307692307693
65: ... loglikelihood=-39.38560704292392 0.7692307692307693
66: ... loglikelihood=-39.243317179072264 0.7692307692307693
67: ... loglikelihood=-39.10458940753585 0.7692307692307693
68: ... loglikelihood=-38.969282028454 0.7692307692307693
69: ... loglikelihood=-38.8372610019872 0.7692307692307693
70: ... loglikelihood=-38.70839942845979 0.7692307692307693
71: ... loglikelihood=-38.58257707064014 0.7692307692307693
72: ... loglikelihood=-38.45967991421811 0.7692307692307693
73: ... loglikelihood=-38.33959976295419 0.7692307692307693
74: ... loglikelihood=-38.222233865340385 0.7692307692307693
75: ... loglikelihood=-38.107484569938585 0.7692307692307693
76: ... loglikelihood=-37.995259006848066 0.7692307692307693
77: ... loglikelihood=-37.88546879301048 0.7692307692307693
78: ... loglikelihood=-37.77802975928638 0.7692307692307693
79: ... loglikelihood=-37.6728616974405 0.7692307692307693
80: ... loglikelihood=-37.56988812535212 0.7692307692307693
81: ... loglikelihood=-37.469036068928645 0.7692307692307693
82: ... loglikelihood=-37.370235859343474 0.7692307692307693
83: ... loglikelihood=-37.27342094434868 0.7692307692307693
84: ... loglikelihood=-37.178527712527796 0.7692307692307693
85: ... loglikelihood=-37.08549532945806 0.7692307692307693
86: ... loglikelihood=-36.99426558484419 0.7692307692307693
87: ... loglikelihood=-36.904782749769446 0.7692307692307693
88: ... loglikelihood=-36.81699344328549 0.7692307692307693
89: ... loglikelihood=-36.730846507630154 0.7692307692307693
90: ... loglikelihood=-36.64629289142378 0.7692307692307693
91: ... loglikelihood=-36.563285540250355 0.7692307692307693
92: ... loglikelihood=-36.48177929407976 0.7692307692307693
93: ... loglikelihood=-36.40173079103272 0.7692307692307693
94: ... loglikelihood=-36.32309837703207 0.7692307692307693
95: ... loglikelihood=-36.24584202091997 0.7692307692307693
96: ... loglikelihood=-36.16992323465651 0.7692307692307693
97: ... loglikelihood=-36.095304998244124 0.7692307692307693
98: ... loglikelihood=-36.021951689052344 0.7692307692307693
99: ... loglikelihood=-35.94982901524132 0.7692307692307693
100: ... loglikelihood=-35.87890395300729 0.7692307692307693
Writing pos tagger model ... done (0.086s)
Wrote pos tagger model to
path: /home/arjun/apache-opennlp-1.8.0/bin/en-pos-maxent-cust.bin
Execution time: 0.522 seconds
I used Apache OpenNLP 1.8.0. Revert if you need any help with Apache OpenNLP POS Tagger.

Creating a Dropdown menu in Plotly from Pandas

I've had a look at the following link but its not very clear https://plot.ly/pandas/dropdowns/.
I have the following figure generated in plotly but would like a dropdown menu (of A, B and C) to select and display the respective line only
import pandas as pd
import plotly
plotly.offline.init_notebook_mode()
import plotly.offline as py
from plotly.graph_objs import *
df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}})
fig = {
'data': [
{
'x': df[df['kit']==kit]['FPR'],
'y': df[df['kit']==kit]['SNS'],
'name': kit,
} for kit in ['A', 'B', 'C']
],
}
py.iplot(fig)
I'm not sure how to do this directly from plotly; however, you can use interact function from ipywidgets library.
In your case it will be the following:
from ipywidgets import interact
df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}})
def plot_it(kit):
fig = {
'data': [
{
'x': df[df['kit']==kit]['FPR'],
'y': df[df['kit']==kit]['SNS'],
'name': kit
}
]
}
py.iplot(fig)
interact(plot_it, kit=('A', 'B', 'C'))