Number of instances in a list variable pandas - pandas
In my database I have an id column (docdb_family_id) and a column holding a list of ids (cited_docdb_list), as follows:
{'docdb_family_id': {0: 3498148,
1: 3512921,
2: 3525647,
3: 3636418,
4: 3673165,
5: 3680127,
6: 3688953,
7: 3689983,
8: 3700898,
9: 3768731,
10: 3770463,
11: 3771404,
12: 3771425,
13: 3771495,
14: 3771604,
15: 3772274,
16: 3772510,
17: 3772940,
18: 3775109,
19: 3779413,
20: 3783583,
21: 3784332,
22: 3784469,
23: 3787179,
24: 3787982,
25: 3790639,
26: 3790670,
27: 3792458,
28: 3795015,
29: 3799670,
30: 3800683,
31: 3802132,
32: 3802281,
33: 3803326,
34: 3803728,
35: 3808684,
36: 3809416,
37: 3810114,
38: 3811389,
39: 3812435,
40: 3813073,
41: 3813312,
42: 3815934,
43: 3816821,
44: 3816927,
45: 3817424,
46: 3818542,
47: 3818766,
48: 3819057,
49: 3819335,
50: 3820633,
51: 3820694,
52: 3821540,
53: 3821838,
54: 3822049,
55: 3822089,
56: 3823057,
57: 3823114,
58: 3824187,
59: 3824375,
60: 3825785,
61: 3826171,
62: 3826211,
63: 3827560,
64: 3828464,
65: 3829519,
66: 3829990,
67: 3831455,
68: 3831510,
69: 3831784,
70: 3831999,
71: 3832248,
72: 3832987,
73: 3834046,
74: 3834444,
75: 3835251,
76: 3886195,
77: 3887480,
78: 3890389,
79: 3892024,
80: 3944218},
'cited_docdb_list': {0: '[3454392.0, 3489764.0, 3492286.0, 3802281.0, 3944218.0, 4161113.0, 6055754.0, 4167218.0, 6245259.0, 6310327.0, 6339325.0, 7865817.0, 10818295.0, 21820994.0, 25257112.0, 25333370.0, 25421470.0]',
1: '[22785397.0, 3800683.0]',
2: '[3508710.0, 3832248.0, 6015961.0, 9173676.0, 22615010.0]',
3: '[3482303.0, 3518675.0, 3688207.0, 3688953.0, 7856041.0, 9893906.0, 9911676.0, 21740142.0, 22095959.0, 22224845.0, 22455261.0, 22522023.0, 23039462.0, 23149018.0, 23248627.0, 25608484.0, 26145960.0, 26246393.0, 27122358.0, 27215945.0, 27267946.0, 27368911.0, 27535943.0, 27569239.0, 27759996.0, 34107815.0, 35219296.0, 46248356.0]',
4: '[7917626.0, 13587294.0, 15860525.0, 16099836.0, 18349663.0, 18831836.0, 24223941.0, 26558149.0]',
5: '[3680147.0, 3680169.0, 6442447.0, 8168860.0, 8170479.0, 8178540.0, 8178541.0, 10655404.0, 10764890.0, 10765687.0, 11600956.0, 14593411.0, 22296890.0, 22471622.0, 24169239.0, 24966171.0, 25033444.0, 25166841.0, 25372199.0, 25459000.0, 25533862.0, 25918313.0, 26371384.0, 26439834.0, 27274967.0, 27294655.0, 27523014.0]',
6: '[5459370.0, 16645542.0, 17462457.0, 21959571.0, 22010115.0, 22296144.0, 26927437.0, 33041169.0, 33101777.0, 34066530.0]',
7: '[7650618.0, 7806400.0, 7835575.0, 7857812.0, 8210353.0, 8232323.0, 8239494.0, 10024300.0, 11566936.0, 11637978.0, 11942149.0, 12192469.0, 12437164.0, 12474858.0, 12862377.0, 13357403.0, 13391145.0, 13884195.0, 14268316.0, 14780600.0, 14837681.0, 14959673.0, 15493334.0, 15660109.0, 15690908.0, 15706187.0, 15740492.0, 16185014.0, 16286275.0, 16301821.0, 16400795.0, 16599264.0, 16867936.0, 17017842.0, 17303135.0, 18156945.0, 18168645.0, 18351330.0, 18357701.0, 18361853.0, 18553020.0, 18665747.0, 22042028.0, 22509938.0, 22752953.0, 22752985.0, 22955054.0, 23605846.0, 23635250.0, 24042617.0, 24281660.0, 24426092.0, 24470177.0, 25217414.0, 25342266.0, 25399276.0, 25481652.0, 26026958.0, 26034429.0, 26150729.0, 26427482.0, 26488815.0, 26500234.0, 26537700.0, 26644976.0, 26692209.0, 26785282.0, 27339916.0, 27370666.0, 27372394.0, 27524906.0, 27563165.0, 29229947.0, 49274340.0]',
8: '[3764296.0, 3770459.0, 3773222.0, 3811210.0, 3825785.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11894269.0, 12556976.0, 12589238.0, 12666170.0, 12673679.0, 12702964.0, 13630878.0, 14026520.0, 14271281.0, 14325872.0, 14416179.0, 15383496.0, 15479503.0, 15920227.0, 16127226.0, 16222285.0, 16339588.0, 16871054.0, 16912938.0, 16912954.0, 16913656.0, 17401011.0, 17461197.0, 17474177.0, 17663812.0, 17724327.0, 18063449.0, 18227455.0, 18250669.0, 18386252.0, 18426307.0, 18587018.0, 18654484.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22067617.0, 22089128.0, 22098429.0, 22223747.0, 22276463.0, 22298327.0, 22341037.0, 22385483.0, 22395684.0, 22676560.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23402016.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24251602.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24770649.0, 24785182.0, 24839047.0, 24962082.0, 25028009.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26438189.0, 26450647.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26748272.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26990011.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27311622.0, 27315336.0, 27340467.0, 27569697.0, 37944191.0, 46149961.0, 46255262.0]',
9: '[8583594.0, 9119276.0, 21793982.0, 22133036.0, 24149220.0, 25776190.0, 26736757.0]',
10: '[10568655.0, 13302684.0, 19844775.0, 22493955.0, 26714695.0, 26997884.0]',
11: '[4344006.0, 24838031.0, 25098959.0, 25395637.0, 27025593.0]',
12: '[25642630.0, 25642846.0, 25642930.0, 26279148.0, 26287348.0]',
13: '[10451245.0, 10564358.0, 22491246.0, 24064440.0, 24279325.0, 24519613.0, 24651262.0, 25072503.0, 26461666.0, 26692304.0]',
14: '[4351264.0, 4384434.0, 6117960.0, 9116940.0, 10999954.0, 22148709.0, 22562211.0, 23862977.0, 24037344.0, 24361917.0, 24432647.0, 25076138.0, 26840072.0, 27429215.0]',
15: '[3692248.0, 6053171.0, 6226485.0, 12362875.0, 27371744.0]',
16: '[5933264.0, 6125219.0, 6247996.0, 10521070.0, 13063586.0, 15774983.0, 16803481.0, 16904934.0, 22065174.0, 27127184.0, 27496706.0, 27624793.0]',
17: '[3526456.0, 6170998.0, 6335295.0, 10505184.0, 11549684.0, 14422646.0, 15088415.0, 17645959.0, 22169836.0, 22901756.0, 22994874.0, 22994878.0, 23172874.0, 23925148.0, 25244507.0, 27389063.0]',
18: '[6350760.0, 20369026.0, 24216636.0, 26762272.0, 26927655.0, 27126594.0, 27371255.0]',
19: '[3775878.0, 6008063.0, 12812693.0, 13575794.0, 14790639.0, 22013262.0, 24622370.0, 26901485.0, 26985941.0, 27076644.0, 27112632.0]',
20: '[3775488.0, 10948289.0, 10952971.0, 10952974.0, 11367322.0, 12710129.0, 15469131.0, 22577881.0, 25644554.0, 26467182.0, 26933783.0, 27401801.0]',
21: '[6134715.0, 6350620.0, 15983939.0, 16269143.0, 17680987.0, 23994234.0, 24992672.0, 26268730.0, 26367621.0, 26629308.0, 26787837.0, 26988835.0, 27365620.0, 27455735.0, 27476152.0, 41508342.0]',
22: '[3690998.0, 3779413.0, 8103745.0, 10528617.0, 10533016.0, 14026520.0, 17474177.0, 21959397.0, 22069056.0, 23038428.0, 23077293.0, 24078130.0, 24160889.0, 25618055.0, 26462451.0, 27407332.0, 27569697.0]',
23: '[6512805.0, 8105738.0, 10680104.0, 10719170.0, 18290174.0, 22237701.0, 22290947.0, 23695912.0, 23765282.0, 24565635.0, 26289399.0, 27358491.0, 27420192.0]',
24: '[6462400.0, 16101703.0, 24045826.0, 25612324.0, 26283893.0, 26434155.0]',
25: '[8208100.0, 23566456.0, 23702554.0, 25266985.0, 26142859.0]',
26: '[3771632.0, 14240231.0, 15623240.0, 22486268.0, 23605938.0, 27170740.0]',
27: '[3798105.0, 46299235.0, 46299236.0, 46299237.0, 46299238.0, 46299740.0, 46299800.0]',
28: '[2631556.0, 2944019.0, 10790311.0, 13793711.0, 18470587.0, 21851951.0, 21924559.0, 23889759.0, 23927439.0, 23963011.0, 24766696.0, 26713651.0, 26990589.0, 27287227.0]',
29: '[3796218.0, 24589826.0, 25624390.0, 25765848.0]',
30: '[3772972.0, 6025591.0, 7764892.0, 12805981.0, 15547363.0, 16262273.0, 21905352.0, 22082762.0, 23922610.0, 23984212.0, 24257317.0, 25315731.0, 25402356.0, 25518280.0, 26719186.0, 26734227.0, 26940453.0, 26979759.0, 27025821.0, 27025822.0]',
31: '[3779080.0, 3794389.0, 9425562.0, 10768435.0, 22860582.0, 25471727.0, 25617513.0, 25620315.0, 25644721.0, 26092132.0, 27153345.0]',
32: '[2634854.0, 3700806.0, 3802276.0, 3802292.0, 3802325.0, 3802326.0, 3802327.0, 3802332.0, 3802333.0, 3802334.0, 3802337.0, 3802338.0, 3802339.0, 3802354.0, 3802356.0, 3805158.0, 3805178.0, 3805242.0, 3806854.0, 3808228.0, 3808232.0, 3808236.0, 3810760.0, 4258298.0, 6062612.0, 6161522.0, 6180029.0, 6243195.0, 6328004.0, 6352957.0, 6397822.0, 6415485.0, 6456158.0, 6476429.0, 6495895.0, 7588639.0, 9099878.0, 9119945.0, 9447476.0, 9454581.0, 9460842.0, 10036436.0, 10089783.0, 10642403.0, 10676758.0, 10702950.0, 10729821.0, 10746269.0, 11194385.0, 11411510.0, 11592343.0, 12638122.0, 12808119.0, 13792188.0, 13869248.0, 13880272.0, 14224791.0, 14363363.0, 14475114.0, 14555145.0, 14654996.0, 14659718.0, 14905880.0, 15009474.0, 15208979.0, 15365386.0, 15418108.0, 15427440.0, 15532726.0, 15759142.0, 15839949.0, 16148732.0, 16454470.0, 16472116.0, 16557241.0, 16567151.0, 16574330.0, 16670501.0, 16826733.0, 16866056.0, 16917358.0, 16952937.0, 17009237.0, 17042089.0, 17152410.0, 17167043.0, 17167057.0, 17176980.0, 17177751.0, 17203313.0, 17214040.0, 17359106.0, 17384372.0, 17390431.0, 17398779.0, 17419690.0, 17521757.0, 17541035.0, 17548222.0, 17692283.0, 17709222.0, 17752106.0, 17787836.0, 17980830.0, 18032898.0, 18091978.0, 18108188.0, 18157469.0, 18183177.0, 18202974.0, 18210551.0, 18356218.0, 18513671.0, 20358277.0, 21694022.0, 21760302.0, 21839477.0, 22005188.0, 22196129.0, 22231670.0, 22241704.0, 22321076.0, 22407725.0, 22574957.0, 22624317.0, 22688378.0, 22819977.0, 22837041.0, 22856540.0, 22891528.0, 22899520.0, 22911089.0, 22957363.0, 22978599.0, 23009341.0, 23016791.0, 23017033.0, 23194812.0, 23238114.0, 23242315.0, 23372955.0, 23403394.0, 23583171.0, 23717292.0, 23818247.0, 23822065.0, 23967128.0, 24023429.0, 24035021.0, 24041033.0, 24056428.0, 24092174.0, 24102216.0, 24115524.0, 24258574.0, 24305268.0, 24384033.0, 24407235.0, 24437414.0, 24440441.0, 24511068.0, 24607773.0, 24618564.0, 24640870.0, 24695776.0, 24712750.0, 24771021.0, 24777130.0, 24782249.0, 
24802597.0, 24824797.0, 24857748.0, 24902244.0, 24921608.0, 24928011.0, 24981047.0, 24992362.0, 25006081.0, 25056097.0, 25079341.0, 25079896.0, 25098400.0, 25128528.0, 25157096.0, 25175720.0, 25184562.0, 25211651.0, 25273616.0, 25325219.0, 25395409.0, 25430909.0, 25431399.0, 25441093.0, 25458263.0, 25459754.0, 25478333.0, 25511171.0, 25540179.0, 25644902.0, 25645209.0, 25645479.0, 25645484.0, 25645485.0, 25645493.0, 25645494.0, 25645495.0, 25645496.0, 25645497.0, 25645498.0, 25645507.0, 25645510.0, 25645511.0, 25645513.0, 25645515.0, 25645516.0, 25645517.0, 25645524.0, 25645526.0, 25645531.0, 25645539.0, 25645541.0, 25645542.0, 25645545.0, 25645546.0, 25645548.0, 26138811.0, 26227739.0, 26352404.0, 26435079.0, 26437848.0, 26443181.0, 26495400.0, 26535740.0, 26564673.0, 26687357.0, 26688326.0, 26719816.0, 26767248.0, 26792309.0, 26883761.0, 27005599.0, 27048622.0, 27054476.0, 27157854.0, 27158025.0, 27204375.0, 27278808.0, 27279445.0, 27288524.0, 27308865.0, 27324977.0, 27325474.0, 27329746.0, 27339109.0, 27376149.0, 27467592.0, 27522909.0, 27526374.0, 27530134.0, 27530208.0, 27542962.0, 27550891.0, 27551736.0, 27552627.0, 27554184.0, 27554356.0, 27557355.0, 27577752.0, 27578291.0, 28455314.0, 29248275.0, 29999199.0, 31994773.0, 32302805.0, 32324813.0, 34221894.0, 34753905.0, 36808782.0, 36954792.0, 38226628.0, 38622001.0, 38622009.0, 46253718.0, 46302623.0, 46302626.0, 46330220.0, 56289952.0]',
33: '[9193201.0, 9690456.0, 11262890.0, 11857463.0, 20399558.0, 22182248.0, 23000715.0, 23242310.0, 23324343.0, 23849738.0, 24920698.0, 26305246.0]',
34: '[2103060.0, 3773965.0, 3774544.0, 3775695.0, 3775872.0, 3776256.0, 3786612.0, 3791581.0, 5870313.0, 5916275.0, 6021141.0, 6199234.0, 6245542.0, 6295893.0, 6295894.0, 6295895.0, 6296520.0, 6365302.0, 6421653.0, 6453213.0, 6470668.0, 6470669.0, 6505848.0, 7762300.0, 7996364.0, 8204435.0, 8504791.0, 8516769.0, 8537466.0, 9978587.0, 10525500.0, 10532630.0, 11421697.0, 11861168.0, 11938229.0, 12631519.0, 14831183.0, 15028144.0, 19729781.0, 19865575.0, 20357413.0, 21762166.0, 21916786.0, 22585241.0, 22736795.0, 22800842.0, 22821355.0, 23120569.0, 23397799.0, 23436004.0, 23481575.0, 23518025.0, 23722477.0, 23740173.0, 23790685.0, 23790691.0, 23790693.0, 23844609.0, 23967824.0, 24169834.0, 24225931.0, 24575089.0, 24686268.0, 24701256.0, 24701581.0, 24738797.0, 24962380.0, 25062108.0, 25145546.0, 25220031.0, 25326521.0, 25341958.0, 25350944.0, 25375270.0, 25532312.0, 25636025.0, 25671453.0, 25782505.0, 25782589.0, 26158327.0, 26516437.0, 26877119.0, 26950677.0, 27100111.0, 27157416.0, 27167473.0, 27286248.0, 27339086.0, 27339905.0, 27356707.0, 27404057.0, 27414896.0, 27461178.0, 27462950.0, 27464289.0, 27477792.0, 27490121.0, 41667474.0]',
35: '[3775287.0, 24656178.0, 25590998.0, 26752872.0, 27104052.0, 27111638.0, 27154855.0, 27449240.0, 27505577.0]',
36: '[10966704.0, 14429073.0, 14796404.0, 24388079.0, 25634499.0, 55024694.0]',
37: '[3810974.0, 6485046.0, 8220639.0, 10710317.0, 24372965.0, 25336013.0, 26139248.0, 30115768.0, 31188433.0, 34102684.0, 35502814.0, 41505355.0, 44170427.0, 46325309.0]',
38: '[3087303.0, 4124422.0, 20979317.0, 21870465.0, 23941444.0, 25013107.0, 25326934.0, 25638943.0, 26674623.0, 27041345.0, 27357929.0, 27505577.0]',
39: '[3796218.0, 3799670.0, 13202074.0, 16015369.0, 18376479.0, 21761811.0, 22420460.0, 25064869.0, 25362187.0, 25420991.0, 25645622.0]',
40: '[6383399.0, 11571184.0, 16203469.0, 19328209.0, 19338037.0, 23609959.0, 23669719.0, 24172105.0, 24533474.0, 25545404.0, 27031913.0, 27475424.0]',
41: '[3790030.0, 10844179.0, 17904788.0, 25518619.0, 25644273.0, 26230725.0, 27107515.0, 27358315.0]',
42: '[3765777.0, 5219438.0, 6509530.0, 9401909.0, 10606015.0, 11550806.0, 12762794.0, 13827315.0, 14042779.0, 15264928.0, 15458075.0, 15925094.0, 16128449.0, 17054858.0, 18055051.0, 18471454.0, 21862046.0, 22293413.0, 22679682.0, 24127226.0, 24176606.0, 24248291.0, 24679083.0, 25083983.0, 25400937.0, 26366826.0, 26985312.0]',
43: '[3775287.0, 3776915.0, 21721135.0, 22104735.0, 22570362.0, 25326934.0, 25584184.0, 25586333.0, 25638943.0, 26759870.0]',
44: '[4173143.0, 7807763.0, 13522010.0, 13654473.0, 13927771.0, 15719616.0, 16249907.0, 16525019.0, 21694632.0, 22093627.0, 22464844.0, 22964817.0, 23061734.0, 23211210.0, 23691361.0, 23831988.0, 23938149.0, 24244391.0, 24684633.0, 25241119.0, 25530551.0, 26801599.0, 27370214.0, 27539801.0, 27556890.0, 46249740.0]',
45: '[7830781.0, 7843024.0, 7852695.0, 8237386.0, 9444575.0, 10762585.0, 21739343.0, 21899596.0, 22200593.0, 23421862.0, 24138149.0, 25127817.0, 26792398.0, 33378328.0]',
46: '[24175325.0, 26752769.0, 26865384.0]',
47: '[3808127.0, 22989911.0, 22991587.0, 24661354.0, 25009434.0]',
48: '[3801540.0, 5986989.0, 7758470.0, 13433718.0, 13869888.0, 13870030.0, 13870091.0, 15253727.0, 15460683.0, 15581976.0, 15640684.0, 16014121.0, 17269442.0, 17330959.0, 18272758.0, 18289278.0, 19819299.0, 22635021.0, 22763032.0, 24234146.0, 25270151.0, 25330011.0, 26481016.0, 26873860.0, 30798811.0]',
49: '[4161793.0, 21787085.0, 22034688.0, 23282114.0, 24428824.0, 25016295.0]',
50: '[3793081.0, 3803264.0, 4207952.0, 11470889.0, 11669056.0, 12523378.0, 12636851.0, 12730154.0, 15584724.0, 16344287.0, 17109625.0, 17721742.0, 17745772.0, 17910462.0, 18186065.0, 18210837.0, 18223914.0, 21639272.0, 22927223.0, 26708844.0, 27047225.0, 27290433.0, 27308607.0, 27314463.0, 27584488.0, 60520854.0]',
51: '[26150927.0, 27292634.0]',
52: '[2092705.0, 2855690.0, 3448135.0, 3808851.0, 4531792.0, 7778731.0, 12783185.0, 17298876.0, 20135092.0, 20175428.0, 20913824.0, 21599292.0, 22046526.0, 22607332.0, 22691016.0, 22787233.0, 22930717.0, 23249413.0, 23308386.0, 23380573.0, 23824923.0, 23929977.0, 23970974.0, 24197297.0, 24485989.0, 25130652.0, 26732210.0, 26735928.0, 26743678.0, 26786285.0, 27584461.0, 29547928.0, 31990350.0, 78669067.0]',
53: '[22113349.0, 26695070.0, 27119373.0, 27493256.0]',
54: '[3777847.0, 3790007.0, 21871161.0, 22030506.0, 22031745.0, 22176213.0, 22401126.0, 23088391.0, 25613851.0, 25646253.0, 26671540.0, 26863907.0, 26903057.0, 27397174.0, 39338541.0]',
55: '[3802927.0, 3823288.0, 4984890.0, 4989432.0, 5073611.0, 5082137.0, 6061217.0, 6348178.0, 6423623.0, 10965588.0, 15797375.0, 18127308.0, 18175653.0, 18289498.0, 18849747.0, 21800742.0, 22397195.0, 23221251.0, 23468869.0, 23690813.0, 24191813.0, 24284509.0, 24708045.0, 24855719.0, 25014176.0, 25360346.0, 26846684.0, 27033183.0, 27275736.0, 27331606.0, 27490188.0, 27535521.0, 27568184.0, 27574439.0, 27578281.0, 27578284.0, 27650233.0, 34549244.0, 34746656.0, 35542271.0, 35736297.0, 36587440.0, 37433822.0, 37967362.0, 38022911.0, 38066849.0, 39925109.0, 46251516.0, 46252778.0, 46252929.0]',
56: '[1343281.0, 1345715.0, 3512210.0, 3783167.0, 4382571.0, 5813114.0, 7093752.0, 8235578.0, 8518638.0, 8783563.0, 8850107.0, 9121566.0, 9923753.0, 9955607.0, 10692798.0, 12383956.0, 12776229.0, 12886199.0, 12969910.0, 14707530.0, 14889080.0, 15072156.0, 19041276.0, 20298361.0, 21688702.0, 21900949.0, 21937269.0, 22104118.0, 22153767.0, 22186346.0, 22826706.0, 22855741.0, 22953235.0, 23004360.0, 23134063.0, 23354534.0, 23591524.0, 24305737.0, 24462242.0, 24489942.0, 24592901.0, 24641378.0, 25198004.0, 25253475.0, 25275454.0, 25432521.0, 25488956.0, 25643518.0, 26068855.0, 26166520.0, 26320235.0, 26328728.0, 26331139.0, 26428311.0, 26693295.0, 26791936.0, 26793455.0, 26961378.0, 26972264.0, 27059428.0, 27157985.0, 27313342.0, 27379089.0, 27395407.0, 27399829.0, 27424041.0, 27424409.0, 27517571.0, 27547373.0, 27584206.0, 28676052.0, 29709654.0, 29765036.0, 30774464.0, 32030450.0, 33159613.0, 33476757.0, 34135377.0, 34193337.0, 34958524.0, 36144355.0, 36567630.0, 36950563.0, 36971922.0, 37494273.0, 37855421.0, 37911312.0, 37989420.0, 38051788.0, 38218330.0, 38345747.0, 38420621.0, 38624732.0, 38823526.0, 38876900.0, 38962587.0, 39101659.0, 39226884.0, 39271180.0, 39387557.0, 39439714.0, 39561752.0, 39643971.0, 39673143.0, 39688790.0, 39748498.0, 39758481.0, 39789493.0, 39832372.0, 40003041.0, 40227969.0, 40380014.0, 40511531.0, 40565551.0, 40567797.0, 40624345.0, 40667466.0, 40824391.0, 40944227.0, 41129307.0, 41210096.0, 41277879.0, 41398494.0, 42073897.0, 42310155.0, 42546349.0, 42727821.0, 42826416.0, 42993293.0, 43014521.0, 43062470.0, 43220481.0, 43223027.0, 43301173.0, 43357321.0, 43478228.0, 43823348.0, 43876770.0, 44319684.0, 44369791.0, 44486085.0, 44531864.0, 45035300.0, 45066335.0, 45493803.0, 45495953.0, 45559863.0, 45925310.0, 45927155.0, 46300493.0, 46328187.0, 46798573.0, 46928022.0, 47018208.0, 47219813.0, 47296708.0, 47882498.0, 48535050.0, 48613244.0, 48692634.0, 49624233.0, 50184623.0, 50773492.0, 50775319.0, 51263061.0, 51581192.0, 51581222.0, 
51842981.0, 52278746.0, 52280706.0, 52466999.0, 52544493.0, 52779208.0, 53403873.0, 54287989.0, 54782889.0, 54929359.0, 55019821.0, 55646830.0, 57249384.0, 57249913.0, 57325991.0, 59743243.0]',
57: '[3796196.0, 21858396.0, 25495565.0]',
58: '[3813145.0, 4154951.0, 6018005.0, 6040632.0, 6179742.0, 6395409.0, 6481277.0, 9158815.0, 9288505.0, 10699030.0, 13165538.0, 13755942.0, 14985984.0, 15515377.0, 15951653.0, 21965800.0, 22532548.0, 23301780.0, 23973288.0, 24550262.0, 24731087.0, 24876009.0, 25480283.0, 25489069.0, 26724897.0, 27296379.0, 27358904.0, 27410676.0, 46252098.0]',
59: '[3781927.0, 3789640.0, 3813305.0, 10731687.0, 11027021.0, 20414469.0, 23714925.0, 32595626.0, 33029875.0]',
60: '[3700898.0, 3764296.0, 3770459.0, 3773222.0, 3811210.0, 5987130.0, 6119308.0, 6262275.0, 6409776.0, 6450504.0, 6484157.0, 7640046.0, 7646955.0, 7762359.0, 7812486.0, 7813503.0, 7823236.0, 7886063.0, 8103745.0, 10347742.0, 10563528.0, 11004384.0, 11509383.0, 12543065.0, 12556976.0, 12589238.0, 12653339.0, 12666170.0, 12673679.0, 12702964.0, 14026520.0, 14266412.0, 14271281.0, 14325872.0, 14416179.0, 14516479.0, 14785130.0, 15044247.0, 15383496.0, 16127226.0, 16222285.0, 16960430.0, 17266862.0, 17401011.0, 17461197.0, 17474177.0, 17724327.0, 18063449.0, 18250669.0, 18265166.0, 18426307.0, 19300409.0, 19312456.0, 19372912.0, 19550439.0, 19638358.0, 19704233.0, 21801532.0, 21877403.0, 21974791.0, 22002267.0, 22026693.0, 22067617.0, 22089128.0, 22098429.0, 22164670.0, 22223747.0, 22244680.0, 22276463.0, 22298327.0, 22341037.0, 22385483.0, 22395684.0, 22439618.0, 22676560.0, 22718956.0, 22731313.0, 22904054.0, 22918676.0, 23080548.0, 23084056.0, 23218996.0, 23402016.0, 23423296.0, 23516757.0, 23601888.0, 23628604.0, 23848237.0, 23994110.0, 24030077.0, 24083853.0, 24132340.0, 24248118.0, 24295241.0, 24316904.0, 24422851.0, 24429865.0, 24443752.0, 24547890.0, 24589548.0, 24632640.0, 24741062.0, 24770649.0, 24785182.0, 24828348.0, 24839047.0, 24962082.0, 25028009.0, 25031599.0, 25341468.0, 25342918.0, 25378809.0, 25397848.0, 25410040.0, 25434196.0, 25449992.0, 25470970.0, 25494098.0, 25501373.0, 25514405.0, 25525923.0, 25540364.0, 26040210.0, 26228525.0, 26438189.0, 26450647.0, 26451566.0, 26470665.0, 26486031.0, 26707770.0, 26723069.0, 26723453.0, 26735162.0, 26748272.0, 26754314.0, 26870598.0, 26889379.0, 26889380.0, 26901249.0, 26985941.0, 26989589.0, 27000869.0, 27018916.0, 27025822.0, 27060755.0, 27060756.0, 27218208.0, 27293276.0, 27311622.0, 27316775.0, 27340467.0, 27569697.0, 31501140.0, 34800104.0, 37944191.0, 46149961.0, 46255262.0]',
61: '[3815245.0, 3817049.0, 4133414.0, 4237390.0, 6139410.0, 6302055.0, 6327475.0, 6359463.0, 7761745.0, 10634188.0, 10656776.0, 10799990.0, 11834232.0, 16311228.0, 16686050.0, 17340430.0, 21736076.0, 21792800.0, 22060322.0, 22083057.0, 22105805.0, 22177967.0, 22267098.0, 22415413.0, 22587189.0, 22605414.0, 22605428.0, 22626741.0, 22915051.0, 22915132.0, 22915137.0, 22916043.0, 23096413.0, 23212725.0, 23567105.0, 23567123.0, 23591762.0, 23793319.0, 23812585.0, 24102064.0, 24464348.0, 24622307.0, 25253365.0, 25352342.0, 25353269.0, 25427184.0, 25545290.0, 25671035.0, 26295357.0, 26368255.0, 26595469.0, 26726319.0, 26743135.0, 26822697.0, 26997208.0, 26997210.0, 27015502.0, 27015504.0, 27035582.0, 27038209.0, 27056966.0, 27062452.0, 27081705.0, 27383119.0, 27494547.0, 27547324.0]',
62: '[8193903.0, 8212273.0, 9247849.0, 9463029.0, 10512343.0, 11040434.0, 19848880.0, 21871975.0, 22614354.0, 25182231.0, 25355514.0, 27116547.0]',
63: '[3814657.0, 3816821.0, 3818830.0, 9372780.0, 22791620.0, 22805152.0, 23283422.0, 25248920.0, 25586333.0, 27020756.0, 27125092.0, 27145399.0, 27435241.0, 27449240.0, 27582841.0]',
64: '[3775561.0, 3778209.0, 3780242.0, 3783251.0, 3784665.0, 3788774.0, 3798212.0, 3811858.0, 3812283.0, 21830878.0, 21921748.0, 21993829.0, 22457245.0, 22460889.0, 23262728.0, 23400964.0, 23566456.0, 24092138.0, 24403780.0, 25289929.0, 25369658.0, 25618677.0, 25619320.0, 25629177.0, 25634619.0, 25645458.0, 26901477.0, 27038338.0, 27156461.0, 27158001.0, 27372667.0, 27391046.0, 27503418.0, 27537075.0]',
65: '[3769083.0, 3838826.0, 6518919.0, 7655380.0, 7671393.0, 9161974.0, 11933062.0, 12421582.0, 14111284.0, 15041555.0, 17038380.0, 17934524.0, 17951479.0, 17951704.0, 18736765.0, 21855631.0, 22254687.0, 22522730.0, 22525819.0, 22654614.0, 23072375.0, 23161341.0, 23682934.0, 23928270.0, 24002481.0, 25012845.0, 25464571.0, 25530090.0, 25936857.0, 26407346.0, 26861077.0, 41210539.0]',
66: '[3771617.0, 3807056.0, 8167498.0, 9489516.0, 13059819.0, 15236705.0, 17288890.0, 18106562.0, 18243976.0, 19449212.0, 19549705.0, 20360746.0, 21950670.0, 22523056.0, 22590937.0, 22822082.0, 22985088.0, 23085669.0, 23264894.0, 23454885.0, 23791789.0, 24158232.0, 24239892.0, 24257894.0, 24280874.0, 24434788.0, 24953310.0, 24990933.0, 25037706.0, 26312302.0, 26461656.0, 26569604.0, 26755930.0, 26802300.0, 26860472.0, 26891244.0, 26998345.0, 27036330.0, 27157297.0, 27377463.0]',
67: '[8223754.0, 21700957.0, 22248239.0, 24188773.0, 25199790.0, 25489601.0, 27370550.0]',
68: '[3824061.0, 10778962.0, 27157905.0]',
69: '[3885448.0, 4265687.0, 6453737.0, 15055174.0, 21588115.0, 22803210.0, 22810531.0, 22830406.0, 23778134.0, 23779509.0, 26598222.0, 27395145.0, 27536489.0]',
70: '[3817251.0, 3824297.0, 11604215.0, 13348182.0, 15295862.0, 17007082.0, 19729972.0, 19731450.0, 22867664.0, 23356034.0, 24169834.0, 25375270.0, 26970267.0, 27553681.0, 31500731.0, 31500732.0, 35705261.0]',
71: '[5931149.0, 19811894.0, 19812444.0, 22378265.0, 22409405.0, 23400964.0, 24164668.0, 25377816.0, 25484442.0, 26737825.0, 27395052.0, 27403058.0, 27517636.0]',
72: '[3772180.0, 4094759.0, 4099701.0, 4109923.0, 21758734.0, 22489510.0, 22802791.0, 23109074.0, 23332890.0, 23945495.0, 25404671.0, 26988331.0]',
73: '[22556333.0, 23537378.0, 23653584.0, 26050881.0, 26840895.0, 26877180.0, 27462050.0, 27463470.0]',
74: '[3775845.0, 24206625.0]',
75: '[4064369.0, 4172630.0, 8512849.0, 8513675.0, 10827902.0, 22681078.0, 24186095.0, 24990003.0, 26677157.0]',
76: '[4215108.0, 5754390.0, 6381956.0, 9309964.0, 13707851.0, 22117877.0]',
77: '[10969359.0, 11059344.0, 17714515.0, 19284446.0, 22690303.0, 26320567.0, 26415947.0]',
78: '[3888446.0, 3888996.0, 14727195.0, 22113364.0, 22782837.0, 25044309.0, 25167905.0, 26670443.0]',
79: '[3887054.0, 3889614.0, 3890522.0, 9303701.0, 9484895.0, 11363415.0, 14241244.0, 15291648.0, 16966026.0, 23250732.0, 24016081.0, 24393431.0, 24563127.0, 24788233.0, 25941613.0, 26366102.0, 27392409.0]',
80: '[27415886.0]'}}
Within the list cited_docdb_list, however, there are ids that do not appear in docdb_family_id. What I would like to do is count, for each row, the number of ids within cited_docdb_list which also appear in docdb_family_id. Is there a way to do so? My df is actually very large (almost 700000 observations). Please note that the types of docdb_family_id and cited_docdb_list differ in the data: docdb_family_id holds integers, while cited_docdb_list holds strings that represent lists of floats.
The expected outcome, for instance for the first couple of docdb_family_ids should be:
docdb_family_id nb_included
3498148, 2
3512921, 1
...
where 3498148, 2 comes from the fact that the cited_docdb_list related to 3498148 cites 2 indices that appear in docdb_family_id, namely 3802281 and 3944218. In the same fashion, 3512921 cites 3800683 within cited_docdb_list.
Thank you
The first idea is to convert each string in cited_docdb_list to a real list, cast its values to integers, and intersect the resulting set with the set of all docdb_family_id values; the size of each intersection is nb_included:
import ast
df['cited_docdb_list'] = df['cited_docdb_list'].apply(ast.literal_eval)
sets = set(df['docdb_family_id'])
df['nb_included']=[len(set(map(int,x)).intersection(sets)) for x in df['cited_docdb_list']]
print (df)
docdb_family_id cited_docdb_list \
0 3498148 [3454392.0, 3489764.0, 3492286.0, 3802281.0, 3...
1 3512921 [22785397.0, 3800683.0]
2 3525647 [3508710.0, 3832248.0, 6015961.0, 9173676.0, 2...
3 3636418 [3482303.0, 3518675.0, 3688207.0, 3688953.0, 7...
4 3673165 [7917626.0, 13587294.0, 15860525.0, 16099836.0...
.. ... ...
76 3886195 [4215108.0, 5754390.0, 6381956.0, 9309964.0, 1...
77 3887480 [10969359.0, 11059344.0, 17714515.0, 19284446....
78 3890389 [3888446.0, 3888996.0, 14727195.0, 22113364.0,...
79 3892024 [3887054.0, 3889614.0, 3890522.0, 9303701.0, 9...
80 3944218 [27415886.0]
nb_included
0 2
1 1
2 1
3 1
4 0
.. ...
76 0
77 0
78 0
79 0
80 0
[81 rows x 3 columns]
A pure pandas solution: use DataFrame.explode to get one row per cited id, test membership with Series.isin, and finally count the True values per docdb_family_id by aggregating with sum:
df = (df.assign(cited_docdb_list = df['cited_docdb_list'].apply(ast.literal_eval))
.explode('cited_docdb_list')
.astype({'cited_docdb_list':int})
.assign(nb_included=lambda x: x['cited_docdb_list'].isin(x['docdb_family_id']))
.groupby('docdb_family_id', as_index=False)['nb_included']
.sum())
print (df)
docdb_family_id nb_included
0 3498148 2
1 3512921 1
2 3525647 1
3 3636418 1
4 3673165 0
.. ... ...
76 3886195 0
77 3887480 0
78 3890389 0
79 3892024 0
80 3944218 0
[81 rows x 2 columns]
Related
Using shift function along with max function Pandas
I am attempting to create a technical indicator ('Supertrend') using Pandas. The formula for this column is recursive. (For people familiar with Pinescript, this column will replicate the result of this Pinescript function): df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up']) The problem occurs in the true part of the np.select()because I cannot call .shift() on a string. Normally, I would make a new column that uses .shift() beforehand but since this is recursive, I have to do it all in one line. If possible I'd like to avoid using loops for speed; prefer solutions using native pandas or numpy functions. What I am looking for A way to find max function that can accomodate a .shift() call Columns that are used: def tr(high,low,close1): return max(high - low, abs(high - close1), abs(low - close1)) df['st_closeprev'] = df['Close'].shift() df['st_hl2'] = (df['High']+df['Low'])/2 df['st_tr'] = df.apply(lambda row: tr(row['High'],row['Low'],row['st_closeprev']),axis=1) df['st_atr'] = df['st_tr'].ewm(alpha = 1/pd,adjust=False,min_periods=pd).mean() df['st_up'] = df['st_hl2'] - factor * df['st_atr'] df['st_dn'] = df['st_hl2'] + factor * df['st_atr'] df['st_trendup'] = np.select(df['Close'].shift() > df['st_trendup'].shift(),df[['st_up','st_trendup'.shift()]].max(axis=1),df['st_up']) Sample data obtained by the df.to_dict {'Date': {0: Timestamp('2021-01-01 09:15:00'), 1: Timestamp('2021-01-01 09:30:00'), 2: Timestamp('2021-01-01 09:45:00'), 3: Timestamp('2021-01-01 10:00:00'), 4: Timestamp('2021-01-01 10:15:00'), 5: Timestamp('2021-01-01 10:30:00'), 6: Timestamp('2021-01-01 10:45:00'), 7: Timestamp('2021-01-01 11:00:00'), 8: Timestamp('2021-01-01 11:15:00'), 9: Timestamp('2021-01-01 11:30:00'), 10: Timestamp('2021-01-01 11:45:00'), 11: Timestamp('2021-01-01 12:00:00'), 12: Timestamp('2021-01-01 12:15:00'), 13: Timestamp('2021-01-01 12:30:00'), 14: Timestamp('2021-01-01 12:45:00'), 15: 
Timestamp('2021-01-01 13:00:00'), 16: Timestamp('2021-01-01 13:15:00'), 17: Timestamp('2021-01-01 13:30:00'), 18: Timestamp('2021-01-01 13:45:00'), 19: Timestamp('2021-01-01 14:00:00'), 20: Timestamp('2021-01-01 14:15:00'), 21: Timestamp('2021-01-01 14:30:00'), 22: Timestamp('2021-01-01 14:45:00'), 23: Timestamp('2021-01-01 15:00:00'), 24: Timestamp('2021-01-01 15:15:00'), 25: Timestamp('2021-01-04 09:15:00')}, 'Open': {0: 31250.0, 1: 31376.0, 2: 31405.0, 3: 31389.4, 4: 31377.5, 5: 31347.8, 6: 31310.8, 7: 31343.4, 8: 31349.5, 9: 31349.9, 10: 31325.1, 11: 31310.9, 12: 31329.0, 13: 31376.0, 14: 31375.5, 15: 31357.4, 16: 31325.0, 17: 31341.1, 18: 31300.0, 19: 31324.5, 20: 31353.3, 21: 31350.0, 22: 31346.9, 23: 31330.0, 24: 31314.3, 25: 31450.2}, 'High': {0: 31407.0, 1: 31425.0, 2: 31411.95, 3: 31389.45, 4: 31382.0, 5: 31350.0, 6: 31354.6, 7: 31359.0, 8: 31370.0, 9: 31364.7, 10: 31350.0, 11: 31337.9, 12: 31378.9, 13: 31419.5, 14: 31377.75, 15: 31360.0, 16: 31367.15, 17: 31345.2, 18: 31340.0, 19: 31367.0, 20: 31375.0, 21: 31370.0, 22: 31350.0, 23: 31334.6, 24: 31329.6, 25: 31599.0}, 'Low': {0: 31250.0, 1: 31367.95, 2: 31352.5, 3: 31331.65, 4: 31301.4, 5: 31303.05, 6: 31310.0, 7: 31325.05, 8: 31335.35, 9: 31315.35, 10: 31281.9, 11: 31292.0, 12: 31316.25, 13: 31352.05, 14: 31335.0, 15: 31322.0, 16: 31318.25, 17: 31261.55, 18: 31283.3, 19: 31324.5, 20: 31322.0, 21: 31332.15, 22: 31324.1, 23: 31300.15, 24: 31280.0, 25: 31430.0}, 'Close': {0: 31375.0, 1: 31398.3, 2: 31386.0, 3: 31377.0, 4: 31342.3, 5: 31311.7, 6: 31345.0, 7: 31349.0, 8: 31344.2, 9: 31327.6, 10: 31311.3, 11: 31325.6, 12: 31373.0, 13: 31375.0, 14: 31357.4, 15: 31326.0, 16: 31345.9, 17: 31300.6, 18: 31324.4, 19: 31353.8, 20: 31345.6, 21: 31341.6, 22: 31332.5, 23: 31311.0, 24: 31285.0, 25: 31558.4}, 'Volume': {0: 259952, 1: 163775, 2: 105900, 3: 99725, 4: 115175, 5: 78625, 6: 67675, 7: 46575, 8: 53350, 9: 54175, 10: 96975, 11: 80925, 12: 79475, 13: 147775, 14: 38900, 15: 64925, 16: 52425, 17: 142175, 18: 81800, 
19: 74950, 20: 68550, 21: 40350, 22: 47150, 23: 119200, 24: 222875, 25: 524625}}
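The expression 'st_trendup'.shift() calls .shift() on a plain string rather than on the column, which is why it fails. Shift the column itself before taking the row-wise maximum. Change: df[['st_up','st_trendup'.shift()]].max(axis=1) to: df[['st_up','st_trendup']].assign(st_trendup = df['st_trendup'].shift()).max(axis=1)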
joining/merging both index and non-index columns in a pandas multi-index
Context: I have two very large pandas dataframes to join which barely fit in memory (8GB each, millions of rows) and have the challenge of performing a performant join using combinations of both indexed and non-indexed columns. Fuzzy joining is out of scope. Variables in order of cardinality: dataset_1 has these variables: postcode, street_name, secondary_number, primary_number, unique_id dataset_2 has these variables: postcode, street_name, house_number, house_name, sub_building_name, different_unique_id postcode and street_name are shared keys, and multiindexing seems the correct choice to improve joining performance in pandas: dataset_1 = dataset_1.set_index(['postcode', 'street', "unique_id"]).sort_index() dataset_2 = dataset_2.set_index(['postcode', 'street', "different_unique_id"]).sort_index() Processing: At this stage I can compute in pandas if memory allows. If not, I would use Dask, however it can't handle multi-indexes. In the event this were possible (or unnecessary) the sorting would still need to be handled in pandas as Dask cannot manage this. If Dask were an option this is how I would convert: dd1 = dd.from_pandas(dataset_1, npartitions=1) #large left dataframe del dataset_1 #to release the memory dd2 = dd.from_pandas(dataset_2, npartitions=3) #partitioned right dataframe for performance del dataset_2 #to release the memory Problem: The challenge is performing an inner join on non-null variables using the indexes ("postcode" and "street"), alongside non-indexed columns. Combinations of the non-indexed variables will be iterated in a for loop. 
Solution Sketch: This gives an idea what I would like to do to maintain the performance gains from the indexing, but is of course not syntactically possible: output = pd.merge(df1, df2, how='inner', left_on=["postcode", "street_name", "secondary_number", "primary_number"], right_on=["postcode", "street_name", "house_name", "house_number"], left_index=[True,True,False,False], right_index=[True,True,False,False]) Summary: My understanding is that pd.join can handle non-indexed and indexed columns, whereas pd.merge cannot. As a result I'm unsure how to achieve this join in pd.join where there is a combination of both indexed and non-indexed columns. Example of intersects: {'different_unique_id': {27: '{582D0636-8DEF-8F22-E053-6C04A8C01BAC}', 41: '{D9E869FE-7B55-4C36-AC43-695B9033A13B}', 33: '{93E6821E-554E-40FD-E053-6B04A8C0C1DF}', 1: '{288DCE29-0589-E510-E050-A8C06205480E}', 48: '{3A23DDD5-A0E8-41D2-A514-5B09385C301F}', 52: '{CEB16957-F7FA-4D1B-B45F-A390214735BC}', 13: '{404A5AF3-9B20-CD2B-E050-A8C063055C7B}', 16: '{64342BFD-FD07-422C-E053-6C04A8C0FB8A}', 57: '{29A8E769-8A10-4477-9494-FF55EF5FAE4B}', 10: '{404A5AF3-0B58-CD2B-E050-A8C063055C7B}', 21: '{55BDCAE6-0C10-521D-E053-6B04A8C0DD7A}', 31: '{5C676A02-1781-4152-950C-6E5CA2CBC487}', 7: '{68FEB20B-142E-38DA-E053-6C04A8C051AE}', 45: '{8F1B26BD-673F-53DB-E053-6C04A8C03649}', 12: '{2F115F7A-8F81-4124-9FD4-FB76E742B2C1}', 36: '{344AB2D7-4B59-4AB4-8F52-75B29BE8C509}', 20: '{965B6D91-D4B6-95E4-E053-6C04A8C07729}', 56: '{59872FD9-F39D-4BB9-95F6-91E002D948B1}', 22: '{6141DFF0-973F-4FEC-A582-7F310B566031}'}, 'unique_id': {27: 10002277489, 41: 64023255, 33: 10007367447, 1: 22229221, 48: 10033235735, 52: 100062162615, 13: 50103744, 16: 10022903998, 57: 12015624, 10: 12154940, 21: 10024247587, 31: 100041193990, 7: 10008230730, 45: 10091640210, 12: 202107394, 36: 5062293, 20: 48114659, 56: 10001311242, 22: 10000443154}, 'street': {27: 'thewharf', 41: 'parkroad', 33: 'oldmillclose', 1: 'thirdavenue', 48: 'woolnersway', 52: 
'sumnerroad', 13: 'cliftongardens', 16: 'windhamroad', 57: 'westparkroad', 10: 'grangeroad', 21: 'staplersroad', 31: 'strand', 7: 'amhurstroad', 45: 'eatonroad', 12: 'northendroad', 36: 'belsizegrove', 20: 'watermillway', 56: 'orchardplace', 22: 'thurlowparkroad'}, 'postcode': {27: 'lu72la', 41: 'cf626nt', 33: 'hr40aq', 1: 'bn32pd', 48: 'sg13ae', 52: 'gu97jx', 13: 'ct202ef', 16: 'bh14rn', 57: 'ub24af', 10: 'w55bu', 21: 'po302dp', 31: 'tq148aq', 7: 'e82ag', 45: 'ch47ew', 12: 'ha90ae', 36: 'nw34tt', 20: 'sw192rw', 56: 'so143hw', 22: 'se218hp'}, 'secondary_number': {27: '76', 41: 'flat6', 33: '49', 1: 'flat10', 48: '145', 52: '31', 13: 'flat19', 16: 'flat7', 57: '76', 10: 'flat1', 21: 'flat1', 31: 'flat43', 7: 'flata', 45: '8', 12: '42', 36: 'flat9', 20: 'flat43', 56: 'flat156', 22: 'flat2'}, 'primary_number': {27: 'eastdock', 41: 'courtlands', 33: 'watkinscourt', 1: 'ascothouse', 48: 'monumentcourt', 52: 'sumnercourt', 13: '22-24', 16: '77', 57: 'osterleyviews', 10: '55-59', 21: '138', 31: 'leandercourt', 7: '130', 45: 'greenbankhall', 12: 'danescourt', 36: 'holmefieldcourt', 20: 'bennetscourtyard', 56: 'oceanaboulevard', 22: '124f'}, 'building_name': {27: 'eastdock', 41: 'courtlands', 33: 'watkinscourt', 1: 'ascothouse', 48: 'monumentcourt', 52: 'sumnercourt', 13: None, 16: None, 57: 'osterleyviews', 10: None, 21: None, 31: 'leandercourt', 7: None, 45: 'greenbankhall', 12: 'danescourt', 36: 'holmefieldcourt', 20: 'bennetscourtyard', 56: 'oceanaboulevard', 22: None}, 'building_number': {27: None, 41: None, 33: None, 1: '18-20', 48: None, 52: None, 13: '22-24', 16: '77', 57: None, 10: '55-59', 21: '138', 31: None, 7: '130', 45: None, 12: None, 36: None, 20: None, 56: None, 22: '124f'}, 'sub_building': {27: '76', 41: 'flat6', 33: '49', 1: 'flat10', 48: '145', 52: '31', 13: 'flat19', 16: 'flat7', 57: '76', 10: 'flat1', 21: 'flat1', 31: 'flat43', 7: 'flata', 45: '8', 12: '42', 36: 'flat9', 20: 'flat43', 56: 'flat156', 22: 'flat2'}}
I am using Apache OpenNLP 1.8.0 and am trying to use POSTaggerTrainer for training
After referring to the documentation for version 1.8.0, I tried the CLI command given in the docs, but it doesn't seem to work, and neither does the Java code given under the API section. I have a text file with the following text: train-me.txt: Last_JJ September_NNP ,_, I_PRP tried_VBD to_TO find_VB out_RP the_DT address_NN of_IN an_DT old_JJ school_NN friend_NN whom_WP I_PRP had_VBD not_RB seen_VBN for_IN 15_CD years_NNS ._. I_PRP just_RB knew_VBD his_PRP$ name_NN ,_, Alan_NNP McKennedy_NNP ,_, and_CC I_PRP 'd_MD heard_VBD the_DT rumour_NN that_IN he_PRP 'd_MD moved_VBD to_TO Scotland_NNP ,_, the_DT country_NN of_IN his_PRP$ ancestors_NNS ._. dictionary.xml: <?xml version="1.0" encoding="UTF-8"?><dictionary> <entry tags="NNP"> <token>Calysta</token> </entry> </dictionary> I want to use either of these two (if possible) to train the program to tag Calysta as Calysta_NNP.
I know its kind of a late answer, but if it helps.. arjun#arjun-VPCEH26EN:~/apache-opennlp-1.8.0/bin$ ./opennlp POSTaggerTrainer -data train-me.txt -dict dictionary.xml -lang en -model en-pos-maxent-cust.bin Indexing events using cutoff of 5 Computing event counts... done. 52 events Indexing... done. Sorting and merging events... done. Reduced 52 events to 37. Done indexing. Incorporating indexed data for training... done. Number of Event Tokens: 37 Number of Outcomes: 20 Number of Predicates: 13 ...done. Computing model parameters ... Performing 100 iterations. 1: ... loglikelihood=-155.77807822480764 0.038461538461538464 2: ... loglikelihood=-130.9791219262959 0.5 3: ... loglikelihood=-115.82234962334346 0.5576923076923077 4: ... loglikelihood=-105.13170003394434 0.6730769230769231 5: ... loglikelihood=-96.9869322585347 0.6730769230769231 6: ... loglikelihood=-90.51694300405765 0.6923076923076923 7: ... loglikelihood=-85.23546058034727 0.6923076923076923 8: ... loglikelihood=-80.83562367302892 0.7307692307692307 9: ... loglikelihood=-77.1097811259408 0.7307692307692307 10: ... loglikelihood=-73.91120812658458 0.7307692307692307 11: ... loglikelihood=-71.13309894938885 0.75 12: ... loglikelihood=-68.69589846103266 0.75 13: ... loglikelihood=-66.53917914878002 0.75 14: ... loglikelihood=-64.61622830997396 0.75 15: ... loglikelihood=-62.890348665987055 0.75 16: ... loglikelihood=-61.332281582677155 0.75 17: ... loglikelihood=-59.91838269276684 0.75 18: ... loglikelihood=-58.629310291693805 0.75 19: ... loglikelihood=-57.44906823464401 0.75 20: ... loglikelihood=-56.36429724151985 0.75 21: ... loglikelihood=-55.36374258766163 0.75 22: ... loglikelihood=-54.43784870333842 0.75 23: ... loglikelihood=-53.57844629573773 0.75 24: ... loglikelihood=-52.77850781690259 0.75 25: ... loglikelihood=-52.03195408008879 0.75 26: ... loglikelihood=-51.333499646171695 0.75 27: ... loglikelihood=-50.67852796323892 0.75 28: ... loglikelihood=-50.062989611378285 0.75 29: ... 
loglikelihood=-49.48331869161687 0.75 30: ... loglikelihood=-48.93636361232364 0.75 31: ... loglikelihood=-48.419329410290345 0.75 32: ... loglikelihood=-47.92972939439551 0.75 33: ... loglikelihood=-47.465344384258486 0.75 34: ... loglikelihood=-47.02418818116749 0.75 35: ... loglikelihood=-46.604478186421446 0.75 36: ... loglikelihood=-46.20461029609541 0.75 37: ... loglikelihood=-45.82313736754338 0.75 38: ... loglikelihood=-45.458750683509976 0.75 39: ... loglikelihood=-45.11026394313063 0.75 40: ... loglikelihood=-44.77659939167084 0.75 41: ... loglikelihood=-44.45677576728319 0.75 42: ... loglikelihood=-44.14989779685863 0.75 43: ... loglikelihood=-43.855147016888836 0.75 44: ... loglikelihood=-43.571773731178716 0.75 45: ... loglikelihood=-43.299089946831224 0.75 46: ... loglikelihood=-43.03646315440174 0.75 47: ... loglikelihood=-42.78331083845189 0.75 48: ... loglikelihood=-42.53909562169248 0.75 49: ... loglikelihood=-42.30332096009808 0.7692307692307693 50: ... loglikelihood=-42.07552731829657 0.7692307692307693 51: ... loglikelihood=-41.85528876457919 0.7692307692307693 52: ... loglikelihood=-41.642209933359936 0.7692307692307693 53: ... loglikelihood=-41.43592331010347 0.7692307692307693 54: ... loglikelihood=-41.236086799846426 0.7692307692307693 55: ... loglikelihood=-41.04238154563922 0.7692307692307693 56: ... loglikelihood=-40.854509967677004 0.7692307692307693 57: ... loglikelihood=-40.67219399768791 0.7692307692307693 58: ... loglikelihood=-40.49517348640929 0.7692307692307693 59: ... loglikelihood=-40.32320476478338 0.7692307692307693 60: ... loglikelihood=-40.1560593419208 0.7692307692307693 61: ... loglikelihood=-39.99352272496435 0.7692307692307693 62: ... loglikelihood=-39.835393347789605 0.7692307692307693 63: ... loglikelihood=-39.68148159704321 0.7692307692307693 64: ... loglikelihood=-39.53160892537774 0.7692307692307693 65: ... loglikelihood=-39.38560704292392 0.7692307692307693 66: ... 
loglikelihood=-39.243317179072264 0.7692307692307693 67: ... loglikelihood=-39.10458940753585 0.7692307692307693 68: ... loglikelihood=-38.969282028454 0.7692307692307693 69: ... loglikelihood=-38.8372610019872 0.7692307692307693 70: ... loglikelihood=-38.70839942845979 0.7692307692307693 71: ... loglikelihood=-38.58257707064014 0.7692307692307693 72: ... loglikelihood=-38.45967991421811 0.7692307692307693 73: ... loglikelihood=-38.33959976295419 0.7692307692307693 74: ... loglikelihood=-38.222233865340385 0.7692307692307693 75: ... loglikelihood=-38.107484569938585 0.7692307692307693 76: ... loglikelihood=-37.995259006848066 0.7692307692307693 77: ... loglikelihood=-37.88546879301048 0.7692307692307693 78: ... loglikelihood=-37.77802975928638 0.7692307692307693 79: ... loglikelihood=-37.6728616974405 0.7692307692307693 80: ... loglikelihood=-37.56988812535212 0.7692307692307693 81: ... loglikelihood=-37.469036068928645 0.7692307692307693 82: ... loglikelihood=-37.370235859343474 0.7692307692307693 83: ... loglikelihood=-37.27342094434868 0.7692307692307693 84: ... loglikelihood=-37.178527712527796 0.7692307692307693 85: ... loglikelihood=-37.08549532945806 0.7692307692307693 86: ... loglikelihood=-36.99426558484419 0.7692307692307693 87: ... loglikelihood=-36.904782749769446 0.7692307692307693 88: ... loglikelihood=-36.81699344328549 0.7692307692307693 89: ... loglikelihood=-36.730846507630154 0.7692307692307693 90: ... loglikelihood=-36.64629289142378 0.7692307692307693 91: ... loglikelihood=-36.563285540250355 0.7692307692307693 92: ... loglikelihood=-36.48177929407976 0.7692307692307693 93: ... loglikelihood=-36.40173079103272 0.7692307692307693 94: ... loglikelihood=-36.32309837703207 0.7692307692307693 95: ... loglikelihood=-36.24584202091997 0.7692307692307693 96: ... loglikelihood=-36.16992323465651 0.7692307692307693 97: ... loglikelihood=-36.095304998244124 0.7692307692307693 98: ... loglikelihood=-36.021951689052344 0.7692307692307693 99: ... 
loglikelihood=-35.94982901524132 0.7692307692307693 100: ... loglikelihood=-35.87890395300729 0.7692307692307693 Writing pos tagger model ... done (0.086s) Wrote pos tagger model to path: /home/arjun/apache-opennlp-1.8.0/bin/en-pos-maxent-cust.bin Execution time: 0.522 seconds I used Apache OpenNLP 1.8.0. Revert if you need any help with Apache OpenNLP POS Tagger.
Creating a Dropdown menu in Plotly from Pandas
I've had a look at the following link but its not very clear https://plot.ly/pandas/dropdowns/. I have the following figure generated in plotly but would like a dropdown menu (of A, B and C) to select and display the respective line only import pandas as pd import plotly plotly.offline.init_notebook_mode() import plotly.offline as py from plotly.graph_objs import * df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 
50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}}) fig = { 'data': [ { 'x': df[df['kit']==kit]['FPR'], 'y': df[df['kit']==kit]['SNS'], 'name': kit, } for kit in ['A', 'B', 'C'] ], } py.iplot(fig)
I'm not sure how to do this directly from plotly; however, you can use interact function from ipywidgets library. In your case it will be the following: from ipywidgets import interact df = pd.DataFrame({'freq': {0: 0.01, 1: 0.02, 2: 0.029999999999999999, 3: 0.040000000000000001, 4: 0.050000000000000003, 5: 0.059999999999999998, 6: 0.070000000000000007, 7: 0.080000000000000002, 8: 0.089999999999999997, 9: 0.10000000000000001, 10: 0.01, 11: 0.02, 12: 0.029999999999999999, 13: 0.040000000000000001, 14: 0.050000000000000003, 15: 0.059999999999999998, 16: 0.070000000000000007, 17: 0.080000000000000002, 18: 0.089999999999999997, 19: 0.10000000000000001, 20: 0.01, 21: 0.02, 22: 0.029999999999999999, 23: 0.040000000000000001, 24: 0.050000000000000003, 25: 0.059999999999999998, 26: 0.070000000000000007, 27: 0.080000000000000002, 28: 0.089999999999999997, 29: 0.10000000000000001}, 'kit': {0: 'B', 1: 'B', 2: 'B', 3: 'B', 4: 'B', 5: 'B', 6: 'B', 7: 'B', 8: 'B', 9: 'B', 10: 'A', 11: 'A', 12: 'A', 13: 'A', 14: 'A', 15: 'A', 16: 'A', 17: 'A', 18: 'A', 19: 'A', 20: 'C', 21: 'C', 22: 'C', 23: 'C', 24: 'C', 25: 'C', 26: 'C', 27: 'C', 28: 'C', 29: 'C'}, 'SNS': {0: 91.198979591799997, 1: 90.263605442199989, 2: 88.818027210899999, 3: 85.671768707499993, 4: 76.23299319729999, 5: 61.0969387755, 6: 45.1530612245, 7: 36.267006802700003, 8: 33.0782312925, 9: 30.739795918400002, 10: 90.646258503400006, 11: 90.306122449, 12: 90.178571428600009, 13: 89.498299319699996, 14: 88.435374149599994, 15: 83.588435374200003, 16: 75.212585034, 17: 60.969387755100001, 18: 47.278911564600001, 19: 37.627551020399999, 20: 90.986394557800011, 21: 90.136054421799997, 22: 89.540816326499993, 23: 88.690476190499993, 24: 86.479591836799997, 25: 82.397959183699996, 26: 73.809523809499993, 27: 63.180272108800004, 28: 50.935374149700003, 29: 41.241496598699996}, 'FPR': {0: 1.0953616823100001, 1: 0.24489252678500001, 2: 0.15106142277199999, 3: 0.104478605177, 4: 0.089172822253300005, 5: 0.079856258734300009, 6: 
0.065881413455800009, 7: 0.059892194050699996, 8: 0.059892194050699996, 9: 0.0578957875824, 10: 0.94097291541899997, 11: 0.208291741532, 12: 0.14773407865800001, 13: 0.107805949291, 14: 0.093165635189999998, 15: 0.082518134025399995, 16: 0.074532508152000007, 17: 0.065881413455800009, 18: 0.062554069341799995, 19: 0.061888600519100001, 20: 0.85313103081100006, 21: 0.18899314567100001, 22: 0.14107939043000001, 23: 0.110467824582, 24: 0.099820323417899995, 25: 0.085180009316599997, 26: 0.078525321088700001, 27: 0.073201570506399985, 28: 0.071870632860800004, 29: 0.0705396952153}}) def plot_it(kit): fig = { 'data': [ { 'x': df[df['kit']==kit]['FPR'], 'y': df[df['kit']==kit]['SNS'], 'name': kit } ] } py.iplot(fig) interact(plot_it, kit=('A', 'B', 'C'))
Pandas Multivariate Linear Regression by Group and Saving Results as csv
I am trying to calculate linear regression of Y=C-A column, x = ['Plate X', 'Plate Y', 'Field X'] and group those values by Drum and Plate. Additional question - how to save results as a file, csv preferable. Is pandas package is sufficient for this task or other package needed. Thank you There is my data set: DF = {'A': {0: 305.03277000000003, 1: 304.42513500000001, 2: 305.119575, 3: 304.42513500000001, 4: 304.07791500000002, 5: 304.85916000000003, 6: 305.72721000000001, 7: 305.81401499999998, 8: 304.07791500000002, 9: 305.03277000000003, 10: 304.68554999999998, 11: 304.945965, 12: 303.38347499999998, 13: 304.945965, 14: 304.51193999999998, 15: 304.25152500000002, 16: 304.51193999999998, 17: 304.25152500000002, 18: 304.42513500000001, 19: 304.85916000000003, 20: 303.8175, 21: 305.119575, 22: 304.59874500000001, 23: 304.68554999999998, 24: 304.33832999999998, 25: 303.90430499999997, 26: 304.68554999999998, 27: 304.772355, 28: 304.59874500000001, 29: 304.772355, 30: 304.59874500000001, 31: 305.119575, 32: 305.37998999999996, 33: 304.59874500000001, 34: 304.42513500000001, 35: 304.33832999999998, 36: 304.51193999999998, 37: 305.46679499999999, 38: 304.59874500000001, 39: 305.29318499999999, 40: 304.85916000000003, 41: 305.29318499999999, 42: 305.119575, 43: 304.945965, 44: 305.29318499999999, 45: 304.85916000000003, 46: 305.72721000000001, 47: 306.16123500000003, 48: 305.37998999999996, 49: 305.03277000000003, 50: 305.20637999999997, 51: 304.51193999999998, 52: 308.33136000000002, 53: 305.81401499999998, 54: 305.55360000000002, 55: 306.42165, 56: 305.64040499999999, 57: 305.29318499999999, 58: 305.37998999999996, 59: 304.772355, 60: 305.37998999999996, 61: 305.72721000000001, 62: 305.90082000000001, 63: 305.64040499999999, 64: 305.81401499999998, 65: 304.85916000000003, 66: 305.20637999999997, 67: 306.42165, 68: 305.64040499999999, 69: 305.55360000000002, 70: 304.59874500000001, 71: 305.55360000000002, 72: 306.07443000000001, 73: 306.42165, 74: 305.98762499999998, 
75: 306.68206499999997, 76: 305.03277000000003, 77: 305.46679499999999, 78: 306.42165, 79: 304.85916000000003, 80: 304.51193999999998, 81: 303.8175, 82: 304.51193999999998, 83: 304.16472000000005, 84: 304.51193999999998, 85: 303.73069500000003, 86: 303.29667000000001, 87: 304.68554999999998, 88: 303.73069500000003, 89: 304.42513500000001, 90: 304.51193999999998, 91: 304.16472000000005, 92: 304.945965, 93: 304.772355, 94: 304.42513500000001, 95: 304.16472000000005, 96: 305.119575, 97: 304.16472000000005, 98: 304.25152500000002, 99: 305.20637999999997}, 'B': {0: 311.10912000000002, 1: 310.93551000000002, 2: 313.279245, 3: 313.19243999999998, 4: 309.11260499999997, 5: 309.0258, 6: 309.72023999999999, 7: 313.279245, 8: 311.89036499999997, 9: 311.19592499999999, 10: 308.76538500000004, 11: 309.72023999999999, 12: 312.15078, 13: 309.19941, 14: 308.50497000000001, 15: 308.33136000000002, 16: 309.89384999999999, 17: 310.848705, 18: 312.23758500000002, 19: 313.53966000000003, 20: 309.72023999999999, 21: 309.11260499999997, 22: 311.89036499999997, 23: 309.98065499999996, 24: 309.19941, 25: 310.41467999999998, 26: 311.62995000000001, 27: 311.02231499999999, 28: 310.32787500000001, 29: 310.06745999999998, 30: 311.89036499999997, 31: 311.89036499999997, 32: 309.98065499999996, 33: 312.06397500000003, 34: 306.85567500000002, 35: 309.98065499999996, 36: 311.80356, 37: 309.19941, 38: 312.41119500000002, 39: 310.848705, 40: 311.10912000000002, 41: 310.501485, 42: 313.80007499999999, 43: 308.24455499999999, 44: 312.49799999999999, 45: 313.10563500000001, 46: 313.19243999999998, 47: 309.63343500000002, 48: 311.10912000000002, 49: 310.501485, 50: 310.58828999999997, 51: 314.23410000000001, 52: 312.41119500000002, 53: 313.01882999999998, 54: 311.19592499999999, 55: 311.54314500000004, 56: 313.279245, 57: 311.54314500000004, 58: 311.45634000000001, 59: 313.19243999999998, 60: 312.15078, 61: 312.15078, 62: 313.452855, 63: 311.02231499999999, 64: 311.02231499999999, 65: 
311.28272999999996, 66: 311.02231499999999, 67: 307.897335, 68: 313.19243999999998, 69: 311.97717, 70: 311.10912000000002, 71: 312.58480499999996, 72: 312.58480499999996, 73: 315.01534500000002, 74: 311.97717, 75: 313.452855, 76: 311.80356, 77: 308.67857999999995, 78: 311.71675499999998, 79: 311.36953499999998, 80: 310.501485, 81: 308.85219000000001, 82: 311.10912000000002, 83: 309.37302, 84: 307.98413999999997, 85: 311.10912000000002, 86: 311.28272999999996, 87: 310.93551000000002, 88: 310.24107000000004, 89: 307.11608999999999, 90: 307.55011500000001, 91: 308.76538500000004, 92: 310.848705, 93: 307.02928500000002, 94: 309.89384999999999, 95: 311.28272999999996, 96: 307.81052999999997, 97: 309.72023999999999, 98: 311.54314500000004, 99: 310.32787500000001}, 'C': {0: 305.72721000000001, 1: 306.00498599999997, 2: 306.49109399999998, 3: 306.59526, 4: 305.48415599999998, 5: 305.24110200000001, 6: 306.28276199999999, 7: 306.97720199999998, 8: 306.80359199999998, 9: 307.081368, 10: 306.10915199999999, 11: 304.47721799999999, 12: 305.24110200000001, 13: 304.68554999999998, 14: 306.35220600000002, 15: 305.17165799999998, 16: 306.45637200000004, 17: 305.86609800000002, 18: 306.734148, 19: 306.28276199999999, 20: 305.51887799999997, 21: 308.053584, 22: 306.52581600000002, 23: 305.935542, 24: 306.56053800000001, 25: 306.10915199999999, 26: 306.56053800000001, 27: 305.79665399999999, 28: 305.761932, 29: 304.75499400000001, 30: 306.07443000000001, 31: 306.35220600000002, 32: 305.86609800000002, 33: 307.01192400000002, 34: 306.28276199999999, 35: 305.55360000000002, 36: 306.35220600000002, 37: 306.80359199999998, 38: 305.90082000000001, 39: 306.03970800000002, 40: 307.18553399999996, 41: 304.82443799999999, 42: 305.83137599999998, 43: 306.97720199999998, 44: 306.38692799999995, 45: 306.49109399999998, 46: 306.38692799999995, 47: 306.52581600000002, 48: 305.06749200000002, 49: 306.07443000000001, 50: 306.56053800000001, 51: 305.48415599999998, 52: 305.69248799999997, 53: 
307.63692000000003, 54: 307.28969999999998, 55: 305.62304399999999, 56: 306.38692799999995, 57: 305.86609800000002, 58: 306.56053800000001, 59: 305.55360000000002, 60: 306.07443000000001, 61: 306.52581600000002, 62: 306.56053800000001, 63: 305.34526800000003, 64: 305.24110200000001, 65: 304.58138399999996, 66: 307.04664600000001, 67: 306.00498599999997, 68: 305.79665399999999, 69: 306.49109399999998, 70: 305.51887799999997, 71: 305.72721000000001, 72: 306.31748399999998, 73: 306.03970800000002, 74: 307.15081200000003, 75: 307.60219799999999, 76: 304.92860400000001, 77: 304.68554999999998, 78: 305.58832200000001, 79: 305.449434, 80: 306.83831400000003, 81: 306.49109399999998, 82: 306.94247999999999, 83: 304.963326, 84: 307.25497799999999, 85: 305.97026399999999, 86: 306.07443000000001, 87: 305.761932, 88: 305.90082000000001, 89: 306.31748399999998, 90: 306.69942599999996, 91: 306.07443000000001, 92: 305.449434, 93: 304.789716, 94: 304.72027200000002, 95: 306.10915199999999, 96: 305.449434, 97: 305.31054599999999, 98: 305.31054599999999, 99: 306.45637200000004}, 'C-A': {0: 0.69443999999999995, 1: 1.5798510000000001, 2: 1.3715190000000002, 3: 2.1701250000000001, 4: 1.4062410000000001, 5: 0.381942, 6: 0.55555200000000005, 7: 1.163187, 8: 2.7256770000000001, 9: 2.0485980000000001, 10: 1.423602, 11: -0.46874700000000002, 12: 1.8576270000000001, 13: -0.26041500000000001, 14: 1.840266, 15: 0.92013299999999998, 16: 1.9444319999999999, 17: 1.614573, 18: 2.3090130000000002, 19: 1.423602, 20: 1.7013779999999998, 21: 2.9340090000000001, 22: 1.927071, 23: 1.249992, 24: 2.2222080000000002, 25: 2.204847, 26: 1.8749880000000001, 27: 1.0242990000000001, 28: 1.163187, 29: -0.017361000000000001, 30: 1.4756850000000001, 31: 1.232631, 32: 0.48610799999999998, 33: 2.413179, 34: 1.8576270000000001, 35: 1.2152700000000001, 36: 1.840266, 37: 1.336797, 38: 1.3020750000000001, 39: 0.74652299999999994, 40: 2.3263739999999999, 41: -0.46874700000000002, 42: 0.71180100000000002, 43: 2.031237, 44: 
1.0937430000000001, 45: 1.631934, 46: 0.65971800000000003, 47: 0.36458099999999999, 48: -0.312498, 49: 1.04166, 50: 1.354158, 51: 0.97221599999999997, 52: -2.6388720000000001, 53: 1.822905, 54: 1.7361, 55: -0.79860600000000004, 56: 0.74652299999999994, 57: 0.57291300000000001, 58: 1.1805479999999999, 59: 0.78124499999999997, 60: 0.69443999999999995, 61: 0.79860600000000004, 62: 0.65971800000000003, 63: -0.29513699999999998, 64: -0.57291300000000001, 65: -0.27777600000000002, 66: 1.840266, 67: -0.41666400000000003, 68: 0.156249, 69: 0.93749400000000005, 70: 0.92013299999999998, 71: 0.17360999999999999, 72: 0.24305399999999999, 73: -0.381942, 74: 1.163187, 75: 0.92013299999999998, 76: -0.10416600000000001, 77: -0.78124499999999997, 78: -0.83332800000000007, 79: 0.59027399999999997, 80: 2.3263739999999999, 81: 2.673594, 82: 2.4305400000000001, 83: 0.79860600000000004, 84: 2.7430380000000003, 85: 2.2395689999999999, 86: 2.7777599999999998, 87: 1.0763819999999999, 88: 2.1701250000000001, 89: 1.8923490000000001, 90: 2.1874860000000003, 91: 1.9097099999999998, 92: 0.50346899999999994, 93: 0.017361000000000001, 94: 0.29513699999999998, 95: 1.9444319999999999, 96: 0.32985900000000001, 97: 1.145826, 98: 1.059021, 99: 1.249992}, 'Drum': {0: 'LAAA', 1: 'LAAA', 2: 'LAAA', 3: 'LAAA', 4: 'LAAA', 5: 'LAAA', 6: 'LAAA', 7: 'LAAA', 8: 'LAAA', 9: 'LAAA', 10: 'LAAA', 11: 'LAAA', 12: 'LAAA', 13: 'LAAA', 14: 'LAAA', 15: 'LAAA', 16: 'LAAA', 17: 'LAAA', 18: 'LAAA', 19: 'LAAA', 20: 'LAAA', 21: 'LAAA', 22: 'LAAA', 23: 'LAAA', 24: 'LAAA', 25: 'LAAA', 26: 'LAAA', 27: 'LAAA', 28: 'LAAA', 29: 'LAAA', 30: 'LAAA', 31: 'LAAA', 32: 'LAAA', 33: 'LAAA', 34: 'LAAA', 35: 'LAAA', 36: 'LAAA', 37: 'LAAA', 38: 'LAAA', 39: 'LAAA', 40: 'LAAA', 41: 'LAAA', 42: 'LAAA', 43: 'LAAA', 44: 'LAAA', 45: 'LAAA', 46: 'LAAA', 47: 'LAAA', 48: 'LAAA', 49: 'LAAA', 50: 'LAAA', 51: 'LAAA', 52: 'LAAA', 53: 'LAAA', 54: 'LAAA', 55: 'LAAA', 56: 'LAAA', 57: 'LAAA', 58: 'LAAA', 59: 'LAAA', 60: 'LAAA', 61: 'LAAA', 62: 'LAAA', 63: 
'LAAA', 64: 'LAAA', 65: 'LAAA', 66: 'LAAA', 67: 'LAAA', 68: 'LAAA', 69: 'LAAA', 70: 'LAAA', 71: 'LAAA', 72: 'LAAA', 73: 'LAAA', 74: 'LAAA', 75: 'LAAA', 76: 'LAAA', 77: 'LAAA', 78: 'LAAA', 79: 'LAAA', 80: 'LAAA', 81: 'LAAA', 82: 'LAAA', 83: 'LAAA', 84: 'LAAA', 85: 'LAAA', 86: 'LAAA', 87: 'LAAA', 88: 'LAAA', 89: 'LAAA', 90: 'LAAA', 91: 'LAAA', 92: 'LAAA', 93: 'LAAA', 94: 'LAAA', 95: 'LAAA', 96: 'LAAA', 97: 'LAAA', 98: 'LAAA', 99: 'LAAA'}, 'FIELD X': {0: 4.7949800000000007, 1: -5.5198839999999993, 2: 4.7949800000000007, 3: 4.7949800000000007, 4: -5.5198839999999993, 5: 4.7949800000000007, 6: -5.5198839999999993, 7: 4.7949800000000007, 8: 4.7949800000000007, 9: -5.5198839999999993, 10: -5.5198839999999993, 11: 4.7949800000000007, 12: 4.7949800000000007, 13: -5.5198839999999993, 14: 4.7949800000000007, 15: -5.5198839999999993, 16: 4.7949800000000007, 17: -5.5198839999999993, 18: 4.7949800000000007, 19: 4.7949800000000007, 20: -5.5198839999999993, 21: 4.7949800000000007, 22: -5.5198839999999993, 23: 4.7949800000000007, 24: 4.7949800000000007, 25: -5.5198839999999993, 26: 4.7949800000000007, 27: -5.5198839999999993, 28: -5.5198839999999993, 29: 4.7949800000000007, 30: -5.5198839999999993, 31: 4.7949800000000007, 32: 4.7949800000000007, 33: -5.5198839999999993, 34: 4.7949800000000007, 35: -5.5198839999999993, 36: 4.7949800000000007, 37: -5.5198839999999993, 38: 4.7949800000000007, 39: -5.5198839999999993, 40: 4.7949800000000007, 41: -5.5198839999999993, 42: 4.7949800000000007, 43: -5.5198839999999993, 44: 4.7949800000000007, 45: -5.5198839999999993, 46: 4.7949800000000007, 47: -5.5198839999999993, 48: 4.7949800000000007, 49: -5.5198839999999993, 50: -5.5198839999999993, 51: 4.7949800000000007, 52: -5.5198839999999993, 53: 4.7949800000000007, 54: 4.7949800000000007, 55: -5.5198839999999993, 56: 4.7949800000000007, 57: -5.5198839999999993, 58: 4.7949800000000007, 59: -5.5198839999999993, 60: 4.7949800000000007, 61: 4.7949800000000007, 62: -5.5198839999999993, 63: 
4.7949800000000007, 64: -5.5198839999999993, 65: 4.7949800000000007, 66: 4.7949800000000007, 67: -5.5198839999999993, 68: 4.7949800000000007, 69: -5.5198839999999993, 70: -5.5198839999999993, 71: 4.7949800000000007, 72: -5.5198839999999993, 73: 4.7949800000000007, 74: -5.5198839999999993, 75: 4.7949800000000007, 76: -5.5198839999999993, 77: -5.5198839999999993, 78: 4.7949800000000007, 79: -5.5198839999999993, 80: 4.7949800000000007, 81: -5.5198839999999993, 82: 4.7949800000000007, 83: 4.7949800000000007, 84: -5.5198839999999993, 85: 4.7949800000000007, 86: -5.5198839999999993, 87: 4.7949800000000007, 88: 4.7949800000000007, 89: -5.5198839999999993, 90: -5.5198839999999993, 91: 4.7949800000000007, 92: 4.7949800000000007, 93: -5.5198839999999993, 94: 4.7949800000000007, 95: -5.5198839999999993, 96: 4.7949800000000007, 97: -5.5198839999999993, 98: 4.7949800000000007, 99: 4.7949800000000007}, 'FIELD Y': {0: 1.8893500000000001, 1: 1.8893500000000001, 2: 1.8893500000000001, 3: 1.8893500000000001, 4: 1.8893500000000001, 5: 1.8893500000000001, 6: 1.8893500000000001, 7: 1.8893500000000001, 8: 1.8893500000000001, 9: 1.8893500000000001, 10: 1.8893500000000001, 11: 1.8893500000000001, 12: 1.8893500000000001, 13: 1.8893500000000001, 14: 1.8893500000000001, 15: 1.8893500000000001, 16: 1.8893500000000001, 17: 1.8893500000000001, 18: 1.8893500000000001, 19: 1.8893500000000001, 20: 1.8893500000000001, 21: 1.8893500000000001, 22: 1.8893500000000001, 23: 1.8893500000000001, 24: 1.8893500000000001, 25: 1.8893500000000001, 26: 1.8893500000000001, 27: 1.8893500000000001, 28: 1.8893500000000001, 29: 1.8893500000000001, 30: 1.8893500000000001, 31: 1.8893500000000001, 32: 1.8893500000000001, 33: 1.8893500000000001, 34: 1.8893500000000001, 35: 1.8893500000000001, 36: 1.8893500000000001, 37: 1.8893500000000001, 38: 1.8893500000000001, 39: 1.8893500000000001, 40: 1.8893500000000001, 41: 1.8893500000000001, 42: 1.8893500000000001, 43: 1.8893500000000001, 44: 1.8893500000000001, 45: 
1.8893500000000001, 46: 1.8893500000000001, 47: 1.8893500000000001, 48: 1.8893500000000001, 49: 1.8893500000000001, 50: 1.8893500000000001, 51: 1.8893500000000001, 52: 1.8893500000000001, 53: 1.8893500000000001, 54: 1.8893500000000001, 55: 1.8893500000000001, 56: 1.8893500000000001, 57: 1.8893500000000001, 58: 1.8893500000000001, 59: 1.8893500000000001, 60: 1.8893500000000001, 61: 1.8893500000000001, 62: 1.8893500000000001, 63: 1.8893500000000001, 64: 1.8893500000000001, 65: 1.8893500000000001, 66: 1.8893500000000001, 67: 1.8893500000000001, 68: 1.8893500000000001, 69: 1.8893500000000001, 70: 1.8893500000000001, 71: 1.8893500000000001, 72: 1.8893500000000001, 73: 1.8893500000000001, 74: 1.8893500000000001, 75: 1.8893500000000001, 76: 1.8893500000000001, 77: 1.8893500000000001, 78: 1.8893500000000001, 79: 1.8893500000000001, 80: 1.8893500000000001, 81: 1.8893500000000001, 82: 1.8893500000000001, 83: 1.8893500000000001, 84: 1.8893500000000001, 85: 1.8893500000000001, 86: 1.8893500000000001, 87: 1.8893500000000001, 88: 1.8893500000000001, 89: 1.8893500000000001, 90: 1.8893500000000001, 91: 1.8893500000000001, 92: 1.8893500000000001, 93: 1.8893500000000001, 94: 1.8893500000000001, 95: 1.8893500000000001, 96: 1.8893500000000001, 97: 1.8893500000000001, 98: 1.8893500000000001, 99: 1.8893500000000001}, 'Plate': {0: 72, 1: 72, 2: 72, 3: 72, 4: 72, 5: 72, 6: 72, 7: 72, 8: 72, 9: 72, 10: 72, 11: 72, 12: 72, 13: 72, 14: 72, 15: 72, 16: 72, 17: 72, 18: 72, 19: 72, 20: 72, 21: 72, 22: 72, 23: 72, 24: 72, 25: 72, 26: 72, 27: 72, 28: 72, 29: 72, 30: 72, 31: 72, 32: 72, 33: 72, 34: 72, 35: 72, 36: 72, 37: 72, 38: 72, 39: 72, 40: 72, 41: 72, 42: 72, 43: 72, 44: 72, 45: 72, 46: 72, 47: 72, 48: 72, 49: 72, 50: 72, 51: 72, 52: 72, 53: 72, 54: 72, 55: 72, 56: 72, 57: 72, 58: 72, 59: 72, 60: 72, 61: 72, 62: 72, 63: 72, 64: 72, 65: 72, 66: 72, 67: 72, 68: 72, 69: 72, 70: 72, 71: 72, 72: 72, 73: 72, 74: 72, 75: 72, 76: 72, 77: 72, 78: 72, 79: 72, 80: 131, 81: 131, 82: 131, 83: 131, 84: 
131, 85: 131, 86: 131, 87: 131, 88: 131, 89: 131, 90: 131, 91: 131, 92: 131, 93: 131, 94: 131, 95: 131, 96: 131, 97: 131, 98: 131, 99: 131}, 'Plate X': {0: -134.13406000000001, 1: -134.13406000000001, 2: -134.13406000000001, 3: -113.50433200000001, 4: -113.50433200000001, 5: -113.50433200000001, 6: -113.50433200000001, 7: -113.50433200000001, 8: -92.874604000000005, 9: -92.874604000000005, 10: -92.874604000000005, 11: -92.874604000000005, 12: -72.244876000000005, 13: -72.244876000000005, 14: -72.244876000000005, 15: -72.244876000000005, 16: -72.244876000000005, 17: -72.244876000000005, 18: -72.244876000000005, 19: -51.615147999999998, 20: -51.615147999999998, 21: -51.615147999999998, 22: -51.615147999999998, 23: -51.615147999999998, 24: -30.985420000000001, 25: -30.985420000000001, 26: -30.985420000000001, 27: -30.985420000000001, 28: -30.985420000000001, 29: -30.985420000000001, 30: -30.985420000000001, 31: -30.985420000000001, 32: -10.355691999999999, 33: -10.355691999999999, 34: -10.355691999999999, 35: -10.355691999999999, 36: -10.355691999999999, 37: -10.355691999999999, 38: -10.355691999999999, 39: 10.274036000000001, 40: 10.274036000000001, 41: 10.274036000000001, 42: 10.274036000000001, 43: 10.274036000000001, 44: 10.274036000000001, 45: 10.274036000000001, 46: 30.903764000000002, 47: 30.903764000000002, 48: 30.903764000000002, 49: 30.903764000000002, 50: 30.903764000000002, 51: 30.903764000000002, 52: 30.903764000000002, 53: 30.903764000000002, 54: 51.533491999999995, 55: 51.533491999999995, 56: 51.533491999999995, 57: 51.533491999999995, 58: 51.533491999999995, 59: 51.533491999999995, 60: 51.533491999999995, 61: 72.163219999999995, 62: 72.163219999999995, 63: 72.163219999999995, 64: 72.163219999999995, 65: 72.163219999999995, 66: 72.163219999999995, 67: 92.792947999999996, 68: 92.792947999999996, 69: 92.792947999999996, 70: 92.792947999999996, 71: 92.792947999999996, 72: 113.422676, 73: 113.422676, 74: 113.422676, 75: 113.422676, 76: 113.422676, 77: 
134.052404, 78: 134.052404, 79: 134.052404, 80: -134.13406000000001, 81: -134.13406000000001, 82: -134.13406000000001, 83: -113.50433200000001, 84: -113.50433200000001, 85: -113.50433200000001, 86: -113.50433200000001, 87: -113.50433200000001, 88: -92.874604000000005, 89: -92.874604000000005, 90: -92.874604000000005, 91: -92.874604000000005, 92: -72.244876000000005, 93: -72.244876000000005, 94: -72.244876000000005, 95: -72.244876000000005, 96: -72.244876000000005, 97: -72.244876000000005, 98: -72.244876000000005, 99: -51.615147999999998}, 'Plate Y': {0: -27.0123, 1: 0.039899999999999998, 2: 27.092099999999999, 3: -81.116699999999994, 4: -54.064500000000002, 5: 0.039899999999999998, 6: 54.144300000000001, 7: 81.1965, 8: -54.064500000000002, 9: -27.0123, 10: 27.092099999999999, 11: 54.144300000000001, 12: -108.16889999999999, 13: -81.116699999999994, 14: -27.0123, 15: 0.039899999999999998, 16: 27.092099999999999, 17: 81.1965, 18: 108.2487, 19: -81.116699999999994, 20: -54.064500000000002, 21: 0.039899999999999998, 22: 54.144300000000001, 23: 81.1965, 24: -135.22110000000001, 25: -108.16889999999999, 26: -54.064500000000002, 27: -27.0123, 28: 27.092099999999999, 29: 54.144300000000001, 30: 108.2487, 31: 135.30090000000001, 32: -108.16889999999999, 33: -81.116699999999994, 34: -27.0123, 35: 0.039899999999999998, 36: 27.092099999999999, 37: 81.1965, 38: 108.2487, 39: -135.22110000000001, 40: -81.116699999999994, 41: -54.064500000000002, 42: 0.039899999999999998, 43: 54.144300000000001, 44: 81.1965, 45: 135.30090000000001, 46: -135.22110000000001, 47: -108.16889999999999, 48: -54.064500000000002, 49: -27.0123, 50: 27.092099999999999, 51: 54.144300000000001, 52: 108.2487, 53: 135.30090000000001, 54: -108.16889999999999, 55: -81.116699999999994, 56: -27.0123, 57: 0.039899999999999998, 58: 27.092099999999999, 59: 81.1965, 60: 108.2487, 61: -81.116699999999994, 62: -54.064500000000002, 63: 0.039899999999999998, 64: 54.144300000000001, 65: 81.1965, 66: 108.2487, 67: 
-108.16889999999999, 68: -54.064500000000002, 69: -27.0123, 70: 27.092099999999999, 71: 54.144300000000001, 72: -81.116699999999994, 73: -27.0123, 74: 0.039899999999999998, 75: 27.092099999999999, 76: 81.1965, 77: -54.064500000000002, 78: 0.039899999999999998, 79: 54.144300000000001, 80: -27.0123, 81: 0.039899999999999998, 82: 27.092099999999999, 83: -81.116699999999994, 84: -54.064500000000002, 85: 0.039899999999999998, 86: 54.144300000000001, 87: 81.1965, 88: -54.064500000000002, 89: -27.0123, 90: 27.092099999999999, 91: 54.144300000000001, 92: -108.16889999999999, 93: -81.116699999999994, 94: -27.0123, 95: 0.039899999999999998, 96: 27.092099999999999, 97: 81.1965, 98: 108.2487, 99: -81.116699999999994}, 'Unnamed: 0': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 67, 68: 68, 69: 69, 70: 70, 71: 71, 72: 72, 73: 73, 74: 74, 75: 75, 76: 76, 77: 77, 78: 78, 79: 79, 80: 80, 81: 81, 82: 82, 83: 83, 84: 84, 85: 85, 86: 86, 87: 87, 88: 88, 89: 89, 90: 90, 91: 91, 92: 92, 93: 93, 94: 94, 95: 95, 96: 96, 97: 97, 98: 98, 99: 99}}
From your question it doesn't sound like you want a multivariate regression (i.e. multiple Y's). If you're just predicting a single Y from multiple X's, you can do it like this with pandas, then save the results to a text file:

import pandas as pd
df = pd.DataFrame(DF)
res = pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']])
file = open("C:/Users/Simon/Desktop/results.txt", "w")
file.write(str(res))
file.close()

You mentioned in the question that you want to group the analyses by Drum and Plate. However, the Drum column has the same value in every row. If you want to group by Plate and then run OLS on each subgroup, you can do something like this:

import pandas as pd
df = pd.DataFrame(DF)
results = []
def ols_res(df):
    results.append(
        pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']]))
df.groupby('Plate').apply(lambda newdf: ols_res(newdf))
file = open("C:/Users/Simon/Desktop/results.txt", "w")
for el in results:
    file.write(str(el))
file.close()

If you also want to group by Drum, and note which drum/plate combination each analysis is for, you can do something like this and just add some extra text to the results file:

import pandas as pd
df = pd.DataFrame(DF)
results = []
def ols_res(df):
    curCombo = "plate:" + str(df["Plate"].mean()) + ", drum:" + str(df["Drum"].unique())
    regression_results = pd.stats.api.ols(y=df['C-A'], x=df[['Plate X','Plate Y','FIELD X']])
    results.append([curCombo, regression_results])
df.groupby(['Plate', 'Drum']).apply(lambda newdf: ols_res(newdf))
file = open("C:/Users/Simon/Desktop/results.txt", "w")
for el in results:
    file.write(str(el))
    file.write("\n\n")
file.close()