PL/SQL dynamic INSERT
I have a table TABLE_A that contains a column whose data is a comma-separated list of values. I now need to split those comma-separated values and load them into the 250 columns of TABLE_B, and this has to be done dynamically. How can I do that?
Here's a quick and dirty script to get you going. It creates a source table T holding the comma-separated data as a CLOB and a 250-column target table T1, generates 2,000 test rows, and then loops over the source, splitting each row with INSTR/SUBSTR and binding the 250 resulting values into a dynamically built INSERT.
SQL> create table T ( c clob );
Table created.
SQL>
SQL> create table t1 ( c1 varchar2(50) );
Table created.
SQL> begin
2 for i in 2 .. 250 loop
3 execute immediate 'alter table t1 add c'||i||' varchar2(50)';
4 end loop;
5 end;
6 /
PL/SQL procedure successfully completed.
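If you want to sanity-check that the loop really added all 250 columns before going further, a quick data-dictionary query (assuming T1 was created in your own schema) is:

select count(*)
from   user_tab_columns
where  table_name = 'T1';   -- expect 250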
SQL>
SQL> declare
2 v clob := 'somedata1';
3 begin
4 for i in 2 .. 250 loop
5 v := v || ',somedata'||i;
6 end loop;
7 insert into t
8 select v from dual connect by level <= 2000;
9 end;
10 /
PL/SQL procedure successfully completed.
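To peek at the generated test data before running the load, something along these lines (DBMS_LOB.SUBSTR is only used to keep the CLOB output short) should do:

select count(*) from t;                            -- expect 2000 rows

select dbms_lob.substr(c, 60, 1) sample_row
from   t
where  rownum = 1;                                 -- first 60 characters of one row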
SQL>
SQL> set timing on
SQL> declare
2 type t_cols is table of varchar2(100) index by pls_integer;
3 l_cols t_cols;
4
5 l_comma pls_integer;
6 l_text varchar2(32767);
7
8 l_insert_sql varchar2(4000) := 'insert into t1 values (:1';
9 begin
10 for i in 2 .. 250 loop
11 l_insert_sql := l_insert_sql || ',:'||i;
12 end loop;
13 l_insert_sql := l_insert_sql || ')';
14
15 for i in ( select rownum r, c from t ) loop
16 l_text := i.c||',';
17
18 for x in 1 .. 250 loop
19 l_comma := instr(l_text,',');
20 l_cols(x) := substr(l_text,1,l_comma-1);
21 l_text := substr(l_text,l_comma+1);
22 end loop;
23
24 execute immediate l_insert_sql
25 using
26 l_cols(1)
27 ,l_cols(2)
28 ,l_cols(3)
29 ,l_cols(4)
30 ,l_cols(5)
31 ,l_cols(6)
32 ,l_cols(7)
33 ,l_cols(8)
34 ,l_cols(9)
35 ,l_cols(10)
36 ,l_cols(11)
37 ,l_cols(12)
38 ,l_cols(13)
39 ,l_cols(14)
40 ,l_cols(15)
41 ,l_cols(16)
42 ,l_cols(17)
43 ,l_cols(18)
44 ,l_cols(19)
45 ,l_cols(20)
46 ,l_cols(21)
47 ,l_cols(22)
48 ,l_cols(23)
49 ,l_cols(24)
50 ,l_cols(25)
51 ,l_cols(26)
52 ,l_cols(27)
53 ,l_cols(28)
54 ,l_cols(29)
55 ,l_cols(30)
56 ,l_cols(31)
57 ,l_cols(32)
58 ,l_cols(33)
59 ,l_cols(34)
60 ,l_cols(35)
61 ,l_cols(36)
62 ,l_cols(37)
63 ,l_cols(38)
64 ,l_cols(39)
65 ,l_cols(40)
66 ,l_cols(41)
67 ,l_cols(42)
68 ,l_cols(43)
69 ,l_cols(44)
70 ,l_cols(45)
71 ,l_cols(46)
72 ,l_cols(47)
73 ,l_cols(48)
74 ,l_cols(49)
75 ,l_cols(50)
76 ,l_cols(51)
77 ,l_cols(52)
78 ,l_cols(53)
79 ,l_cols(54)
80 ,l_cols(55)
81 ,l_cols(56)
82 ,l_cols(57)
83 ,l_cols(58)
84 ,l_cols(59)
85 ,l_cols(60)
86 ,l_cols(61)
87 ,l_cols(62)
88 ,l_cols(63)
89 ,l_cols(64)
90 ,l_cols(65)
91 ,l_cols(66)
92 ,l_cols(67)
93 ,l_cols(68)
94 ,l_cols(69)
95 ,l_cols(70)
96 ,l_cols(71)
97 ,l_cols(72)
98 ,l_cols(73)
99 ,l_cols(74)
100 ,l_cols(75)
101 ,l_cols(76)
102 ,l_cols(77)
103 ,l_cols(78)
104 ,l_cols(79)
105 ,l_cols(80)
106 ,l_cols(81)
107 ,l_cols(82)
108 ,l_cols(83)
109 ,l_cols(84)
110 ,l_cols(85)
111 ,l_cols(86)
112 ,l_cols(87)
113 ,l_cols(88)
114 ,l_cols(89)
115 ,l_cols(90)
116 ,l_cols(91)
117 ,l_cols(92)
118 ,l_cols(93)
119 ,l_cols(94)
120 ,l_cols(95)
121 ,l_cols(96)
122 ,l_cols(97)
123 ,l_cols(98)
124 ,l_cols(99)
125 ,l_cols(100)
126 ,l_cols(101)
127 ,l_cols(102)
128 ,l_cols(103)
129 ,l_cols(104)
130 ,l_cols(105)
131 ,l_cols(106)
132 ,l_cols(107)
133 ,l_cols(108)
134 ,l_cols(109)
135 ,l_cols(110)
136 ,l_cols(111)
137 ,l_cols(112)
138 ,l_cols(113)
139 ,l_cols(114)
140 ,l_cols(115)
141 ,l_cols(116)
142 ,l_cols(117)
143 ,l_cols(118)
144 ,l_cols(119)
145 ,l_cols(120)
146 ,l_cols(121)
147 ,l_cols(122)
148 ,l_cols(123)
149 ,l_cols(124)
150 ,l_cols(125)
151 ,l_cols(126)
152 ,l_cols(127)
153 ,l_cols(128)
154 ,l_cols(129)
155 ,l_cols(130)
156 ,l_cols(131)
157 ,l_cols(132)
158 ,l_cols(133)
159 ,l_cols(134)
160 ,l_cols(135)
161 ,l_cols(136)
162 ,l_cols(137)
163 ,l_cols(138)
164 ,l_cols(139)
165 ,l_cols(140)
166 ,l_cols(141)
167 ,l_cols(142)
168 ,l_cols(143)
169 ,l_cols(144)
170 ,l_cols(145)
171 ,l_cols(146)
172 ,l_cols(147)
173 ,l_cols(148)
174 ,l_cols(149)
175 ,l_cols(150)
176 ,l_cols(151)
177 ,l_cols(152)
178 ,l_cols(153)
179 ,l_cols(154)
180 ,l_cols(155)
181 ,l_cols(156)
182 ,l_cols(157)
183 ,l_cols(158)
184 ,l_cols(159)
185 ,l_cols(160)
186 ,l_cols(161)
187 ,l_cols(162)
188 ,l_cols(163)
189 ,l_cols(164)
190 ,l_cols(165)
191 ,l_cols(166)
192 ,l_cols(167)
193 ,l_cols(168)
194 ,l_cols(169)
195 ,l_cols(170)
196 ,l_cols(171)
197 ,l_cols(172)
198 ,l_cols(173)
199 ,l_cols(174)
200 ,l_cols(175)
201 ,l_cols(176)
202 ,l_cols(177)
203 ,l_cols(178)
204 ,l_cols(179)
205 ,l_cols(180)
206 ,l_cols(181)
207 ,l_cols(182)
208 ,l_cols(183)
209 ,l_cols(184)
210 ,l_cols(185)
211 ,l_cols(186)
212 ,l_cols(187)
213 ,l_cols(188)
214 ,l_cols(189)
215 ,l_cols(190)
216 ,l_cols(191)
217 ,l_cols(192)
218 ,l_cols(193)
219 ,l_cols(194)
220 ,l_cols(195)
221 ,l_cols(196)
222 ,l_cols(197)
223 ,l_cols(198)
224 ,l_cols(199)
225 ,l_cols(200)
226 ,l_cols(201)
227 ,l_cols(202)
228 ,l_cols(203)
229 ,l_cols(204)
230 ,l_cols(205)
231 ,l_cols(206)
232 ,l_cols(207)
233 ,l_cols(208)
234 ,l_cols(209)
235 ,l_cols(210)
236 ,l_cols(211)
237 ,l_cols(212)
238 ,l_cols(213)
239 ,l_cols(214)
240 ,l_cols(215)
241 ,l_cols(216)
242 ,l_cols(217)
243 ,l_cols(218)
244 ,l_cols(219)
245 ,l_cols(220)
246 ,l_cols(221)
247 ,l_cols(222)
248 ,l_cols(223)
249 ,l_cols(224)
250 ,l_cols(225)
251 ,l_cols(226)
252 ,l_cols(227)
253 ,l_cols(228)
254 ,l_cols(229)
255 ,l_cols(230)
256 ,l_cols(231)
257 ,l_cols(232)
258 ,l_cols(233)
259 ,l_cols(234)
260 ,l_cols(235)
261 ,l_cols(236)
262 ,l_cols(237)
263 ,l_cols(238)
264 ,l_cols(239)
265 ,l_cols(240)
266 ,l_cols(241)
267 ,l_cols(242)
268 ,l_cols(243)
269 ,l_cols(244)
270 ,l_cols(245)
271 ,l_cols(246)
272 ,l_cols(247)
273 ,l_cols(248)
274 ,l_cols(249)
275 ,l_cols(250);
276
277 end loop;
278
279 end;
280 /
PL/SQL procedure successfully completed.
Elapsed: 00:00:01.11
SQL>
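If hand-listing 250 arguments in the USING clause is a concern, DBMS_SQL lets you bind in a loop instead. The following is only a rough, untested sketch of that idea against the same T and T1 tables; it is not part of the timed run above:

declare
  l_insert_sql varchar2(32767) := 'insert into t1 values (:b1';
  l_cur        integer;
  l_rows       integer;
  l_comma      pls_integer;
  l_text       varchar2(32767);
begin
  -- build "insert into t1 values (:b1, :b2, ... , :b250)"
  for i in 2 .. 250 loop
    l_insert_sql := l_insert_sql || ',:b' || i;
  end loop;
  l_insert_sql := l_insert_sql || ')';

  l_cur := dbms_sql.open_cursor;
  dbms_sql.parse(l_cur, l_insert_sql, dbms_sql.native);

  for r in ( select c from t ) loop
    l_text := r.c || ',';
    -- bind each comma-separated piece by name instead of listing 250 USING arguments
    for x in 1 .. 250 loop
      l_comma := instr(l_text, ',');
      dbms_sql.bind_variable(l_cur, ':b' || x, substr(l_text, 1, l_comma - 1));
      l_text := substr(l_text, l_comma + 1);
    end loop;
    l_rows := dbms_sql.execute(l_cur);
  end loop;

  dbms_sql.close_cursor(l_cur);
end;
/

DBMS_SQL typically carries a little more per-execution overhead than native dynamic SQL, but it keeps the bind loop generic if the column count ever changes.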
Related
pandas df add new column based on proportion of two other columns from another dataframe
I have df1 which has three columns (loadgroup, cartons, blocks) like this loadgroup cartons blocks cartonsPercent blocksPercent 1 2269 14 26% 21% 2 1168 13 13% 19% 3 937 8 11% 12% 4 2753 24 31% 35% 5 1686 9 19% 13% total(sum of column) 8813 68 100% 100% The interpretation is like this: out of df1 26% cartons which is also 21% of blocks are assigned to loadgroup 1, etc. we can assume blocks are 1 to 68, cartons are 1 to 8813. I also have df2 which also has cartons and blocks columns. but does not have loadgroup. My goal is to assign loadgroup (1-5 as well) to df2 (100 blocks 29608 cartons in total), but keep the proportions, for example, for df2, 26% cartons 21% blocks assign loadgroup 1, 13% cartons 19% blocks assign loadgroup 2, etc. df2 is like this: block cartons 0 533 1 257 2 96 3 104 4 130 5 71 6 68 7 87 8 99 9 51 10 291 11 119 12 274 13 316 14 87 15 149 16 120 17 222 18 100 19 148 20 192 21 188 22 293 23 120 24 224 25 449 26 385 27 395 28 418 29 423 30 244 31 327 32 337 33 249 34 528 35 528 36 494 37 540 38 368 39 533 40 614 41 462 42 350 43 618 44 463 45 552 46 397 47 401 48 397 49 365 50 475 51 379 52 541 53 488 54 383 55 354 56 760 57 327 58 211 59 356 60 552 61 401 62 320 63 368 64 311 65 421 66 458 67 278 68 504 69 385 70 242 71 413 72 246 73 465 74 386 75 231 76 154 77 294 78 275 79 169 80 398 81 227 82 273 83 319 84 177 85 272 86 204 87 139 88 187 89 263 90 90 91 134 92 67 93 115 94 45 95 65 96 40 97 108 98 60 99 102 total 100 blocks 29608 cartons I want to add loadgroup column to df2, try to keep those proportions as close as possible. How to do it please? Thank you very much for the help.
I don't know how to find loadgroup column based on both cartons percent and blocks percent. But generate random loadgroup based on either cartons percent or blocks percent is easy. Here is what I did. I generate 100,000 seeds first, then for each seed, I add column loadgroup1 based on cartons percent, loadgroup2 based on blocks percent, then calculate both percentages, then compare with df1 percentages, get absolute difference, record it. For these 100,000 seeds, I take the minimum difference one as my solution, which is sufficient for my job. But this is not the optimal solution, and I am looking for quick and easy way to do this. Hope somebody can help. Here is my code. df = pd.DataFrame() np.random.seed(10000) seeds = np.random.randint(1, 1000000, size = 100000) for i in range(46530, 46537): print(seeds[i]) np.random.seed(seeds[i]) df2['loadGroup1'] = np.random.choice(df1.loadgroup, len(df2), p = df1.CartonsPercent) df2['loadGroup2'] = np.random.choice(df1.loadgroup, len(df2), p = df1.blocksPercent) df2.reset_index(inplace = True) three = pd.DataFrame(df2.groupby('loadGroup1').agg(Cartons = ('cartons', 'sum'), blocks = ('block', 'count'))) three['CartonsPercent'] = three.Cartons/three.Cartons.sum() three['blocksPercent'] = three.blocks/three.blocks.sum() four = df1[['CartonsPercent','blocksPercent']] - three[['CartonsPercent','blocksPercent']] four = four.abs() subdf = pd.DataFrame({'i':[i],'Seed':[seeds[i]], 'Percent':['CartonsPercent'], 'AbsDiff':[four.sum().sum()]}) df = pd.concat([df,subdf]) three = pd.DataFrame(df2.groupby('loadGroup2').agg(Cartons = ('cartons', 'sum'), blocks = ('block', 'count'))) three['CartonsPercent'] = three.Cartons/three.Cartons.sum() three['blocksPercent'] = three.blocks/three.blocks.sum() four = df1[['CartonsPercent','blocksPercent']] - three[['CartonsPercent','blocksPercent']] four = four.abs() subdf = pd.DataFrame({'i':[i],'Seed':[seeds[i]], 'Percent':['blocksPercent'], 'AbsDiff':[four.sum().sum()]}) df = pd.concat([df,subdf]) df.sort_values(by = 'AbsDiff', ascending = True, inplace = True) df = df.head(10) Actually the first row of df will tell me the seed I am looking for, I kept 10 rows just for curiosity. Here is my solution. block cartons loadgroup 0 533 4 1 257 1 2 96 4 3 104 4 4 130 4 5 71 2 6 68 1 7 87 4 8 99 4 9 51 4 10 291 4 11 119 2 12 274 2 13 316 4 14 87 4 15 149 5 16 120 3 17 222 2 18 100 2 19 148 2 20 192 3 21 188 4 22 293 1 23 120 2 24 224 4 25 449 1 26 385 5 27 395 3 28 418 1 29 423 4 30 244 5 31 327 1 32 337 5 33 249 4 34 528 1 35 528 1 36 494 5 37 540 3 38 368 2 39 533 4 40 614 5 41 462 4 42 350 5 43 618 4 44 463 2 45 552 1 46 397 3 47 401 3 48 397 1 49 365 1 50 475 4 51 379 1 52 541 1 53 488 2 54 383 2 55 354 1 56 760 5 57 327 4 58 211 2 59 356 5 60 552 4 61 401 1 62 320 1 63 368 3 64 311 3 65 421 2 66 458 5 67 278 4 68 504 5 69 385 4 70 242 4 71 413 1 72 246 2 73 465 5 74 386 4 75 231 1 76 154 4 77 294 4 78 275 1 79 169 4 80 398 4 81 227 4 82 273 1 83 319 3 84 177 4 85 272 5 86 204 3 87 139 1 88 187 4 89 263 4 90 90 4 91 134 4 92 67 3 93 115 3 94 45 2 95 65 2 96 40 4 97 108 2 98 60 2 99 102 1 Here are the summaries. loadgroup cartons blocks cartonsPercent blocksPercent 1 7610 22 26% 22% 2 3912 18 13% 18% 3 3429 12 12% 12% 4 9269 35 31% 35% 5 5388 13 18% 13% It's very close to my target though.
pandas how to filter and slice with multiple conditions
Using pandas, how do I return the dataframe filtered by the value 2 in the 'GEN' column and the value 20 in the 'AGE' column, while excluding the columns named 'GEN' and 'BP'? Thanks in advance :)

AGE  GEN  BMI   BP   S1   S2     S3  S4   S5      S6  Y
59   2    32.1  101  157  93.2   38  4    4.8598  87  151
48   1    21.6  87   183  103.2  70  3    3.8918  69  75
72   2    30.5  93   156  93.6   41  4    4.6728  85  141
24   1    25.3  84   198  131.4  40  5    4.8903  89  206
50   1    23    101  192  125.4  52  4    4.2905  80  135
23   1    22.6  89   139  64.8   61  2    4.1897  68  97
20   2    22    90   160  99.6   50  3    3.9512  82  138
66   2    26.2  114  255  185    56  4.5  4.2485  92  63
60   2    32.1  83   179  119.4  42  4    4.4773  94  110
20   1    30    85   180  93.4   43  4    5.3845  88  310
You can do this:

cols = df.columns[~df.columns.isin(['GEN', 'BP'])]
out = df.loc[(df['GEN'] == 2) & (df['AGE'] == 20), cols]

or, using query (note that the column names must not be quoted inside the query string):

out = df.query("GEN == 2 and AGE == 20").loc[:, cols]
create new column from divided columns over iteration
I am working with the following code:

url = 'https://raw.githubusercontent.com/dothemathonthatone/maps/master/fertility.csv'
df = pd.read_csv(url)

   year  regional_schlüssel  Aus15  Deu15  Aus16  Deu16  Aus17  Deu17  Aus18  Deu18  ...  aus36  aus37  aus38  aus39  aus40  aus41  aus42  aus43  aus44  aus45
0  2000  5111000             0      4      8      25     20     45     56     89     ...  935    862    746    732    792    660    687    663    623    722
1  2000  5113000             1      1      4      14     13     33     19     48     ...  614    602    498    461    521    470    393    411    397    400
2  2000  5114000             0      11     0      5      2      13     7      20     ...  317    278    265    235    259    228    204    173    213    192
3  2000  5116000             0      2      2      7      3      28     13     26     ...  264    217    206    207    197    177    171    146    181    169
4  2000  5117000             0      0      3      1      2      4      4      7      ...  135    129    118    116    128    148    89     110    124    83

I would like to create a new set of columns fertility_deu15, ..., fertility_deu45 and fertility_aus15, ..., fertility_aus45 such that fertility_aus15 = aus15 / Aus15 and fertility_deu15 = deu15 / Deu15, and so on for every matching pair ausi/Ausi and deui/Deui with i in [15, 45].
I'm not sure what is up with that data, but we need to fix it to make it numeric. I'll end up doing that while filtering:

numerator = df.filter(regex='^[a-z]+\d+$')                     # lower-case ones
numerator = numerator.apply(pd.to_numeric, errors='coerce')    # fix numbers
denominator = df.filter(regex='^[A-Z][a-z]+\d+$').rename(columns=str.lower)
denominator = denominator.apply(pd.to_numeric, errors='coerce')

numerator.div(denominator).add_prefix('fertility_')
To find avg in pig and sort it in ascending order
have a schema with 9 fields and i want to take only two fields(6,7 i.e $5,$6) and i want to calculate the average of $5 and i want to sort the $6 in ascending order so how to do this task can some one help me. Input Data: N368SW 188 170 175 17 -1 MCO MHT 1142 N360SW 100 115 87 -10 5 MCO MSY 550 N626SW 114 115 90 13 14 MCO MSY 550 N252WN 107 115 84 -10 -2 MCO MSY 550 N355SW 104 115 85 -1 10 MCO MSY 550 N405WN 113 110 96 14 11 MCO ORF 655 N456WN 110 110 92 24 24 MCO ORF 655 N743SW 144 155 124 7 18 MCO PHL 861 N276WN 142 150 129 -2 6 MCO PHL 861 N369SW 153 145 134 30 22 MCO PHL 861 N363SW 151 145 137 5 -1 MCO PHL 861 N346SW 141 150 128 51 60 MCO PHL 861 N785SW 131 145 118 -15 -1 MCO PHL 861 N635SW 144 155 127 -6 5 MCO PHL 861 N242WN 298 300 276 68 70 MCO PHX 1848 N439WN 130 140 111 -4 6 MCO PIT 834 N348SW 140 135 124 7 2 MCO PIT 834 N672SW 136 135 122 9 8 MCO PIT 834 N493WN 151 160 136 -9 0 MCO PVD 1073 N380SW 170 155 155 13 -2 MCO PVD 1073 N705SW 164 160 147 6 2 MCO PVD 1073 N233LV 157 160 143 1 4 MCO PVD 1073 N786SW 156 160 139 6 10 MCO PVD 1073 N280WN 160 160 146 1 1 MCO PVD 1073 N282WN 104 95 81 10 1 MCO RDU 534 N694SW 89 100 77 3 14 MCO RDU 534 N266WN 94 95 82 9 10 MCO RDU 534 N218WN 98 100 77 12 14 MCO RDU 534 N355SW 47 50 35 15 18 MCO RSW 133 N388SW 44 45 30 37 38 MCO RSW 133 N786SW 46 50 31 4 8 MCO RSW 133 N707SA 52 50 33 10 8 MCO RSW 133 N795SW 176 185 153 -9 0 MCO SAT 1040 N402WN 176 185 161 4 13 MCO SAT 1040 N690SW 123 130 107 -1 6 MCO SDF 718 N457WN 135 130 105 20 15 MCO SDF 718 N720WN 144 155 131 13 24 MCO STL 880 N775SW 147 160 135 -6 7 MCO STL 880 N291WN 136 155 122 96 115 MCO STL 880 N247WN 144 155 127 43 54 MCO STL 880 N748SW 179 185 159 -4 2 MDW ABQ 1121 N709SW 176 190 158 21 35 MDW ABQ 1121 N325SW 110 105 97 36 31 MDW ALB 717 N305SW 116 110 90 107 101 MDW ALB 717 N403WN 145 165 128 -6 14 MDW AUS 972 N767SW 136 165 125 59 88 MDW AUS 972 N730SW 118 120 100 28 30 MDW BDL 777 i have written the code like this but it is not working properly: a = load '/path/to/file' using PigStorage('\t'); b = foreach a generate (int)$5 as field_a:int,(chararray)$6 as field_b:chararray; c = group b all; d = foreach c generate b.field_b,AVG(b.field_a); e = order d by field_b ASC; dump e; I am facing error at order by: grunt> a = load '/user/horton/sample_pig_data.txt' using PigStorage('\t'); grunt> b = foreach a generate (int)$5 as fielda:int,(chararray)$6 as fieldb:chararray; grunt> describe #; b: {fielda: int,fieldb: chararray} grunt> c = group b all; grunt> describe #; c: {group: chararray,b: {(fielda: int,fieldb: chararray)}} grunt> d = foreach c generate b.fieldb,AVG(b.fielda); grunt> e = order d by fieldb ; 2017-01-05 15:51:29,623 [main] ERROR org.apache.pig.tools.grunt.Grunt - ERROR 1025: <line 6, column 15> Invalid field projection. Projected field [fieldb] does not exist in schema: :bag{:tuple(fieldb:chararray)},:double. Details at logfile: /root/pig_1483631021021.log I want output like(not related to input data): (({(Bharathi),(Komal),(Archana),(Trupthi),(Preethi),(Rajesh),(siddarth),(Rajiv) }, { (72) , (83) , (87) , (75) , (93) , (90) , (78) , (89) }),83.375)
If you have found the answer, best practice is to post it so that others referring to this can have a better understanding.
How to query using an array of columns on SQL Server 2008
Can you please help with this? I'm trying to write a query that retrieves a total amount from an array of columns, and I don't know if there is a way to do this. I retrieve the array of columns I need from this query:

USE Facebook_Global
GO
SELECT c.name AS column_name
FROM sys.tables AS t
INNER JOIN sys.columns AS c ON t.OBJECT_ID = c.OBJECT_ID
WHERE t.name LIKE '%Lifetime Likes by Gender and###$%' and c.name like '%m%'

which gives me this table:

column_name
M#13-17
M#18-24
M#25-34
M#35-44
M#45-54
M#55-64
M#65+

So I need a query that gives me a TotalAmount of the columns listed in that table. Is this possible? Just to clarify a little, I have this table:

Date                     F#13-17  F#18-24  F#25-34  F#35-44  F#45-54  F#55-64  F#65+  M#13-17  M#18-24  M#25-34  M#35-44  M#45-54  M#55-64  M#65+
2015-09-06 00:00:00.000  257      3303     1871     572      235      116      71     128      1420     824      251      62       32       30
2015-09-07 00:00:00.000  257      3302     1876     571      234      116      72     128      1419     827      251      62       32       30
2015-09-08 00:00:00.000  257      3304     1877     572      234      116      73     128      1421     825      253      62       32       30
2015-09-09 00:00:00.000  257      3314     1891     575      236      120      73     128      1438     828      254      62       33       30
2015-09-10 00:00:00.000  259      3329     1912     584      245      131      76     128      1460     847      259      66       37       31
2015-09-11 00:00:00.000  259      3358     1930     605      248      136      79     128      1475     856      261      67       39       31
2015-09-12 00:00:00.000  259      3397     1953     621      255      139      79     128      1486     864      264      68       41       31
2015-09-13 00:00:00.000  259      3426     1984     642      257      144      80     129      1499     883      277      74       42       32

and I need a column with the SUM of all the columns whose names contain F, and another for those containing M, instead of writing something like F#13-17+F#18-24+F#25-34+F#35-44+F#45-54+etc. Is this possible?
Try something like this:

with derivedTable as
(
    -- sql from your question goes here
)
select column_name from derivedTable
union
select cast(count(*) as varchar(10)) + ' records' from derivedTable
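For the actual totals, one option on SQL Server 2008 is to build the SUM expression dynamically from sys.columns and run it with sp_executesql. This is only a sketch assuming the real table is the one matched by the LIKE pattern in the question; the table name below is a placeholder:

DECLARE @mcols nvarchar(max), @fcols nvarchar(max), @sql nvarchar(max);

-- per-row total of all M# columns: "[M#13-17] + [M#18-24] + ..."
SELECT @mcols = STUFF((
    SELECT ' + ISNULL(' + QUOTENAME(c.name) + ', 0)'
    FROM sys.tables AS t
    INNER JOIN sys.columns AS c ON t.object_id = c.object_id
    WHERE t.name LIKE '%Lifetime Likes by Gender and%' AND c.name LIKE 'M#%'
    FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'), 1, 3, '');

-- same idea for the F# columns
SELECT @fcols = STUFF((
    SELECT ' + ISNULL(' + QUOTENAME(c.name) + ', 0)'
    FROM sys.tables AS t
    INNER JOIN sys.columns AS c ON t.object_id = c.object_id
    WHERE t.name LIKE '%Lifetime Likes by Gender and%' AND c.name LIKE 'F#%'
    FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'), 1, 3, '');

SET @sql = N'SELECT [Date], ' + @fcols + N' AS TotalF, ' + @mcols + N' AS TotalM
             FROM [Your_Lifetime_Likes_Table];';  -- placeholder table name, adjust to yours

EXEC sp_executesql @sql;

Because the column list comes from sys.columns at run time, any F#/M# age buckets added later are picked up automatically.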