I have these statistics of a table in netezza
/nz/support-IBM_Netezza-11.2.1.1-210825-0050/bin/nz_genstats OID_DB.OID_DB.OID_PAGOS_APLICADOS_FIJO
/nz/support-IBM_Netezza-11.2.1.1-210825-0050/bin/nz_get OID_DB.OID_DB.OID_PAGOS_APLICADOS_FIJO
Table: OID_PAGOS_APLICADOS_FIJO (276666)
Rowcount: 9,602,310
Dist Key: IDFAC
attnum Column Name Statistics Status Minimum Value Maximum Value # of Unique Values # of NULLs MaxLen AvgLen
------ ------------------------ ================== =============== =============== ==================== ==================== ====== ======
1 FECHA_PROCESO Express 2020-01-01 2022-08-01 940
2 DOCUMENTO Express 0011895954 9998147 2,235,478 12 10
3 USUARIO Express AAGARCIA ZRAMIREC 1,509 20 14
4 NOMBRE_USUARIO Express ABEL DAVID SARI ZOILA ROSA RAMI 1,525 71 23
5 FECHA_PAGO Express 2009-06-19 10:2 2022-08-01 20:2 308,032
6 FECHA_PAGO_CONTABLE Express 2009-06-19 10:2 2022-08-01 20:2 305,643
7 TIPO_DOC Express AJC VKA 50 5 5
8 DESCRIPCION_TIPO_DOC Express AJUSTE TRANSFERENCIA 48 92,138 34 18
9 CODIGO_BANCO Express 003 999 10 1,815,649 5 5
10 NOMBRE_BANCO Express BOLIVARIANO BAN TELMEX RRHH 9 1,817,818 23 19
11 CTA_CORRIENTE Express 0005046294 7621019 18 1,815,649 52 52
12 CODIGO_CLIENTE Express 00000005 20523352 516,577 10 10
13 IDENTIFICACION Express 077083801 h234573 516,384 17 12
14 TIPO_IDENTIDICACION Express CEDULA DE IDENT RUC 3 21 20
15 NOMBRE_CLIENTE Express BEIERSDORF S.A �USTA SELENA QU 518,080 112 31
16 SEGMENTO_MERCADO Express CARRIER RESIDENCIAL 9 4 24 13
17 GESTOR Express ANGEL GUILLERMO RRAMIREG 6 9,539,531 32 19
18 REF_LOTE Express 6926 78937 41 9,539,282
19 VALOR_RECIBIDO Express 0.0100 3237920.0000 43,192
20 ESTADO_RECIBO_NC Express A PAGADO TOTALMEN 4 21 4
21 SALDO Express -123.38 35197.12 5,795
22 IDFAC Express 0000000094 0067735776 8,687,120 648 12 12
23 TIPO_DOC_AFEC Express AJD NDI 13 648 5 5
24 FACTURA Express 000-000-0000001 999-999-0067722 2,260,744 651 20 18
25 FECHA_EMISION_FACTURA Express 2004-09-08 00:0 2023-03-15 00:0 4,196 648
26 MES_FACTURA Express 200409 202303 220 648 8 8
27 ID_CICLO Express 1 429 22 5,803,887
28 CICLO_DOC Express CICLO 2 MENSUAL CICLO VARIOS QU 22 5,803,887 31 17
29 VALOR_APLICADO Express 0.0020 381157.3100 37,738 2
30 FECHA_APLICACION Express 2020-01-01 00:0 2022-08-01 23:4 787,990 2
31 FORMAPAGO Express CHEQUE TRANSFERENCIAS 7 5,784,974 26 15
32 ESTADO_DOCUMENTO Express EMITIDO PAGADO TOTALMEN 3 93,703 21 19
33 FECHA_VENCIMIENTO Express 2004-09-23 00:0 2025-07-26 12:2 315,756 648
34 MES_VENCIMIENTO Express 200409 202507 251 648 8 8
35 PARROQUIA Express 12 DE MARZO ZONA NAVAL 1,010 1,603,596 41 14
36 CANTON Express 24 DE MAYO ZAMORA 103 1,603,596 29 9
37 CODIGO_SUCURSAL Express 0000000003 0018313083 560,976 22,723 12 12
38 ID_CANAL Express ASP VENT 5 4,750,391 6 6
39 DESC_CANAL Express Autoservicio Ventanilla 5 4,750,391 26 16
how can i get the columns attnum, column name and # of unique values
I have this Shell Script
table="OID_DB.OID_DB.OID_PAGOS_APLICADOS_FIJO"
gen_stats=$(/nz/support-IBM_Netezza-11.2.1.1-210825-0050/bin/nz_genstats $table)
get_stats=$(/nz/support-IBM_Netezza-11.2.1.1-210825-0050/bin/nz_get $table)
echo "$get_stats" | awk '/FECHA_PROCESO/, /DESC_CANAL/' | awk '{ print $1"|"$2"|"$6 }'
but the result obtained is
1|FECHA_PROCESO|940
2|DOCUMENTO|2,235,478
3|USUARIO|1,509
4|NOMBRE_USUARIO|SARI
5|FECHA_PAGO|2022-08-01
6|FECHA_PAGO_CONTABLE|2022-08-01
7|TIPO_DOC|50
8|DESCRIPCION_TIPO_DOC|48
9|CODIGO_BANCO|10
10|NOMBRE_BANCO|TELMEX
11|CTA_CORRIENTE|18
12|CODIGO_CLIENTE|516,577
13|IDENTIFICACION|516,384
14|TIPO_IDENTIDICACION|IDENT
15|NOMBRE_CLIENTE|�USTA
16|SEGMENTO_MERCADO|9
17|GESTOR|RRAMIREG
18|REF_LOTE|41
19|VALOR_RECIBIDO|43,192
20|ESTADO_RECIBO_NC|TOTALMEN
21|SALDO|5,795
22|IDFAC|8,687,120
23|TIPO_DOC_AFEC|13
24|FACTURA|2,260,744
25|FECHA_EMISION_FACTURA|2023-03-15
26|MES_FACTURA|220
27|ID_CICLO|22
28|CICLO_DOC|MENSUAL
29|VALOR_APLICADO|37,738
30|FECHA_APLICACION|2022-08-01
31|FORMAPAGO|7
32|ESTADO_DOCUMENTO|TOTALMEN
33|FECHA_VENCIMIENTO|2025-07-26
34|MES_VENCIMIENTO|251
35|PARROQUIA|MARZO
36|CANTON|MAYO
37|CODIGO_SUCURSAL|560,976
38|ID_CANAL|5
39|DESC_CANAL|5
How can I get the values of the # of Unique Values column
Using GNU awk for FIELDWIDTHS:
$ cat tst.awk
# tst.awk: print the attnum, Column Name and "# of Unique Values" columns of a
# fixed-width report, locating each column by its header name.
# Needs GNU awk (FIELDWIDTHS and the \s regex shorthand).
BEGIN { OFS="|" }
# Ruler line (only dashes, equals signs and spaces): it defines the column widths.
/^[-= ]+$/ {
inVals = 1
# One width per run of ruler characters, plus 1 for the separating space.
for ( i=1; i<=NF; i++ ) {
wids = wids " " (length($i) + 1)
}
FIELDWIDTHS = wids
# Re-split the saved header line with the fixed widths just computed.
$0 = prev
# Trim each header cell and remember which field number carries that name.
for ( i=1; i<=NF; i++ ) {
gsub(/^\s+|\s+$/,"",$i)
f[$i] = i
}
}
# Keep the current record so the ruler rule above can reach the header line.
{ prev = $0}
# From the ruler record onward: trim every field and print the three columns.
# On the ruler record itself $0 holds the header, so the header names come out first.
inVals {
for ( i=1; i<=NF; i++ ) {
gsub(/^\s+|\s+$/,"",$i)
}
print $(f["attnum"]), $(f["Column Name"]), $(f["# of Unique Values"])
}
$ awk -f tst.awk file
attnum|Column Name|# of Unique Values
1|FECHA_PROCESO|940
2|DOCUMENTO|2,235,478
3|USUARIO|1,509
4|NOMBRE_USUARIO|1,525
5|FECHA_PAGO|308,032
6|FECHA_PAGO_CONTABLE|305,643
7|TIPO_DOC|50
8|DESCRIPCION_TIPO_DOC|48
9|CODIGO_BANCO|10
10|NOMBRE_BANCO|9
11|CTA_CORRIENTE|18
12|CODIGO_CLIENTE|516,577
13|IDENTIFICACION|516,384
14|TIPO_IDENTIDICACION|3
15|NOMBRE_CLIENTE|518,080
16|SEGMENTO_MERCADO|9
17|GESTOR|6
18|REF_LOTE|41
19|VALOR_RECIBIDO|43,192
20|ESTADO_RECIBO_NC|4
21|SALDO|5,795
22|IDFAC|8,687,120
23|TIPO_DOC_AFEC|13
24|FACTURA|2,260,744
25|FECHA_EMISION_FACTURA|4,196
26|MES_FACTURA|220
27|ID_CICLO|22
28|CICLO_DOC|22
29|VALOR_APLICADO|37,738
30|FECHA_APLICACION|787,990
31|FORMAPAGO|7
32|ESTADO_DOCUMENTO|3
33|FECHA_VENCIMIENTO|315,756
34|MES_VENCIMIENTO|251
35|PARROQUIA|1,010
36|CANTON|103
37|CODIGO_SUCURSAL|560,976
38|ID_CANAL|5
39|DESC_CANAL|5
My 2 cts to print fields of fixed width with awk
# Print attnum, column name and number of unique values by slicing each row at
# character offsets derived from the ruler line; tr then deletes every space
# from the output.
gawk 'BEGIN{OFS="|"}
# Ruler line (contains -- or ==): print it as is, then build
# fl[i] = cumulative end offset of column i (each width plus one separator).
{ if($0 ~ /\--|==/) {
print $0
for ( i=1; i<=NF; i++ ) {
if(i == 1){
fl[i]=length($i) + 1
} else {
fl[i]= fl[i - 1] + length($i) + 1
}
# fix double space at field 3
# NOTE(review): this increment runs on every loop pass, not once. Only the
# value fl[3] holds when fl[4] is computed feeds the offsets used below.
fl[3]=fl[3] + 1
}
}
# NOTE(review): the data rows are assumed to start after line 6 (hard-coded).
if(NR >6){
# Slices: column 1, column 2, and column 6 (from the end of column 5 to the end of column 6).
print substr($0,1,fl[1]), substr($0,fl[1],fl[2] - fl[1]), substr($0,fl[5],fl[6] - fl[5])
}
}' test.txt | tr -d ' '
Result
1|FECHA_PROCESO|940
2|DOCUMENTO|2,235,478
3|USUARIO|1,509
4|NOMBRE_USUARIO|1,525
5|FECHA_PAGO|308,032
6|FECHA_PAGO_CONTABLE|305,643
7|TIPO_DOC|50
8|DESCRIPCION_TIPO_DOC|48
9|CODIGO_BANCO|10
10|NOMBRE_BANCO|9
11|CTA_CORRIENTE|18
12|CODIGO_CLIENTE|516,577
13|IDENTIFICACION|516,384
14|TIPO_IDENTIDICACION|3
15|NOMBRE_CLIENTE|518,080
....
With your shown samples and attempts only, please try the following awk code, written and tested in GNU awk. It uses GNU awk's match function with the regex ^\s+([0-9]+)\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+), which creates 3 capturing groups; match stores ONLY the capturing-group values in the array named arr, so arr gets 3 items, indexed from 1 up to the total number of capturing groups.
awk '
# For each row that starts with blanks and a number, capture the attnum, the
# column name and the fourth token after the name, and print them separated
# by "|". Needs GNU awk (3-argument match, \s and \S).
# NOTE(review): the skipped tokens are whitespace-delimited, so a minimum or
# maximum value that itself contains spaces shifts which token is captured.
{
  if (match($0, /^\s+([0-9]+)\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)/, arr)) {
    printf "%s|%s|%s\n", arr[1], arr[2], arr[3]
  }
}
' Input_file
OR improving my own regex in above code, this will create 4 capturing groups out of which we need to print only 1st, 2nd and 4th values only as per requirement.
awk '
# Same extraction with the three skipped tokens folded into one repeated
# group: arr[1] is the attnum, arr[2] the column name, arr[3] the last of the
# skipped tokens (unused) and arr[4] the token that follows them.
# Needs GNU awk (3-argument match, \s and \S).
{
  if (match($0, /^\s+([0-9]+)\s+(\S+)(\s+\S+){3}\s+(\S+)/, arr)) {
    printf "%s|%s|%s\n", arr[1], arr[2], arr[4]
  }
}
' Input_file
Explanation of regex: Adding detailed explanation for used regex in code.
^\s+ ##Matching spaces 1 or more occurrences from starting.
([0-9]+) ##Creating 1st capturing group which has 1 or more number of digits in it.
\s+ ##matching 1 or more spaces here.
(\S+) ##Creating 2nd capturing group which has 1 or more non-spaces here.
\s+\S+\s+ ##Matching 1 or more spaces followed by 1 or more non-spaces followed by 1 or more spaces.
\S+\s+\S+ ##Matching 1 or more non-spaces followed by 1 or more spaces followed by 1 or more non-spaces.
\s+ ##matching 1 or more spaces here.
(\S+) ##Creating 3rd capturing group which has 1 or more non-spaces here.
took me long enough :
# Deliberately obfuscated mawk variant. Its last step deletes every non-digit
# character from the third output field, which is why the counts in the sample
# output carry no thousands separators.
# NOTE(review): in the sample output several counts carry one extra trailing
# digit compared with the input table (e.g. CODIGO_BANCO shows 101 where the
# table has 10). It is the first digit of the neighbouring "# of NULLs" value
# and shows up on the rows where that value is in the millions -- verify the
# slice width before relying on this version.
mawk '
/^[ =-]+==[ =-]+$/ {
__=sprintf("%c",_+=___=(_+=_^=FS)*_)
___+=___
do {
sub("^",__,$_) } while(++_<___)
_=match($!_, __ (".+")__)
____=NR;___ = 3;__ = RLENGTH
} +____<NR {
$(___) = substr($!NF,_,__)
gsub("[^0-9]+","",$(NF =___)); print }' OFS='\f\r\t' ____=999
1
FECHA_PROCESO
940
2
DOCUMENTO
2235478
3
USUARIO
1509
4
NOMBRE_USUARIO
1525
5
FECHA_PAGO
308032
6
FECHA_PAGO_CONTABLE
305643
7
TIPO_DOC
50
8
DESCRIPCION_TIPO_DOC
48
9
CODIGO_BANCO
101
10
NOMBRE_BANCO
91
11
CTA_CORRIENTE
181
12
CODIGO_CLIENTE
516577
13
IDENTIFICACION
516384
14
TIPO_IDENTIDICACION
3
15
NOMBRE_CLIENTE
518080
16
SEGMENTO_MERCADO
9
17
GESTOR
69
18
REF_LOTE
419
19
VALOR_RECIBIDO
43192
20
ESTADO_RECIBO_NC
4
21
SALDO
5795
22
IDFAC
8687120
23
TIPO_DOC_AFEC
13
24
FACTURA
2260744
25
FECHA_EMISION_FACTURA
4196
26
MES_FACTURA
220
27
ID_CICLO
225
28
CICLO_DOC
225
29
VALOR_APLICADO
37738
30
FECHA_APLICACION
787990
31
FORMAPAGO
75
32
ESTADO_DOCUMENTO
3
33
FECHA_VENCIMIENTO
315756
34
MES_VENCIMIENTO
251
35
PARROQUIA
10101
36
CANTON
1031
37
CODIGO_SUCURSAL
560976
38
ID_CANAL
54
39
DESC_CANAL
54
I have a text file with a special format.
After the top "N" rows, the file has a 7-column row followed by "X" rows (X is the value in column 6 of that 7-column row). Then there is another 7-column row followed by "Y" sub-rows (Y is the value in column 6 of that row), and this pattern of rows repeats up to some fixed number of times, say 40.
An example is here
(I am skipping top few rows).
2.857142857143E-01 2.857142857143E-01-2.857142857143E-01 1 1533 9 1.0
1 -3.52823873905418
2 -3.52823873905417
3 -1.77620635653680
4 -1.77620635653680
5 -1.77620570068355
6 -1.77620570068354
7 -1.77620570066112
8 -1.77620570066112
9 -1.60388273192418
1.428571428571E-01 1.428571428571E-01-1.428571428571E-01 2 1506 14 8.0
1 -3.52823678441811
2 -3.52823678441810
3 -1.77620282216865
4 -1.77620282216865
5 -1.77619365786042
6 -1.77619365786042
7 -1.77619324280126
8 -1.77619324280125
9 -1.60387130881086
10 -1.60387130881086
11 -1.60387074066972
12 -1.60387074066972
13 -1.51340357895078
14 -1.51340357895078
1.000000000000E+00 4.285714285714E-01-1.428571428571E-01 20 1524 51 24.0
1 -3.52823580096110
2 -3.52823580096109
3 -1.77624472106293
4 -1.77624472106293
5 -1.77623455229910
6 -1.77623455229909
7 -1.77620473017160
8 -1.77620473017159
9 -1.60387169115834
10 -1.60387169115834
11 -1.60386634866654
12 -1.60386634866654
13 -1.51340851656332
14 -1.51340851656332
15 -1.51340086887553
16 -1.51340086887553
17 -1.51321967923767
18 -1.51321967923766
19 -1.40212716813452
20 -1.40212716813451
21 -1.40187887062753
22 -1.40187887062753
23 -0.749391485667459
24 -0.749391485667455
25 -0.740712218931955
26 -0.740712218931954
27 -0.714030906779278
28 -0.714030906779278
29 -0.689087278411268
30 -0.689087278411265
31 -0.687054399753234
32 -0.687054399753233
33 -0.677686868127079
34 -0.677686868127075
35 -0.405343895324740
36 -0.405343895324739
37 -0.404786479693490
38 -0.404786479693488
39 -0.269454266134757
40 -0.269454266134755
41 -0.267490250650300
42 -0.267490250650296
43 -0.262198373307171
44 -0.262198373307170
45 -0.260912148881762
46 -0.260912148881761
47 -9.015623907768122E-002
48 -9.015623907767983E-002
49 0.150591609452852
50 0.150591609452856
51 0.201194203960446
I want to grep a particular number from my text file and to do so, I use
awk -v c=2 -v t=$GREP 'NR==1{d=$c-t;d=d<0?-d:d;v=$c;next}{m=$c-t;m=m<0?-m:m}m<d{d=m;v=$c}END{print v}' case.dat
Here $GREP is 0.2011942 which prints the last row (it will change according to different file)
51 0.201194203960446
I want to print the header row also with this number, i.e., my script should print,
51 0.201194203960446
1.000000000000E+00 4.285714285714E-01-1.428571428571E-01 20 1524 51 24.0.
How can I print this header row of the grepped numbers?
I have idea, but I could not implement it in script format.
Simply, grep the number using my script and print the first row before this number that have 7 columns.
This may be what you're looking for
awk -v t="$GREP" '
# Print every two-column row whose value contains the text in t, followed by
# the most recent header row (a row with more than two columns).
# t is compared as a literal substring with index(), so characters such as
# "." or "+" inside the number need no regex escaping.
NF > 2 { header = $0; next }
NF == 2 && index($2, t) { printf("%s %s\n%s\n", $1, $2, header) }
' file
You can replace the NF > 2 with NF == 7 if you want the strictly seven-column header to be printed (that header contains 6 columns in your sample data, not 7).
Update after the comment "Can you please modify my script so that it should grep upto 13 decimal number":
awk -v t="$GREP" '
# Print every two-column row whose value contains the text in t, followed by
# the most recent header row (a row with more than two columns). Only the
# part of t up to its 13th decimal takes part in the comparison.
# t is compared as a literal substring with index(), so characters such as
# "." or "+" inside the number need no regex escaping.
BEGIN {
  dot = index(t, ".")
  if (dot) t = substr(t, 1, dot + 13)   # keep through the 13th digit after the point
}
NF > 2 { header = $0; next }
NF == 2 && index($2, t) { printf("%s %s\n%s\n", $1, $2, header) }
' file
I have a file separated by \t.
header text with many lines
V F A B
10 30 26 42
14 33 25 45
16 32 23 43
18 37 22 48
I want to change the 3rd column by the 4th and vice versa. I'm using
awk '
BEGIN {
RS = "\n";
OFS="\t";
record=0;
};
record {
a = $4;
$4 = $3;
$3 = a;
};
$1=="V" {
record=1
};
{
print $0
};
'
}
Instead of just changing the position of the columns, column 3 also has the line break of the original 4th column:
header text with many lines
V F A B
10 30 42
26
14 33 45
25
16 32 43
23
18 37 48
22
How can I prevent this in order to get?
header text with many lines
V F A B
10 30 42 26
14 33 45 25
16 32 43 23
18 37 48 22
Could you please try following, using usual method of storing 1 field's value to a variable and then exchanging the value of 4th field to 3rd field, at last putting 4th field value as variable value(could say swapping values using a variable).
awk '
# Swap the 3rd and 4th tab-separated columns on the data rows only: every line
# up to and including the column-title row (first field "V") is printed with
# its columns in the original order.
BEGIN { FS = OFS = "\t" }
# Drop a trailing carriage return from every line so it does not travel with
# the last column when that column is moved (presumably the input has CRLF
# line endings -- that would explain the stray line break after column 3).
{ sub(/\r$/, "") }
# This rule comes before the flag is raised so the title row itself is not swapped.
swap { val = $3; $3 = $4; $4 = val }
$1 == "V" { swap = 1 }
{ print }
' Input_file
Or, this messy sed:
sed -E 's/([[:digit:]]+)([[:blank:]]+)([[:digit:]]+)([[:space:]]*)$/\3\2\1\4/' file
# Capture groups, left to right: \1 = 3rd column (digits), \2 = the blank(s)
# between the two columns, \3 = 4th column (digits), \4 = optional trailing
# whitespace. The replacement \3\2\1\4 swaps the two numbers and leaves the
# separator and the trailing whitespace where they were.
This question is about an awk script (see my previous question from some weeks ago), but it is a bit more complicated.
input file looks :
Group1
id val1 val2
---------------------------
idone 2 10
idone 3 12
idone 6 9
idtwo 8 3
idtwo 14 1
Subtotal 33 35
Group2
id val1 val2
------------------------
idone 2 3
idone 1 4
idtwo 3 6
idtwo 4 7
Subtotal 10 20
Total 43 55
There might be more groups and in each group more entries .
I limited my example to 2 detail names idone, idtwo and 2 groups.
Now the purpose is to have them summarized.
with result as :
val1 val2
idone 14 38
idtwo 29 17
total 43 55
The output layout is free to choose :
if you prefer it may look like this as well :
total_idone_val1=14
total_idone_val2=38
total_idtwo_val1=29
total_idtwo_val2=17
overall_total_val1=43
overall_total_val2=55
give this awk cmd a try:
awk '
# Sum val1 and val2 per id over all groups, then print one line per id and a
# grand total.
# A detail row has exactly three fields, numeric 2nd and 3rd fields, and does
# not mention "total". That skips group names, ruler lines, the Subtotal/Total
# rows and the "id val1 val2" title of every group -- including groups that
# are not at the top of a file, which a line-number test would let through.
NF == 3 && !/[Tt]otal/ && $2 ~ /^[-+]?[0-9.]+$/ && $3 ~ /^[-+]?[0-9.]+$/ {
  v1[$1] += $2
  v2[$1] += $3
}
END {
  print "id", "v1", "v2"
  for (x in v1) {          # iteration order is unspecified in awk
    print x, v1[x], v2[x]
    s1 += v1[x]
    s2 += v2[x]
  }
  print "total", s1 + 0, s2 + 0   # + 0 prints 0 rather than an empty value when nothing matched
}
' f1 f2
it gives:
id v1 v2
idtwo 29 17
idone 14 38
total 43 55
I have a dataset as below:
col-1 col-2 col-3 col-4 col-5 col-6 col-7 col-8
0 17 215 55.7059 947 BMR_42 O22-BMR_1 O23-H23
1 1 1 1.0000 1 BMR_42 O23-BMR_1 O23-H23
2 31 3 1.0968 34 BMR_31 O22-BMR_1 O26-H26
3 11 2 1.0909 12 BMR_31 O13-BMR_1 O26-H26
4 20 5 1.8500 37 BMR_49 O22-BMR_1 O26-H26
5 24 4 1.7917 43 BMR_49 O23-BMR_1 O26-H26
6 41 2 1.0488 43 BMR_49 O12-BMR_1 O12-H12
7 28 2 1.0357 29 BMR_49 O22-BMR_1 O13-H13
8 1 1000 1000.0000 1000 BMR_49 O13-BMR_1 O13-H13
9 1 1 1.0000 1 BMR_22 O12-BMR_2 O22-H22
10 50 62 18.9400 947 BMR_59 O13-BMR_2 O22-H22
11 1 1 1.0000 1 BMR_59 O25-BMR_2 O23-H23
12 34 5 1.1471 39 BMR_59 O13-BMR_2 O23-H23
13 7 6 2.1429 15 BMR_59 O26-BMR_2 O24-H24
14 6 8 3.6667 22 BMR_59 O25-BMR_2 O24-H24
15 28 2 1.1071 31 BMR_10 O26-BMR_2 O26-H26
16 52 121 15.1346 787 BMR_10 O25-BMR_2 O26-H26
17 65 9 1.9231 125 BMR_10 O13-BMR_2 O26-H26
18 4 4 2.2500 9 BMR_59 O26-BMR_2 O26-H26
19 9 1 1.0000 9 BMR_22 O15-BMR_2 O13-H13
20 1 1 1.0000 1 BMR_10 O11-BMR_2 O16-H16
21 7 2 1.1429 8 BMR_53 O13-BMR_2 O16-H16
22 2 3 2.5000 5 BMR_33 O13-BMR_3 O22-H22
23 97 54 6.8247 662 BMR_61 O26-BMR_3 O22-H22
24 1 1 1.0000 1 BMR_29 O26-BMR_3 O23-H23
25 31 36 3.3226 103 BMR_29 O16-BMR_3 O23-H23
(The real file contains over 2000 lines).
I want to select data under certain criteria and find the sum and average of that. For example I want to select lines containing O22 in column $7 and $8 and calculate the sum and average of the values in column $4.
I tried a script as below:
awk '$7 ~ /O22/ && $8 ~ /O22/ {sum += $4} END {print sum, (sum/NR) }' hhsolute.lifetime2.dat
This code could select the line correctly but when I want to calculate the average (sum/NR), I don't get the correct value.
I wish to get some help on this. How I could get the sum and average values only for the data lines I wanted?
Appreciate any help in advance.
awk -v tgt="O22" '
# Add up column 4 over the rows whose 7th and 8th fields both match tgt and
# count those rows; at the end print the sum and the mean (0 when no row matched).
{
  if ($7 ~ tgt && $8 ~ tgt) {
    total += $4
    hits++
  }
}
END {
  mean = 0
  if (hits) mean = total / hits
  print total + 0, mean
}
' file
Try this:
awk '
# Add up column 4 over the rows whose 7th and 8th fields both contain O22;
# report the sum and the mean, or print nothing when no row matched.
{
  if ($7 ~ /O22/ && $8 ~ /O22/) {
    total += $4
    hits++
  }
}
END {
  if (hits) print "Sum = " (total), "Average= " (total / hits)
}
' File
If the 7th and 8th fields both contain the pattern O22, add the 4th field's value to the variable sum and increment n. In the END block, print the sum and the average.