Awk if statement with loop - awk

I want to sum the values of a column, but I want to reset the running sum and average every 112 lines.
What is wrong with this program?
BEGIN{ sum=0;i=0}
{ sum=sum+$4
i=i+1
print i
if (i==112)
print "total " sum*8 " average " (sum/i)*8
sum=0
i=0
}
END{}
The output is always 1

# Accumulate column 4 and, after every 112th input line, print the total (x8)
# and the per-line average of that total, then reset the accumulator.
awk '{
  sum+=$4
}
# The pattern and its opening brace must share a line: a bare pattern on its
# own line means "print the record", and a lone {...} block runs for every line.
NR%112==0 {
  a=sum*8;
  print a,a/112;
  sum=0;
}' your_file

I forgot the braces
BEGIN{ sum=0;i=0}
{ sum=sum+$4
i=i+1
print i
if (i==112)
{print "total " sum*8 " average " (sum/i)*8
sum=0
i=0
}
}
END{}
Thank you

You can shorten it some:
awk '
{ sum+=$4
i+=1
print i
if (i==112)
{print "total " sum*8 " average " (sum/i)*8
sum=i=0
}
}' file
You may also use NR instead of i
awk '
{ sum+=$4
print NR%112
if (NR%112==0)
{print "total " sum*8 " average " (sum/112)*8
sum=0
}
}' file
I see that with this it prints 0 instead of line number 112 but that could be fixed if needed.

Related

Categorize a column and count the number of warnings in a column

I am having a file called out.txt as below:
Statement 1 Statement 2 Statement 3 Statement 4
The declaration is not done / Exp / * / This is expected
The declaration is starting/started / St / * / This is not expected
The declaration is not yet designed / Yt / & / This is a major one
The declaration is confirmed / Exp / * / This is okay
The declaration is not confirmed / Ntp / & / This is a major issue
I need to sum up and categorize from column 3 (Statement 3), if it is * as Warning and if it is & it is a Error as below:
Out:
Warnings:
Exp : 2
St : 1
Total : 3
Errors:
Yt : 1
Ntp: 1
Total :2
I tried below code, but not getting the exact output:
#!/bin/bash
echo " " ;
File="out.txt"
for z in out.txt;
do
if grep -q "&" $z/"$File"; then
echo "$z:";
awk -F' / '
{ a[$2]++ }
END{ for(j in a){ print j, a[j]; s=s+a[j] };
print "Total :", s}' out.txt
else
echo "$z:";
done
EDIT2: Since the OP confirmed that there are NO keywords for errors, the category has to be decided by the & marker in the second-to-last field of each line. In that case, try the following.
# Tally the label in column 2 per severity: a line whose second-to-last
# "/"-separated field is "&" counts as an error, anything else as a warning.
awk -F'/' '
match($0,/[[:space:]]+\/[^/]*[[:space:]]+\//){   # first "<blank>/ label <blank>/" span
  val=substr($0,RSTART,RLENGTH)
  gsub(/[[:space:]]+|\//,"",val)                 # keep only the bare label
  str=$(NF-1)                                    # severity marker field
  gsub(/ +/,"",str)
  if(str=="&"){
    countEr[val]++
  }
  else{
    countSu[val]++
  }
  val=str=""
}
END{
  print "Out:" ORS "Warnings:"
  for(i in countSu){
    print "\t"i,countSu[i]
    sumSu+=countSu[i]
  }
  print "Total:" (sumSu+0)                       # +0 prints 0 when nothing was counted
  print "Errors:"
  for(i in countEr){
    print "\t"i,countEr[i]
    sumEr+=countEr[i]
  }
  print "Total:" (sumEr+0)
}' Input_file
EDIT: A generic solution where all error names are passed in a variable, so the conditions do NOT have to be written out manually as in my previous solution. Could you please try the following? It was written and tested with GNU awk, based only on the samples shown.
# Tally the label in column 2, classifying it as an error when it appears in
# the comma-separated list passed via -v errors=..., and as a warning otherwise.
awk -v errors="Ntp,Yt" '
BEGIN{
  num=split(errors,arr,",")
  for(i=1;i<=num;i++){
    errorVal[arr[i]]                             # bare reference creates the key
  }
}
match($0,/[[:space:]]+\/[^/]*[[:space:]]+\//){   # first "<blank>/ label <blank>/" span
  val=substr($0,RSTART,RLENGTH)
  gsub(/[[:space:]]+|\//,"",val)                 # keep only the bare label
  if(val in errorVal){
    countEr[val]++
  }
  else{
    countSu[val]++
  }
  val=""
}
END{
  print "Out:" ORS "Warnings:"
  for(i in countSu){
    print "\t"i,countSu[i]
    sumSu+=countSu[i]
  }
  print "Total:" (sumSu+0)                       # +0 prints 0 when nothing was counted
  print "Errors:"
  for(i in countEr){
    print "\t"i,countEr[i]
    sumEr+=countEr[i]
  }
  print "Total:" (sumEr+0)
}' Input_file
Explanation: Adding detailed explanation for above.
awk ' ##Starting awk program from here.
match($0,/[[:space:]]+\/[^/]*[[:space:]]+\//){ ##Using match function to find the first run of: whitespace, slash, non-slash text, whitespace, slash.
val=substr($0,RSTART,RLENGTH) ##Saving the matched sub-string (RLENGTH characters starting at RSTART) into variable val.
gsub(/[[:space:]]+|\//,"",val) ##Removing spaces and slashes from val, leaving only the label.
if(val=="Ntp" || val=="Yt"){ ##Checking condition if value is either Ntp OR Yt then do following.
countEr[val]++ ##Increase count for array countEr with 1 with index of val here.
}
else{ ##Else do following.
countSu[val]++ ##Increase count of array countSu with index of val here.
}
val="" ##Nullifying val here.
}
END{ ##Starting END block of this program here.
print "Out:" ORS "Warnings:" ##Printing string Out, a new line (ORS) and Warnings here.
for(i in countSu){ ##Traversing through countSu here.
print "\t"i,countSu[i] ##Printing tab, index of array and value of countSu here.
sumSu+=countSu[i] ##Keep on adding value of countSu current item into sumSu variable here.
}
print "Total:"sumSu ##Printing Total string with sumSu value here.
print "Errors:" ##Printing string Errors here.
for(i in countEr){ ##Traversing through countEr here.
print "\t"i,countEr[i] ##Printing tab, index i and countEr value here.
sumEr+=countEr[i] ##Keep on adding value of countEr current item into sumEr variable here.
}
print "Total:"sumEr ##Printing Total string with sumEr value here.
}' Input_file ##Mentioning Input_file name here.
another gawk alternative - relies on gawk's "true multi-dimensional arrays":
$ cat tst.awk:
BEGIN {
FS="[[:blank:]]/[[:blank:]]"
OFS=" : "
}
FNR>1{
gsub(/[[:blank:]]/, "", $2)
gsub(/[[:blank:]]/, "", $3)
a[$3][$2]++
}
END {
#PROCINFO["sorted_in"]="#ind_str_desc"
print "Out" OFS
for(i in a) {
print (i=="*"?"Warnings":"Errors") OFS
t=0
for(j in a[i]) {
print "\t" j, a[i][j]
t+=a[i][j]
}
print "Total", t
t=0
}
}
gawk -f tst.awk myFile results in:
Out :
Warnings :
St : 1
Exp : 2
Total : 3
Errors :
Ntp : 1
Yt : 1
Total : 2

awk: reformat date in a list of data-files (mass edit)

I want to run awk over a list of data files. All records up to and including a marker record (an unknown number of records precede it), e.g.,
/10-12-2014 06:47:59/{p=1}
are to be skipped.
A brief template of one data file looks like this:
data_file_001
0; n records to be skipped
1;10-12-2014 06:47:59;
2;12-12-2014 10:17:44;
3;12-12-2014 10:37:44;
4;14-12-2014 10:00:32;
5;;movefield
6;16-12-2014 04:15:39;
needed Output ($2 datefield reformatted and $3 moved to $4):
colnum;date;col3;col4;col5
2;12.12.14;;
3;12.12.14;;
4;14.12.14;;
5;;;movefield;moved
6;16.12.14;;
My source file is this at the moment:
BEGIN { OFS=FS=";" ; print "colnum;date;col3;col4;col5"}
FNR == 1 { p=0 }
$3 == "movefield" { $4 = $3; $5 = "moved"; $3 = ""}
#(x=index($2," ") > 0) {DDMMYY = substr($2,1,x-1)}
$2=substr($2,1,11)
p!=0{print};
/10-12-2014 06:47:59/{p=1}
I have problems reformatting the date fields: neither the pattern-action (x=index($2," ") > 0) {DDMMYY = substr($2,1,x-1)} nor $2=substr($2,1,11) works in conjunction with the movefield action. Notice that the record in which the movefield field appears has no date field.
Please have in mind that the awk is meant to be used on a bunch of files (loop).
With GNU awk for in-place editing, no loop required:
# Requires GNU awk >= 4.1 (inplace extension). Each file is rewritten in place:
# header first, then only the records after the 10-12-2014 06:47:59 marker.
awk -i inplace '
BEGIN { OFS=FS=";" }
# Emit the header per file: output printed in BEGIN goes to standard output,
# because in-place redirection only starts once an input file is opened.
FNR==1 { p=0; print "colnum","date","col3","col4","col5" }
# Skip everything up to and including the marker record.
!p { if (/10-12-2014 06:47:59/) p=1; next }
$3 == "movefield" { $4 = $3; $5 = "moved"; $3 = ""; print; next }
# DD-MM-YYYY hh:mm:ss -> DD.MM.YY; a field that is not a date is left as is.
{ $2 = gensub(/^([0-9]{2})-([0-9]{2})-[0-9]{2}([0-9]{2}).*/,"\\1.\\2.\\3",1,$2); print $0, "" }
' file*
Another in GNU awk:
$ awk '
function refmt(str) { # reformat date for comparing
split(str,d,"[ :-]")
return mktime(d[3] " " d[2] " " d[1] " " d[4] " " d[5] " " d[6])
}
BEGIN {
FS=OFS=";"
start=refmt("10-12-2014 06:47:59") # reformat the threshold date
print "colnum","date","col3","col4" # print header (why 5?)
}
refmt($2)>start || $2=="" { # if date > start or empty
sub(/ .*/,"",$2) # delete time part
gsub(/-/,".",$2) # replace - by .
$4=$3; $3="" # or $3 = OFS $3
print # output
}' file
colnum;date;col3;col4
2;12.12.2014;;
3;12.12.2014;;
4;14.12.2014;;
5;;;movefield
6;16.12.2014;;

awk 1-liner to cover matching and non-matching case

Is there a way in an awk one-liner to cover both the positive and negative match case with different print statements?
To illustrate. Let's say I have a file where I want to prepend a set of words with '#' but still want to print all the words in the file.
Something like :
awk '/red/||/green/ { print "# My mod : " $0 } else { print $0 }'
Of course the above won't work. But what's the simple way to do this in awk?
Cheers,
Gert
To cover the general case i.e. printing something completely different, you can use next:
awk '/red|green/ { print "foo"; next } { print "bar" }' file
The second block is only reached if the first pattern is false.
You can also use if/else within a single action block.
For your specific case, where you are just adding to the record and printing, you can use this:
awk '/red|green/ { $0 = "# My mod : " $0 } 1' file
The 1 at the end ensures that every record is printed.
How about "painful" long hand
awk '{if (/red/||/green/ ) { print "# My mod : " $0 } else { print $0 }}'
That works for me ;-), but you can save a few chars typing with
awk '{if (/red|green/ ) {print "# My mod : " $0 } else {print $0} }'
OR
# "$0" inside an awk string literal is not expanded, so the record is concatenated after the ternary instead.
awk '{print (($0~/red|green/) ? "# My mod : " : "") $0}'
Which is the shortest amt of code I can think of to achieve the result you are looking for.
awk '{ print (/red|green/ ? "# My mod : " : "") $0 }' file

Set number of lines to variable in awk

I am experimenting with an awk script (an independent file).
I want it to process a text file which looks like this:
value1: n
value2: n
value3: n
value4: n
value5: n
value6: n
value7: n
value1: n
:
The text file contains a lot of these blocks with 7 values in each of them. I want the awk script to print some of these values (name of the value and "n") into a new file or the commandline. I thought I'd process it with a while loop, which works with a variable set to the number of all lines. But I just cant get the total of all lines in the file into a variable. It seems I have to process every line and do something with it until the end of the file to get the total. But I'd like to have the total in a variable and then process it with the while loop which loops until the total is reached.
Do you have an idea?
Where $1 is the input parameter to your script: myscript textfile.txt
# Line count of the file named by the script's first argument.
# Quoting "$1" keeps file names with spaces or glob characters intact.
count=$(wc -l -- "$1" | cut -d' ' -f1)
echo "Number of lines in $1 is $count"
Then do your awk command utilising $count as your line count
Edit: courtesy of fedorqui
# Reading via redirection makes wc print only the number (no file name).
# The redirect target is quoted so names with spaces are not an "ambiguous redirect".
count=$(wc -l <"$1")
echo "Number of lines in $1 is $count"
Edit 2: (forgive my awk command it's not something that I use much)
# Count the lines first, then hand the total to awk with -v instead of
# splicing the shell variable into the awk program text.
count=$(wc -l </etc/fstab)
echo "Number of lines in /etc/fstab is $count"
awk -v count="$count" '{print $0,"\t","\tLine ",NR," of ",count;}' /etc/fstab
Either do two passes over the file:
awk 'NR==FNR { lines = NR; next }
{ ... this happens on the second pass, use lines as you wish ... }' file file
or read the lines into an array and process it in END:
awk '{ a[NR] = $0 }
END { lines = NR; for(i=1; i<=lines; ++i) { line = a[i]; ... } }' file
The first consumes I/O, the second memory.
In more detail,
awk 'NR==FNR { count++; next }
{ print "Item " FNR " of " count ": " $0 }' file file
or similarly
awk '{ a[NR] = $0; }
END { for (i=1; i<=NR; ++i) print "Item " i " of " NR ": " a[i] }' file
If you need the line count outside of Awk, you will need to print it and capture it from your script. If you have an Awk script which performs something useful and produces the count as a side effect, you will want to print the line count to standard output, and take care that all other output is directed somewhere else. Perhaps like this;
# awk writes the average to average.txt and prints only the record count on
# standard output, which the command substitution captures.
lines=$(awk '{ sum += $1; }
END { print "Average: " (NR ? sum/NR : "n/a") >"average.txt"; print NR }')  # NR guard: empty input must not divide by zero
# Back in Bash
echo "Total of $lines processed, output in average.txt"

usage of $1 and $2 in awk

As far as I know, in awk $1 and $2 refer to the first and second field of the current record. But can $1 and $2 be used to refer to the first and second field of a variable? For example, if session=5 is stored in a variable, I would like $1 to refer to 'session' and $2 to '5'. Thank you.
Input File
session=123
process=90
customer=145
session=123
customer=198
process=90
CODE
awk '$1 ~ /^Session|^CustomerId/' hi|xargs -L 1 -I name '{if (!($1 SUBSEP $2 in a)) {ids[$1]++; a[$1, $2]}} END {for (id in ids) {print "Count of unique", id, " " ids[id]}}'
DETAILS
I will pass the output that I got from first and pipe it via xargs and I have the lines read in "name" variable in xargs .. Now my $1 should correspond to first field of xargs and this is my query
Output
Count of unique sessions=2
Count of unique customer=2
If you want to limit the script to only including "session" and "customer" all you have to do is add the regex to the main script as a selector:
awk -F= '$1 ~ /^(session|customer)$/ {if (!($1 SUBSEP $2 in a)) {ids[$1]++; a[$1, $2]}} END {for (id in ids) {print "Count of unique", id, " " ids[id]}}'
If what you're looking for is a count of unique customers and sessions, then this might do:
awk -F= '
$1~/^(session|customer)$/ && !seen[$0] {
seen[$0]=1;
count[$1]++;
}
END {
printf("Count of sessions: %d\n", count["session"]);
printf("Count of customers: %d\n", count["customer"]);
}' hi
In addition to keeping a count, this keeps an associative array of lines that have contributed a count, to avoid counting lines a second time - thus making it a unique count.
Use the Field Separator, which can be specified inside the BEGIN code block as FS="separator", or as a command line option to awk via -F "separator" This answer shows only the point asked by the question. it does not address the final output.
# Count how many "session" and "customer" lines occur, splitting each line on "=".
awk -F"=" '$1 == "session" ||
$1 == "customer" { ids[$1]++ } # do whatever you need with the counters.
END { for (id in ids) {
print "Count", id "=" ids[id] }}' hi
Why don't you just try an all awk solution? It's more simple:
awk -F "=" '$1 ~ /customer|session/ { name[$1]++ } END { for (var in name) print "Count of unique", var"="name[var] }' hi
Results:
Count of unique customer=2
Count of unique session=2
Is there some other reason you need to pipe to xargs?
HTH
Yet an alternative would be
awk -F "=" '$1 ~ /customer|session/ {print $1}'|sort |uniq -c | awk '{print "Count of unique "$2"="$1}'
Here is the answer to the question you deleted:
This is self-contained AWK script based on an answer of mine to one of your earlier questions:
#!/usr/bin/awk -f
# Tallies lines starting with Customer= and Merchant=: prints the number of
# distinct lines of each kind, then, per merchant in first-seen order, the
# customers recorded under it.
/^Customer=/ {
mc[$0, prev]++ # count this customer line under the most recent Merchant= line (prev is empty until one is seen)
if (!($0 in cseen)) { # first time this exact customer line appears
cust[++custc] = $0 # remember customers in first-seen order
ids["Customer"]++
}
cseen[$0] # bare reference creates the key, marking the line as seen
}
/^Merchant=/ {
prev = $0 # following Customer= lines are attributed to this merchant
if (!($0 in mseen)) { # first time this exact merchant line appears
merch[++merchc] = $0 # remember merchants in first-seen order
ids["Merchant"]++
}
mseen[$0]++ # number of times this merchant line occurred
}
END {
for (id in ids) { # iteration order of "for (id in ids)" is unspecified
print "Count of unique", id, ids[id]
}
for (i = 1; i <= merchc; i++) {
merchant = merch[i]
# NOTE(review): mseen holds how often the Merchant= line occurred, not a customer count - confirm this is the intended figure.
print "Customers under (" merchant ") is " mseen[merchant]
for (j = 1; j <= custc; j++) {
customer = cust[j]
if (customer SUBSEP merchant in mc) { # only customers actually seen under this merchant
print "(" customer ") under (" merchant ") is " mc[customer, merchant]
}
}
}
}
Set it be executable and run it:
$ chmod u+x customermerchant
$ ./customermerchant data.txt