awk and gawk decimal integer comparison sometimes fails - awk

I've written an AWK script to scan a check image log file (the ASCII characters sent to a laser or dot-matrix printer to print on preprinted check forms), captured with tee /tmp/$$.print for every check run. The goal is to add up the check-tab invoice/discount values and compare the total to the dollars-and-cents amount printed on the check body. The script works as desired but fails unexpectedly, and I can see no reason for the failure. Out of 750 check images processed, 37 checks are wrongly included in the collected list of checks where the sum of the check-tab invoices does not equal the check amount; only nine checks have actual differences:
37 /tmp/eq_check
Check # 62110 04/07/2022 Sum tab 2240.45 Check amount 2240.45
Check # 62131 04/07/2022 Sum tab 2099.22 Check amount 2099.22
Check # 62134 04/07/2022 Sum tab 5124.40 Check amount 5124.40
Check # 63143 04/14/2022 Sum tab 536.58 Check amount 536.58
Check # 63148 04/14/2022 Sum tab 2354.18 Check amount 2354.18
Check # 63155 04/28/2022 Sum tab 1276.55 Check amount 1276.55
...
Check # 75161 12/09/2022 Sum tab 614.41 Check amount 614.41
Check # 75172 12/09/2022 Sum tab 17445.24 Check amount 17445.24
Check # 75176 12/09/2022 Sum tab 1194.85 Check amount 1194.85
Check # 75179 12/09/2022 Sum tab 264.10 Check amount 264.10
9 /tmp/neq_check
Check # 62122 04/07/2022 Sum tab 366.24 Check amount 150.00
Check # 63199 05/10/2022 Sum tab 22310.65 Check amount 21274.66
Check # 63268 06/09/2022 Sum tab 36086.37 Check amount 35918.21
Check # 63310 06/30/2022 Sum tab 16841.02 Check amount 14652.00
Check # 63429 09/07/2022 Sum tab 5955.87 Check amount 5707.53
Check # 63449 09/12/2022 Sum tab 947268177.91 Check amount 28064.91
Check # 75010 09/26/2022 Sum tab 562.82 Check amount 314.48
Check # 75054 10/21/2022 Sum tab 10052.77 Check amount 9804.43
Check # 75113 11/10/2022 Sum tab 19821.61 Check amount 7381.69
After I composed this post, it occurred to me to try changing the not-equal test to
if ( ( tab_total - pcheck_amt) != 0 ) to see if that works.
Nope: same 37 false positives and 9 true positives.
Here is a test check:
# cat check63282
99820989 20220616 326.10
Discount -3.26
63282 06/21/2022 MU
$322.84
Three Hundred Twenty Two Dollars and 84 Cents********************************
#
Here is the code that is failing:
# upper tab example
# 947897461 20221024 76.00 947992349 20221031 1161.30
# Discount -1.52 Discount -23.23
# 947897457. 20221024 6754.59 94793360 20221029 5731.54
# Discount -135.09 Discount -114.63
# SHIP & DEBIT 20221027 -25,866.38 947973361 20221029 1,386.00
# 947945737 20221027 28,325.70 Discount -27.72
# Discount -566.51 947973365 20221029 312.00
# 947945740 20221027 404.00 Discount -6.24
#
#
#
# Check body example
# 63449 09/12/2022 BM
#
#
#
# $28,064.91
# Twenty Eight Thousand Sixty Four Dollars and 91 Cents***********************
#
# index(s,t)
# Returns the position in string s where string t first
# occurs, or 0 if it does not occur at all.
BEGIN { tab_total = 0 }
{
    gsub(/,/, "")    # strip NN,NNN.NN -> NNNNN.NN
    gsub(/\$/, "")   # strip $NNN.NN -> NNN.NN
    gsub(/^M/, "")   # strip DOS line ending
    # Find a line with a decimal point
    a = index($0, ".")
    if (a > 0 && a < 50) {
        for (i = 1; i <= NF; i++) {
            b = index($i, ".")
            if (b > 0) {
                tab_total += ($i * 100)
            }
        }
    }
    # Find the date line
    c = index($0, "/")
    if (c > 50) {
        check_num = $1
        check_date = $2
        if (NF > 2) who_to = $3
    }
    # Find the printed check amount
    if (a > 50) {
        pcheck_amt = ($0 + 0) * 100
    }
    if ($0 ~ /Dollars and/) {
        # found check body.
        gsub(/\*\*$/, "", $0)
        if (pcheck_amt != tab_total) {
            printf "\n Check # %6d %s Sum tab %10.2f Check amount %7.2f\n %s\n", check_num, check_date, tab_total/100, pcheck_amt/100, $0
        }
        tab_total = 0
    }
}
With debugging added:
BEGIN { tab_total = pcheck_amt = 0 }
{
    gsub(/,/, "")
    gsub(/\$/, "")
    gsub(/^M/, "", $0)
    # Find a line with a decimal point
    a = index($0, ".")
    c = index($0, "/")
    if (a > 0 && a < 50) {
        #print "a = ", a, " ", $0
        for (i = 1; i <= NF; i++) {
            b = index($i, ".")
            print "b = ", b, " ", $0
            if (b > 0) {
                print "before Tab total= ", tab_total
                tab_total += ($i * 100)
                print "after Tab total= ", tab_total
            }
        }
    }
    # Find the date line
    if (c > 50) {
        check_num = $1
        check_date = $2
        if (NF > 2) who_to = $3
    }
    # Find the printed check amount
    if (a > 50) {
        print "a= ", a, " ", $1
        pcheck_amt = ($1 + 0) * 100
        print "$1 = ", $1, " *100 = ", (($1 + 0) * 100)
    }
    #print $0
    if ($0 ~ /Dollars and/) {
        # found check body.
        gsub(/\*\*$/, "", $0)
        printf "RAW tab_total %d format %%d\n", tab_total
        printf "RAW pcheck_amt %d format %%d\n", pcheck_amt
        printf "RAW pcheck_amt %f format %%f\n", pcheck_amt
        printf "RAW pcheck_amt/100 %d format %%d\n", pcheck_amt/100
        printf "tab_total - pcheck_amt %f format %%f\n", tab_total - pcheck_amt
        printf "pcheck_amt - tab_total %f format %%f\n", pcheck_amt - tab_total
        printf "tab_total - pcheck_amt %e format %%e\n", tab_total - pcheck_amt
        printf "pcheck_amt - tab_total %e format %%e\n", pcheck_amt - tab_total
        if ((tab_total - pcheck_amt) == 0) print "true"
        if (pcheck_amt != tab_total) {
            printf "\n Check # %6d %s Sum tab %10.2f Check amount %7.2f\n %s\n", check_num, check_date, tab_total/100, pcheck_amt/100, $0
        }
        tab_total = 0
    }
}
And the output for the test check above:
# cat check63282 | gawk -f bbprint_scan.awk
b = 0 99820989 20220616 326.10
b = 0 99820989 20220616 326.10
b = 4 99820989 20220616 326.10
before Tab total= 0
after Tab total= 32610
b = 0 Discount -3.26
b = 3 Discount -3.26
before Tab total= 32610
after Tab total= 32284
a= 74 322.84
$1 = 322.84 *100 = 32284
RAW tab_total 32284 format %d
RAW pcheck_amt 32283 format %d
RAW pcheck_amt 32284.000000 format %f
RAW pcheck_amt/100 322 format %d
tab_total - pcheck_amt 0.000000 format %f
pcheck_amt - tab_total -0.000000 format %f
tab_total - pcheck_amt 7.275958e-12 format %e
pcheck_amt - tab_total -7.275958e-12 format %e
Check # 63282 06/21/2022 Sum tab 322.84 Check amount 322.84
Three Hundred Twenty Two Dollars and 84 Cents******************************
#
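The %e lines pinpoint the cause: awk does all of its arithmetic in double-precision binary floating point, and a value such as 322.84 has no exact binary representation, so ($1 * 100) yields something like 32283.999999999996 rather than 32284. %d truncates (hence RAW pcheck_amt 32283) while %f rounds for display, but != compares the full values, so a residue of about 7e-12 is enough to produce a false positive. A minimal sketch of one common fix (my code, not the original script): round to the nearest integer cent everywhere a dollar amount is scaled.

# A minimal sketch: force every scaled amount to an exact integer number
# of cents, so floating-point residue cannot survive the comparison.
# int() truncates toward zero, so negatives are rounded away from zero.
function cents(x) { return int(x * 100 + (x < 0 ? -0.5 : 0.5)) }
BEGIN {
    tab_total  = cents(326.10) + cents(-3.26)   # 32610 - 326 = 32284
    pcheck_amt = cents(322.84)                  # exactly 32284
    print ((tab_total == pcheck_amt) ? "equal" : "NOT equal")
}

In the original script this would mean tab_total += cents($i) in the field loop and pcheck_amt = cents($0 + 0) for the printed amount; comparing with a tolerance instead (treating any difference smaller than half a cent as zero) would work equally well.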

Side note:
gsub(/^M/, "") # strip DOS line ending
^M is how some applications visualize the invisible byte \r (carriage return), but when you use it literally in a regex like this, what it actually does is strip an ASCII capital letter M whenever M is the first character of the line: ^ is the start-of-line anchor and M is just the letter M.
If you want to deal with line endings, do this instead:
RS = "\r?\n" # preferred
or
sub("\15$", "") # "\15" is octal for \r

Related

Divide largest value by second largest value

I have a file in the following format. Column 1 has ~20,000 unique entries, column 2 has ~120,000 different entries, and column 3 has a count associated with column 2. For a single entry in column 1 there can be multiple entries in column 2. For each unique entry in column 1, I am trying to get the ratio of the maximum value to the second maximum value of column 3.
F1.txt
S1 S2 C1
A A1 1
A AA 10
A A6 5
A A0 4
B BB 12
B BC 11
B B1 19
B B9 4
Expected Output
S1 S2 C1
B B1 19 1.58333
A AA 10 2
I can do it in steps like below, but is there a smarter way of doing it in one script?
awk 'NR==1; NR > 1 {print $0 | "sort -k3 -n -r "}' F1.txt | awk '!seen[$1]++' >del1.txt
awk 'FNR==NR{a[$2]=1; next}FNR==1{print $0;}!a[$2]' del1.txt F1.txt | awk 'NR==1; NR > 1 {print $0 | "sort -k3 -n -r"}' | awk '!seen[$1]++' >del2.txt
awk 'FNR==NR{a[$1]=$3; next}FNR==1{print $0"\t";"RT"}FNR>1 a[$1]{print $0"\t"$3/a[$1]}' del2.txt del1.txt
#!/usr/bin/gawk -f
NR == 1 { print $1, $2, $3; next }
{ data[$1][$3] = $2 }
END {
    for (key in data) {
        asorti(data[key], s, "@ind_num_desc")
        print key, data[key][s[1]], s[1], s[1] / s[2]
    }
}
This assumes an arbitrary permutation of the lines (and requires gawk, which is pretty common, or another implementation with native multi-dimensional "arrays").
If you can make more assumptions about the input, e.g. that it is always grouped by the first column, then you can make it more memory-efficient and get rid of the multi-dimensional arrays, by not delaying the evaluation until END and instead calculating the result in a per-line block each time the first column's value changes (and then one last time in END).
To get different handling of equal numeric values (e.g. to report the "subkey" (column 2) of the first rather than the last encountered occurrence of a value), you could guard the assignment with if (!($3 in data[$1])) or the like, as sketched below.
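For instance, a sketch of that guarded variant (only the per-line rule changes):

{ if (!($3 in data[$1])) data[$1][$3] = $2 }

A later row with the same count then no longer overwrites the first-seen subkey.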
Whenever you find yourself creating a pipeline containing awk, there is a very good chance that what you are trying to do can be done in a single call to awk much more efficiently.
A non-GNU awk approach that presumes all field1 'A' records are together and all 'B' records are together (as you show in your sample data) could be:
awk '
FNR==1 { print; next }           ## 1st line, output heading
$1 != n {                        ## 1st field changed
    if (n) {                     ## if n set, output result of last block
        printf "%s\t%s\n", rec, max / nextmax
    }
    rec = $0                     ## initialize vars for next block
    n = $1
    max = $3
    nextmax = 1
    next                         ## skip to next record
}
{
    if ($3 > max) {              ## check if 3rd field > max
        rec = $0                 ## save record
        nextmax = max            ## update nextmax
        max = $3                 ## update max
    }
    else if ($3 > nextmax) {     ## if 3rd field > nextmax
        nextmax = $3             ## update nextmax
    }
}
END { printf "%s\t%s\n", rec, max / nextmax }   ## output final block results
' file
Example Use/Output
With your data in the file file, you would have:
$ awk '...same script as above...' file
S1 S2 C1
A AA 10 2
B B1 19 1.58333
Using any awk in any shell on every Unix box and using almost no memory (important since your input file would be huge given your description of it):
$ cat tst.awk
BEGIN { FS=OFS="\t" }
NR == 1 { print; next }
$1 != prev {
    if ( prev != "" ) {
        print prev, val, max, (preMax ? max/preMax : 0)
    }
    prev = $1
    max = preMax = ""
}
(max == "") || ($3 > max) {
    val = $2
    preMax = max        # previous max becomes the current second max
    max = $3
    next
}
(preMax == "") || ($3 > preMax) {
    preMax = $3         # track the second-largest value explicitly
}
END { print prev, val, max, (preMax ? max/preMax : 0) }
$ awk -f tst.awk F1.txt
S1 S2 C1
A AA 10 2
B B1 19 1.58333

awk: manipulations with multi-column data

The following AWK script (part of a larger bash script) extracts numbers from selected columns of input.csv and performs some simple statistical operations on them, eventually saving the results as one line in output.csv:
awk -F ", *" ' # set field separator to comma, followed by 0 or more whitespaces
FNR==1 {
if (n) { # calculate the results of previous file
m = s / n # mean
var = s2 / n - m * m # variance
if (var < 0) var = 0 # avoid an exception due to round-off error
mean[suffix] = m # store the mean in an array
rmsd[suffix] = sqrt(var)
lowest[suffix] = min # lowest dG
highest[suffix] = fourth # dG in cluster with highest pop
}
prefix=suffix=FILENAME
sub(/_.*/, "", prefix)
sub(/\/[^\/]+$/, "", suffix)
sub(/^.*_/, "", suffix)
s = 0 # sum of $3
s2 = 0 # sum of $3 ** 2
n = 0 # count of samples
min = 0 # lowest value of $3 (assuming all $3 < 0)
max = 0 # highest value of $2 (assuming all $2 > 0)
}
FNR > 1 {
s += $3
s2 += $3 * $3
++n
if ($3 < min) min = $3 # update the lowest value
if ($2 > max) {
max = $2 # update popMAX
fourth = $3 # update the value of dG corresponded to topPOP
}
}
END {
if (n) { # just to avoid division by zero
m = s / n
var = s2 / n - m * m
if (var < 0) var = 0
mean[suffix] = m
rmsd[suffix] = sqrt(var)
lowest[suffix] = min # most negative dG
highest[suffix] = fourth # dG in a cluster with pop(MAX)
}
print "Lig(CNE)", "dG(min)", "dG(popMAX)", "dG(mean)"
for (i in mean)
printf "%s %.2f %.2f %.2f\n", i, lowest[i], highest[i], mean[i]
}' input.csv > output.csv
While operating on input.csv (shown below), it extracts numbers from the third column (dG) of the log: i) detecting the minimal value in the third column (dG(min), which always corresponds to the line with ID=1), and ii) the dG value corresponding to the maximal number in the second column (POPmax):
# input.csv from the folder 10V1_cne_lig12
ID, POP, dG
1, 142, -5.6500 # this is dG min to be extracted
2, 10, -5.5000
3, 2, -4.9500
4, 150, -4.1200 # this is dG corresponded to pop(MAX) to be extracted
Finally it saves the results in another multi-column file, output.csv, containing part of the name of each processed CSV (used as the ID of the line), as well as its dG(min), dG(popMAX), and the mean value calculated over all numbers in the 3rd (dG) column:
# output.csv
Lig(CNE) dG(min) dG(popMAX) dG(mean)
lig12 -5.65 -4.12 -5.055
So dG(min) is the value of $3 (dG) from the line with ID=1 of input.csv (the lowest dG), and dG(popMAX) corresponds to the dG value detected in the line which has the highest value in $2 (POP).
I need to modify the AWK part of my script, adding two additional columns to output.csv with the information from the 2nd column of input.csv (POP) for each of the corresponding dG values (which are taken from the 3rd column of the same log). So the output should look like this:
# output.csv
Lig(CNE) dG(min) POP(min) dG(popMAX) POP(max) dG(mean)
lig12 -5.65 (142) -4.12 (150) -5.055
In other words, in addition to the operations performed on the 3rd column, I need to consider the numbers from the 2nd column and then match both of them in output.csv: POP(min) should be taken from $2 of the first line (the one with dG(min)), and POP(max) from $2 of the line with dG(popMAX).
I have tried to extract the 2nd column information using
'{print $2}'
but the resulting output.csv did not match the order of the lines of the original input.csv (for instance, it took the 2nd column from a line not belonging to dG(min), etc.).
Would you please try:
awk -F ", *" ' # set field separator to comma, followed by 0 or more whitespaces
FNR==1 {
if (n) { # calculate the results of previous file
m = s / n # mean
var = s2 / n - m * m # variance
if (var < 0) var = 0 # avoid an exception due to round-off error
mean[suffix] = m # store the mean in an array
rmsd[suffix] = sqrt(var)
lowest[suffix] = min # lowest dG
highest[suffix] = fourth # dG in cluster with highest pop
pop_min[suffix] = popmin # pop in cluster with lowest dG
pop_max[suffix] = max # highest pop
}
prefix=suffix=FILENAME
sub(/_.*/, "", prefix)
sub(/\/[^\/]+$/, "", suffix)
sub(/^.*_/, "", suffix)
s = 0 # sum of $3
s2 = 0 # sum of $3 ** 2
n = 0 # count of samples
min = 0 # lowest value of $3 (assuming all $3 < 0)
max = 0 # highest value of $2 (assuming all $2 > 0)
}
FNR > 1 {
s += $3
s2 += $3 * $3
++n
if ($3 < min) {
min = $3 # update the lowest value
popmin = $2 # newly introduced variable
}
if ($2 > max) {
max = $2 # update popMAX
fourth = $3 # update the value of dG corresponded to topPOP
}
}
END {
if (n) { # just to avoid division by zero
m = s / n
var = s2 / n - m * m
if (var < 0) var = 0
mean[suffix] = m
rmsd[suffix] = sqrt(var)
lowest[suffix] = min # most negative dG
highest[suffix] = fourth # dG in a cluster with pop(MAX)
pop_min[suffix] = popmin # pop in cluster with lowest dG
pop_max[suffix] = max # highest pop
}
print "Lig(CNE)", "dG(min)", "POP(dGmin)", "dG(popMAX)", "POP(max)", "dG(mean)"
for (i in mean)
printf "%s %.2f (%d) %.2f (%d) %.2f\n", i, lowest[i], pop_min[i], highest[i], pop_max[i], mean[i]
}' input.csv
The highest pop associated with dG(popMAX) is already assigned to the existing variable max. The variable popmin has been introduced to hold the pop value associated with the lowest dG; it is updated whenever min (the lowest dG) is updated.
BTW, variable names such as min or max are getting less self-explanatory due to the repeated extensions. It may be better to rename them and/or refactor the code for future maintainability.
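Since all the per-file bookkeeping happens in the FNR==1 block, keyed by suffix, the script is designed to be fed many files in one call; a hypothetical invocation (the glob pattern and the file name stats.awk are my assumptions, based on the folder name 10V1_cne_lig12 shown above):

awk -F ', *' -f stats.awk 10V1_cne_*/input.csv > output.csv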

Concatenate columns and add digits with awk

I have a csv file:
number1;number2;min_length;max_length
"40";"1801";8;8
"40";"182";8;8
"42";"32";6;8
"42";"4";6;6
"43";"691";9;9
I want the output be:
4018010000;4018019999
4018200000;4018299999
42320000;42329999
423200000;423299999
4232000000;4232999999
42400000;42499999
43691000000;43691999999
So the new file will consist of:
column_1 = a concatenation of old_column_1 + old_column_2 + a number of "0"s equal to (old_column_3 - length of old_column_2)
column_2 = a concatenation of old_column_1 + old_column_2 + a number of "9"s equal to (old_column_3 - length of old_column_2), when min_length = max_length. When min_length is not equal to max_length, I need to take into account all the possible lengths; so for the line "42";"32";6;8, the lengths are 6, 7 and 8.
Also, I need to delete the quotation marks everywhere.
I tried with paste and cut like this:
paste -d ";" <(cut -f1,2 -d ";" < file1) > file2
for the concatenation of the first 2 columns, but I think with awk it's easier. However, I can't figure out how to do it. Any help is appreciated. Thanks!
Edit: I added column 4 (max_length) to the input.
You may use this awk:
awk '
function padstr(ch, len, s) {
    s = sprintf("%*s", len, "")
    gsub(/ /, ch, s)
    return s
}
BEGIN {
    FS = OFS = ";"
}
{
    gsub(/"/, "")
    for (i=0; i<=($4-$3); i++) {
        d = $3 - length($2) + i
        print $1 $2 padstr("0", d), $1 $2 padstr("9", d)
    }
}' file
4018010000;4018019999
4018200000;4018299999
42320000;42329999
423200000;423299999
4232000000;4232999999
42400000;42499999
43691000000;43691999999
With awk:
awk '
BEGIN{FS = OFS = ";"}                 # set field and output field separator to ";"
{
    $0 = gensub("\"", "", "g")        # Drop double quotes (gensub is gawk-specific)
    s = $1$2                          # The range header number
    for (i = 0; i <= $4 - $3; i++) {  # one output line per allowed length
        l = 10^($3 - length($2) + i)  # 10 raised to the number of digits to append
        print s*l, (s+1)*l - 1        # Appending n zeros is multiplication by 10^n;
                                      # appending n nines gives s*10^n + (10^n - 1) = (s+1)*10^n - 1
    }
}' input.txt
Explanation inline as comments.
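To make the arithmetic concrete, the last input row worked by hand:

"43";"691";9;9  ->  s = 43691, n = $3 - length($2) = 9 - 3 = 6
s * 10^n         = 43691000000   # six zeros appended
(s+1) * 10^n - 1 = 43691999999   # six nines appended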

How to subtract milliseconds with awk - script

I'm trying to create an awk script to subtract millisecond timestamps between each pair of records. Working step by step on the command line, I might do this:
Input:
06:20:00.120
06:20:00.361
06:20:15.205
06:20:15.431
06:20:35.073
06:20:36.190
06:20:59.604
06:21:00.514
06:21:25.145
06:21:26.125
Command:
awk '{ if ( ( NR % 2 ) == 0 ) { printf("%s\n",$0) } else { printf("%s ",$0) } }' input
I'll obtain this:
06:20:00.120 06:20:00.361
06:20:15.205 06:20:15.431
06:20:35.073 06:20:36.190
06:20:59.604 06:21:00.514
06:21:25.145 06:21:26.125
To subtract the milliseconds properly:
awk '{ if ( ( NR % 2 ) == 0 ) { printf("%s\n",$0) } else { printf("%s ",$0) } }' input| awk -F':| ' '{print $3, $6}'
And to avoid negative numbers:
awk '{if ($2<$1) sub(/00/, "60",$2); print $0}'
awk '{$3=($2-$1); print $3}'
The goal is to get this:
Call 1 0.241 ms
Call 2 0.226 ms
Call 3 1.117 ms
Call 4 0.91 ms
Call 5 0.98 ms
And finally an average.
I can perform all of this, but only command by command; I don't know how to put it into a single script.
Please help.
Using awk:
awk '
BEGIN { cmd = "date +%s.%N -d " }
NR % 2 {
    cmd $0 | getline var1    # epoch seconds for the first timestamp of the pair
    close(cmd $0)            # close the pipe so file descriptors are not leaked
    next
}
{
    cmd $0 | getline var2    # epoch seconds for the second timestamp
    close(cmd $0)
    var3 = var2 - var1
    print "Call " ++i, var3 " ms"
}
' file
Call 1 0.241 ms
Call 2 0.226 ms
Call 3 1.117 ms
Call 4 0.91 ms
Call 5 0.98 ms
One way using awk:
Content of script.awk:
## For every pair of input lines.
{
    ## Convert formatted dates to time in milliseconds.
    t1 = to_ms( $0 )
    getline
    t2 = to_ms( $0 )

    ## Calculate difference between both dates in milliseconds.
    tr = (t1 >= t2) ? t1 - t2 : t2 - t1

    ## Print to output with time converted to a readable format.
    printf "Call %d %s ms\n", ++cont, to_time( tr )
}

## Convert a date in format hh:mm:ss.mmm to milliseconds.
function to_ms(time, time_ms, time_arr)
{
    split( time, time_arr, /:|\./ )
    time_ms = ( time_arr[1] * 3600 + time_arr[2] * 60 + time_arr[3] ) * 1000 + time_arr[4]
    return time_ms
}

## Convert a time in milliseconds to format hh:mm:ss.mmm. In case of 'hours' or
## 'minutes' with a value of 0, don't print them.
function to_time(i_ms, time)
{
    ms = int( i_ms % 1000 )
    s = int( i_ms / 1000 )
    h = int( s / 3600 )
    s = s % 3600
    m = int( s / 60 )
    s = s % 60
    # time = (h != 0 ? h ":" : "") (m != 0 ? m ":" : "") s "." ms
    time = (h != 0 ? h ":" : "") (m != 0 ? m ":" : "") s "." sprintf( "%03d", ms )
    return time
}
Run the script:
awk -f script.awk infile
Result:
Call 1 0.241 ms
Call 2 0.226 ms
Call 3 1.117 ms
Call 4 0.910 ms
Call 5 0.980 ms
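The question also asks for an average at the end. A minimal sketch of one way to add it (my extension, reusing the to_ms() and to_time() helpers from script.awk above): accumulate the differences and report the mean in an END block.

## Replacement main block: also accumulate the sum of all differences.
{
    t1 = to_ms( $0 )
    getline
    t2 = to_ms( $0 )
    tr = (t1 >= t2) ? t1 - t2 : t2 - t1
    sum += tr
    printf "Call %d %s ms\n", ++cont, to_time( tr )
}
## After the last pair, report the mean difference.
END { if (cont) printf "Average %s ms\n", to_time( sum / cont ) }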
If you're not tied to awk:
to_epoch() { date -d "$1" "+%s.%N"; }
count=0
paste - - < input |
while read t1 t2; do
    ((count++))
    diff=$(printf "%s-%s\n" $(to_epoch "$t2") $(to_epoch "$t1") | bc -l)
    printf "Call %d %5.3f ms\n" $count $diff
done

Calculate the difference from a flat file

I have a text file and the last 2 lines look like this...
Uptime: 822832 Threads: 32 Questions: 13591705 Slow queries: 722 Opens: 81551 Flush tables: 59 Open tables: 64 Queries per second avg: 16.518
Uptime: 822893 Threads: 31 Questions: 13592768 Slow queries: 732 Opens: 81551 Flush tables: 59 Open tables: 64 Queries per second avg: 16.618
How do I find the difference between the two values of each parameter?
The expected output is:
61 -1 1063 10 0 0 0 0.1
In other words, I would like to subtract the earlier uptime value from the current uptime, take the difference between the Threads and Questions values, and so on.
The purpose of this exercise is to watch this file and alert the user when the difference is too high, e.g. if the slow queries grow by more than 500 or the "Questions" parameter is too low (<100).
(It is MySQL status output, but the question has nothing to do with MySQL itself, so the mysql tag does not apply.)
Just a slight variation on ghostdog74's (original) answer:
tail -2 file | awk '{
    gsub(/[a-zA-Z: ]+/, " ")
    m = split($0, a, " ")
    for (i=1; i<=m; i++)
        if (NR==1) b[i] = a[i]; else print a[i] - b[i]
}'
Here's one way. tail is used to get the last 2 lines, which is especially efficient if you have a big file.
tail -2 file | awk '
{
    gsub(/[a-zA-Z: ]+/, " ")
    m = split($0, a, " ")
    if (f) {
        for (i=1; i<=m; i++) {
            print -(b[i]-a[i])
        }
        # to check for Questions, slow queries etc
        if ( -(b[3]-a[3]) < 100 ) {
            print "Questions parameter too low"
        } else if ( -(b[4]-a[4]) > 500 ) {
            print "Slow queries more than 500"
        } else if ( a[1] - b[1] < 0 ) {
            print "mysql ...... "
        }
        exit
    }
    for (i=1; i<=m; i++) { b[i] = a[i]; f = 1 }
}'
output
$ ./shell.sh
61
-1
1063
10
0
0
0
0.1
gawk:
BEGIN {
    arr[1] = "0"
}
length(arr) > 1 {
    print $2-arr[1], $4-arr[2], $6-arr[3], $9-arr[4], $11-arr[5], $14-arr[6], $17-arr[7], $22-arr[8]
}
{
    arr[1] = $2
    arr[2] = $4
    arr[3] = $6
    arr[4] = $9
    arr[5] = $11
    arr[6] = $14
    arr[7] = $17
    arr[8] = $22
}