awk system not setting variables properly - awk

I am having an issue getting the output of the grep (used in system() in nawk) assigned to a variable.
nawk '{
CITIZEN_COUNTRY_NAME = "INDIA"
# BUG being asked about: system() returns the shell command's exit status
# (0 on success), not its stdout -- the grep output only goes to the
# terminal, so CITIZENSHIP_CODE ends up holding 0.
CITIZENSHIP_CODE=system("grep "CITIZEN_COUNTRY_NAME " /tmp/OFAC/country_codes.config | cut -d # -f1")
}'/tmp/*****
The value IND is displayed in the console, but when I do a printf the value of CITIZENSHIP_CODE is 0. Can you please help me here?
printf("Country Tags|%s|%s\n", CITIZEN_COUNTRY_NAME ,CITIZENSHIP_CODE)
Contents of country_codes.config file
IND#INDIA
IND#INDIB
CAN#CANADA

system returns the exit value of the called command, but the output of the command is not returned to awk (or nawk). To get the output, you want to use getline directly. For example, you might re-write your script:
awk ' {
# Look up the code for CITIZEN_COUNTRY_NAME by reading the config file
# directly with getline (system() cannot return command output to awk).
file = "/tmp/OFAC/country_codes.config";
CITIZEN_COUNTRY_NAME = "INDIA";
FS = "#";   # config lines look like CODE#NAME, so split on '#'
# Guard the return value: getline yields 1 when a line is read, 0 at EOF
# and -1 on error.  A bare while( getline < file ) loops forever when the
# file cannot be opened, because -1 is treated as true.
while( (getline < file) > 0 ) {
if( $0 ~ CITIZEN_COUNTRY_NAME ) {
CITIZENSHIP_CODE = $1;   # first '#'-separated field is the code
}
}
close( file );   # close so the file is re-read from the top next record
}'

Pre-load the config file with awk:
nawk '
# First pass: NR==FNR is true only while reading the first file named on
# the command line (country_codes.config).  Build a lookup table mapping
# country name -> country code, then skip the main block with next.
NR == FNR {
split($0, x, "#")            # config lines look like CODE#NAME
country_code[x[2]] = x[1]    # index by name, store the code
next
}
# Second pass: runs only for records of the data file (filename).
{
CITIZEN_COUNTRY_NAME = "INDIA"
if (CITIZEN_COUNTRY_NAME in country_code) {
value = country_code[CITIZEN_COUNTRY_NAME]
} else {
value = "null"               # name not present in the config file
}
print "found " value " for country name " CITIZEN_COUNTRY_NAME
}
' country_codes.config filename

Related

Swapping / rearranging of columns and its values based on inputs using Unix scripts

Team,
I have a requirement of changing/ordering the columns of CSV files based on inputs.
example :
Datafile (source File) will be always with standard column and its values example :
PRODUCTCODE,SITE,BATCHID,LV1P_DESCRIPTION
MK3,Biberach,15200100_3,Biologics Downstream
MK3,Biberach,15200100_4,Sciona Upstream
MK3,Biberach,15200100_5,Drag envois
MK3,Biberach,15200100_8,flatsylio
MK3,Biberach,15200100_1,bioCovis
these columns (PRODUCTCODE,SITE,BATCHID,LV1P_DESCRIPTION) will be standard for source files, and what I am looking for is a solution to reformat this and generate a new file with only the columns we prefer.
Note : Source / Data file will be always comma delimited
Example : if I pass PRODUCTCODE,BATCHID as input then i would like to have only those column and its data extracted from source file and generate new file .
Something like script_name <output_column> <Source_File_name> <target_file_name>
target file example :
PRODUCTCODE,BATCHID
MK3,15200100_3
MK3,15200100_4
MK3,15200100_5
MK3,15200100_8
MK3,15200100_1
if i pass output_column as "LV1P_DESCRIPTION,PRODUCTCODE" then out file should be like below
LV1P_DESCRIPTION,PRODUCTCODE
Biologics Downstream,MK3
Sciona Upstream,MK3
Drag envios,MK3
flatsylio,MK3
bioCovis,MK3
It would be great if any one can help on this.
I have tried using some awk scripts (taken from some site), but they were not working as expected; since I don't have Unix knowledge I am finding it difficult to modify them.
awk code:
# Remove.awk: keep only the CSV columns whose names are listed in the
# variable c (passed with -v c="NAME1,NAME2").
BEGIN {
FS = ","
}
# Header line: work out which input column NUMBERS were requested.
NR==1 {
split(c, ca, ",")                 # ca = requested column names
for (i = 1 ; i <= length(ca) ; i++) {
gsub(/ /, "", ca[i])              # strip stray spaces from each name
cm[ca[i]] = 1                     # cm = set of wanted column names
}
for (i = 1 ; i <= NF ; i++) {
if (cm[$i] == 1) {
cc[i] = 1                         # cc = set of wanted column numbers
}
}
if (length(cc) == 0) {
exit 1                            # none of the requested columns exist
}
}
# Every record (header included): emit only the selected columns.
# NOTE(review): columns come out in source-file order, not in the order
# given in c -- presumably why the output did not match expectations.
{
ci = ""
for (i = 1 ; i <= NF ; i++) {
if (cc[i] == 1) {
if (ci == "") {
ci = $i
} else {
ci = ci "," $i                    # join kept fields with commas
}
}
}
print ci
}
the above code is saves as Remove.awk and this will be called by another scripts as below
var1="BATCHID,LV2P_DESCRIPTION"
## this is input fields values used for testing
awk -f Remove.awk -v c="${var1}" RESULT.csv > test.csv
The following GNU awk solution should meet your objectives:
awk -F, -v flds="LV1P_DESCRIPTION,PRODUCTCODE" 'BEGIN { split(flds,map,",") } NR==1 { for (i=1;i<=NF;i++) { map1[$i]=i } } { printf "%s",$map1[map[1]];for(i=2;i<=length(map);i++) { printf ",%s",$map1[map[i]] } printf "\n" }' file
Explanation:
awk -F, -v flds="LV1P_DESCRIPTION,PRODUCTCODE" ' # Pass the fields to print as a variable field
BEGIN {
split(flds,map,",") # Split fld into an array map using , as the delimiter
}
NR==1 { for (i=1;i<=NF;i++) {
map1[$i]=i # Loop through the header and create and array map1 with the column header as the index and the column number the value
}
}
{ printf "%s",$map1[map[1]]; # Print the first field specified (index of map)
for(i=2;i<=length(map);i++) {
printf ",%s",$map1[map[i]] # Loop through the other field numbers specified, printing the contents
}
printf "\n"
}' file

How to print out the file name that is being passed as an argument to awk program

I use a for loop to iterate a list of files and at each iteraction I pass one file name to an awk script.
I would like to print, not only the error code but also the name of the file that generated the error.
Something like : print error";"THENAMEOF_THE_FILE >> file.txt;
The awk script would look like this:
awk ' function errorManager(error)
{
print error >> file.txt;
}
BEGIN {error1="ERROR CODE X"}
{if (NR==1)
if(length($0) != 10)
{
errorManager(error1)
}
}
END{print "STOP"}' $1
This is what you're looking for:
awk '
# Central error reporter.  idx selects a message from errorArr; errorArr
# and error are declared as extra parameters only to make them local to
# the function (standard awk idiom for local variables).
function errorManager(idx, errorArr, error) {
errorArr[1] = "ERROR CODE X"
error = (idx in errorArr ? errorArr[idx] : "Unspecified Error")
# FILENAME and FNR identify the offending file and line; piping through
# "cat>&2" sends the message to stderr instead of stdout.
printf "Error %s[%d]: %s\n", FILENAME, FNR, error | "cat>&2"
}
# Validate only the first line of the input: it must be 10 chars long.
NR==1 {
if ( length($0) != 10 ) {
errorManager(1)
}
}
END{ print "STOP" }
# stderr (the error messages, including the file name) lands in file.txt.
' "$1" 2>"file.txt"

awk: reformat date in a list of data-files (mass edit)

I want to awk a list of data files. All records before a marker line — there is an unknown number of records before it — e.g.,
/10-12-2014 06:47:59/{p=1}
are to be skipped.
A brief template of one data file looks like this:
data_file_001
0; n records to be skipped
1;10-12-2014 06:47:59;
2;12-12-2014 10:17:44;
3;12-12-2014 10:37:44;
4;14-12-2014 10:00:32;
5;;movefield
6;16-12-2014 04:15:39;
needed Output ($2 datefield reformatted and $3 moved to $4):
colnum;date;col3;col4;col5
2;12.12.14;;
3;12.12.14;;
4;14.12.14;;
5;;;movefield;moved
6;16.12.14;;
My source file is this at the moment:
BEGIN { OFS=FS=";" ; print "colnum;date;col3;col4;col5"}
FNR == 1 { p=0 }
$3 == "movefield" { $4 = $3; $5 = "moved"; $3 = ""}
#(x=index($2," ") > 0) {DDMMYY = substr($2,1,x-1)}
$2=substr($2,1,11)
p!=0{print};
/10-12-2014 06:47:59/{p=1}
I have problems to reformat the data fields: The pattern-action (x=index($2," ") > 0) {DDMMYY = substr($2,1,x-1)} does not work nor $2=substr($2,1,11) in conjunction with the movefield action. Notice that the record where the movefield field appears has no date field.
Please have in mind that the awk is meant to be used on a bunch of files (loop).
With GNU awk for in-place editing, no loop required:
awk -i inplace '
BEGIN { OFS=FS=";" ; print "colnum","date","col3","col4","col5" }
FNR==1 { next }   # drop the first record of every file (the skip line)
# "movefield" rows: shift $3 into $4, mark it moved, blank out $3.
$3 == "movefield" { $4 = $3; $5 = "moved"; $3 = ""; print; next }
# Normal rows: strip the time part of $2, turn - into ., append a field.
{ sub(/ .*/,"",$2); gsub(/-/,".",$2); print $0, ""}
' file*
Another in GNU awk:
$ awk '
function refmt(str) { # reformat date for comparing
split(str,d,"[ :-]")
return mktime(d[3] " " d[2] " " d[1] " " d[4] " " d[5] " " d[6])
}
BEGIN {
FS=OFS=";"
start=refmt("10-12-2014 06:47:59") # reformat the threshold date
print "colnum","date","col3","col4" # print header (why 5?)
}
refmt($2)>start || $2=="" { # if date > start or empty
sub(/ .*/,"",$2) # delete time part
gsub(/-/,".",$2) # replace - by .
$4=$3; $3="" # or $3 = OFS $3
print # output
}' file
colnum;date;col3;col4
2;12.12.2014;;
3;12.12.2014;;
4;14.12.2014;;
5;;;movefield
6;16.12.2014;;

Pass variable as parameter to awk file in TCL

I want to pass variable to awk file in TCL which is somewhat similar to below code. Is there any way?
// out.tr is input file and variable to be passed is to_node
my_code.tcl:
set to_node 13
exec awk -f check_ack.awk out.tr $to_node
check_ack.awk:
BEGIN {}
{
# variable passed should be saved in node[i] --> But this doesnot work
for ( i = 0; i < ARGC; i++ ) {
node[i] = ARGV[i]
}
status = $7
if(FILENAME=="trace.tr") {
if(node[1] > -1 && $1 == "s" node[1] == $3 && status == "ack") {
print "Node" " "node[1]" " "has not sent acknowledgement" > "failed.txt"
}
}
}
END {
exit 0
}
You pass variables like this:
awk -f some/script.awk -v var="my value"
Now you can access your variable var in awk and if you print it, you'll see it contains my value.

awk | Rearrange fields of CSV file on the basis of column value

I need you help in writing awk for the below problem. I have one source file and required output of it.
Source File
a:5,b:1,c:2,session:4,e:8
b:3,a:11,c:5,e:9,session:3,c:3
Output File
session:4,a=5,b=1,c=2
session:3,a=11,b=3,c=5|3
Notes:
Fields are not organised in source file
In Output file: fields are organised in their specific format, for example: all a values are in 2nd column and then b and then c
For value c, in second line, its coming as n number of times, so in output its merged with PIPE symbol.
Please help.
Will work in any modern awk:
$ cat file
a:5,b:1,c:2,session:4,e:8
a:5,c:2,session:4,e:8
b:3,a:11,c:5,e:9,session:3,c:3
$ cat tst.awk
# Reorder name:value pairs into a fixed column order, merging repeats.
BEGIN{ FS="[,:]"; split("session,a,b,c",order) }   # desired output order
{
split("",val) # or delete(val) in gawk
# FS splits on both , and :, so fields alternate name,value.  Collect
# every value per name, joining repeated names with "|".
for (i=1;i<NF;i+=2) {
val[$i] = (val[$i]=="" ? "" : val[$i] "|") $(i+1)
}
# Emit the names in the fixed order; names missing from this record
# print an empty value.
for (i=1;i in order;i++) {
name = order[i]
printf "%s%s", (i==1 ? name ":" : "," name "="), val[name]
}
print ""
}
$ awk -f tst.awk file
session:4,a=5,b=1,c=2
session:4,a=5,b=,c=2
session:3,a=11,b=3,c=5|3
If you actually want the e values printed, unlike your posted desired output, just add ,e to the string in the split() in the BEGIN section wherever you'd like those values to appear in the ordered output.
Note that when b was missing from the input on line 2 above, it output a null value as you said you wanted.
Try with:
awk '
BEGIN {
FS = "[,:]"   # split records on commas and colons alike
OFS = ","
}
{
# Fields alternate name,value because of the FS above.
for ( i = 1; i <= NF; i+= 2 ) {
# session is printed immediately so it always leads the output line.
if ( $i == "session" ) { printf "%s:%s", $i, $(i+1); continue }
# Repeated names accumulate their values joined with "|".
hash[$i] = hash[$i] (hash[$i] ? "|" : "") $(i+1)
}
# asorti (gawk-only) sorts the names, so the remaining columns come
# out in alphabetical order after session.
asorti( hash, hash_orig )
for ( i = 1; i <= length(hash); i++ ) {
printf ",%s:%s", hash_orig[i], hash[ hash_orig[i] ]
}
printf "\n"
# Reset per-record state before the next line is processed.
delete hash
delete hash_orig
}
' infile
It splits each line on any comma or colon and traverses the odd-numbered fields, saving each name and its values in a hash that is printed at the end of the record. It yields:
session:4,a:5,b:1,c:2,e:8
session:3,a:11,b:3,c:5|3,e:9