awk nesting curly brackets - awk

I have the following awk script where I seem to need to nest curly brackets, but this is not allowed in awk. How can I fix this issue in my script?
The problem is in the if(inqueued == 1).
BEGIN {
print "Log File Analysis Sequencing for " + FILENAME;
inqueued=0;
connidtext="";
thisdntext="";
}
/message EventQueued/ {
inqueued=1;
print $0;
}
if(inqueued == 1) {
/AttributeConnID/ { connidtext = $0; }
/AttributeThisDN / { thisdntext = $2; } #space removes DNRole
}
#if first chars are a timetamp we know we are out of queued text
/\#?[0-9]+:[0-9}+:[0-9]+/
{
if(thisdntext != 0) {
print connidtext;
print thisdntext;
}
inqueued = 0; connidtext=""; thisdntext="";
}

try to change
if(inqueued == 1) {
/AttributeConnID/ { connidtext = $0; }
/AttributeThisDN / { thisdntext = $2; } #space removes DNRole
}
to
inqueued == 1 {
if($0~ /AttributeConnID/) { connidtext = $0; }
if($0~/AttributeThisDN /) { thisdntext = $2; } #space removes DNRole
}
or
inqueued == 1 && /AttributeConnID/{connidtext = $0;}
inqueued == 1 && /AttributeThisDN /{ thisdntext = $2; } #space removes DNRole

awk is made up of <condition> { <action> } segments. Within an <action> you can specify conditions just like you do in C with if or while constructs. You have a few other problems too, just re-write your script as:
# Report the ConnID and ThisDN attributes that follow each
# "message EventQueued" line of a log file.

# FILENAME is not set while BEGIN runs, so the banner is printed when the
# first record of each input file is read instead.
FNR == 1 {
    print "Log File Analysis Sequencing for", FILENAME
}

# Start of a queued-event section: set the flag and echo the line.
/message EventQueued/ {
    inqueued = 1
    print
}

# While inside a queued-event section, capture the attributes of interest.
inqueued == 1 {
    if (/AttributeConnID/) { connidtext = $0 }
    # The trailing space keeps AttributeThisDNRole from matching.
    if (/AttributeThisDN /) { thisdntext = $2 }
}

# A timestamp (digits:digits:digits, optionally preceded by "#") means we
# are out of the queued text.
/#?[0-9]+:[0-9]+:[0-9]+/ {
    # Compare with "" rather than 0: once thisdntext has been reset to ""
    # it is a string, and the string "" is never equal to "0".
    if (thisdntext != "") {
        print connidtext
        print thisdntext
    }
    inqueued = 0
    connidtext = thisdntext = ""
}
I don't know if that'll do what you want or not, but it's syntactically correct at least.

Related

how to optimize this awk script?

I read 2 files with awk. While reading the first file I store the columns I need in arrays. I then use these arrays to compare against column 8 of the second file.
My script runs very slowly. Is there a way to optimize it?
FNR==NR
{
a[$1];
ip[NR]=$1;
site[NR]=$2;
next
}
BEGIN{
FS="[\t,=]";
OFS="|";
}
sudo awk -f{
l=length(ip);
if($8 in a)
{
for(k=0;k<=l;k++)
{
if(ip[k]== $8)
{
if(NF <= 70)
{
print "siteID Ipam: "site[k],"siteID zsc: "$14,"date: " $4,"src: "$8,"dst: "$10,"role: "$22,"urlcategory: "$36, "urlsupercategory: "$38,"urlclass: "$40;
}
else
{
print "siteID Ipam: "site[k], "siteID zsc: "$14,"date: " $4, "src: " $8, "dst: " $10, "role: "$22, "urlcategory: " $37, "urlsupercategory: "$39, "urlclass: $41;
}
break;
}
}
}
else
{
print $8 " is not in referentiel ";
}
}
Here is the same code, better formatted, with the initial typo left in.
BEGIN {
FS = "[\t,=]";
OFS = "|";
}
FNR == NR {
a[$1];
ip[NR] = $1;
site[NR] = $2;
next;
}
sudo awk -f {
l = length(ip);
if($8 in a) {
for(k = 0; k <= l; k++) {
if(ip[k] == $8) {
if(NF <= 70) {
print "siteID Ipam: "site[k],"siteID zsc: "$14,"date: " $4,"src: "$8,"dst: "$10,"role: "$22,"urlcategory: "$36, "urlsupercategory: "$38,"urlclass: "$40;
}
else {
print "siteID Ipam: "site[k], "siteID zsc: "$14,"date: " $4, "src: " $8, "dst: " $10, "role: "$22, "urlcategory: " $37, "urlsupercategory: "$39, "urlclass: $41;
}
break;
}
}
} else {
print $8 " is not in referentiel ";
}
}
suggest:
fix sudo awk -f typo.
a[$1]; --> a[$1] = 1;
($8 in a) --> (a[$8])

checking cell value in csv and formatting in HTML using awk

#!/usr/bin/awk -f
BEGIN {
FS=","
print "<table>"
}
{
gsub(/</, "\\<")
gsub(/>/, "\\>")
gsub(/&/, "\\>")
print "\t<tr>"
for(f = 1; f <= NF; f++) {
if(NR == 1 && header) {
printf "\t\t<th>%s</th>\n", $f
}
else printf "\t\t<td>%s</td>\n", $f
}
print "\t</tr>"
}
END {
print "</table>"
}
How can I check the value of $f inside the loop, so that when a cell contains "No" it is printed using
printf("<TD class=AltGreen align=right height="17" width="5%">%s</TD>\n",$f)
instead of printf "\t\t<td>%s</td>\n", $f
Input.csv
USA,NO,45
UK,YES,90*
I have made a couple of changes to your original Awk logic:
1. Remove the spaces from each $f field while looping over the fields.
2. Add a check that compares $f to the string NO.
The Awk code I used is as follows:
#!/usr/bin/awk -f
# Render comma-separated input as an HTML table, one <tr> per record.
# Cells equal to "NO" get the AltGreen styling. Run with -v header=1 to
# emit the first record as <th> cells.
BEGIN {
    FS = ","
    print "<table>"
}
{
    # Escape HTML metacharacters. "&" is handled first so the entities
    # inserted for "<" and ">" are not escaped a second time. In a gsub
    # replacement "\\&" stands for a literal ampersand.
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
    print "\t<tr>"
    for (f = 1; f <= NF; f++) {
        # Drop spaces so that " NO" and "NO " still compare equal to "NO".
        gsub(/ /, "", $f)
        if (NR == 1 && header) {
            printf "\t\t<th>%s</th>\n", $f
        }
        else if ($f == "NO") {
            # A literal percent sign in a printf format is written %%.
            printf "\t\t<TD class=AltGreen align=right height=\"17\" width=\"5%%\">%s</TD>\n", $f
        }
        else printf "\t\t<td>%s</td>\n", $f
    }
    print "\t</tr>"
}
END {
    print "</table>"
}
produced an output as
<table>
<tr>
<td>USA</td>
<TD class=AltGreen align=right height="17" width="5%">NO</TD>
<td>45</td>
</tr>
<tr>
<td>UK</td>
<td>YES</td>
<td>90*</td>
</tr>
</table>
#!/usr/bin/awk -f
# Render comma-separated input as an HTML table, one <tr> per record.
# Cells matching NO in any letter case get the AltGreen styling.
BEGIN {
    # Uncomment to emit the first record as <th> header cells.
    #header = 1
    # For the "no" in the question and the "NO" in the sample: with GNU awk
    # a non-zero IGNORECASE makes regexp matches case-insensitive. Other
    # awks treat it as an ordinary variable.
    IGNORECASE = 1
    FS = ","
    print "<table>"
}
{
    # Escape HTML metacharacters. "&" is handled first so the entities
    # inserted for "<" and ">" are not escaped a second time. In a gsub
    # replacement "\\&" stands for a literal ampersand.
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
    print "\t<tr>"
    for (f = 1; f <= NF; f++) {
        if (NR == 1 && header) {
            printf "\t\t<th>%s</th>\n", $f
        }
        else {
            # NO filtering: the whole cell must be NO.
            if ($f ~ /^NO$/) {
                # A literal percent sign in a printf format is written %%.
                printf("<TD class=AltGreen align=right height=\"17\" width=\"5%%\">%s</TD>\n", $f)
            }
            else {
                printf "\t\t<td>%s</td>\n", $f
            }
        }
    }
    print "\t</tr>"
}
END {
    print "</table>"
}
I only modified your code slightly, to keep it as close as possible to what you wrote:
- use $f ~ // for the test;
- add IGNORECASE (0 for case-sensitive matching, 1 for case-insensitive);
- escape the double quotes around the quoted attribute values in the HTML output.
Some remarks:
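I think you want to replace gsub(/&/, "\\&gt;") with gsub(/&/, "\\&amp;"), and run it before the substitutions for < and > so that the entities they insert are not escaped again.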
You do not need header when you check on NR.
When you want to check on "NO" in the header too, you can do something like
# Demo: render two CSV records as an HTML table. The first record becomes
# <th> cells, later records <td> cells, and any cell equal to NO gets the
# AltGreen styling regardless of the row it is in.
printf '%s\n' 'USA,NO,45' 'UK,YES,90*' | awk '
BEGIN {
    FS = ","
    print "<table>"
}
{
    # Escape HTML metacharacters. The ampersand is handled first so the
    # entities inserted for < and > are not escaped a second time.
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
    print "\t<tr>"
    # Pick the cell tag once per record.
    if (NR == 1) {
        tag = "th"
    } else {
        tag = "td"
    }
    for (f = 1; f <= NF; f++) {
        if ($f == "NO") {
            printf("<%s class=AltGreen align=right height=\"17\" width=\"5%%\">%s</%s>\n",
                   tag, $f, tag)
        } else {
            printf "\t\t<%s>%s</%s>\n", tag, $f, tag
        }
    }
    print "\t</tr>"
}
END {
    print "</table>"
}
'

AWK - Working with two files

I have these two csv files:
File A:
veículo;carro;sust
automóvel;carro;sust
viatura;carro;sust
breve;rápido;adj
excepcional;excelente;adj
maravilhoso;excelente;adj
amistoso;simpático;adj
amigável;simpático;adj
...
File B:
"A001","carro","sust","excelente","adj","ocorrer","adv","bom","adj"
...
In file A, $1 (a word) is a synonym for $2 (a word), and $3 is the part of speech.
In the lines of file B we can skip $1; the remaining columns are words, each followed by its part of speech.
What I need to do is, for each line of file B, look up each (word, part-of-speech) pair in file A and generate a line for each synonym. It is difficult to explain.
Desired Output:
"A001","carro","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","carro","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","carro","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
Done:
BEGIN {
FS="[,;]";
OFS=";";
}
FNR==NR{
sinonim[$1","$2","$3]++;
next;
}
{
s1=split($0,AX,"\n");
for (i=1;i<=s1;i++)
{
s2=split(AX[i],BX,",");
for (j=2;j<=NF;j+=2)
{
lineX=BX[j]","BX[j+1];
gsub(/\"/,"",lineX);
for (item in sinonim)
{
s3=split(item,CX,",");
lineS=CX[2]","CX[3];
if (lineX == lineS)
{
BX[j]=CX[1];
lineD=""
for (t=1;t<=s2;t++)
{
lineD=lineD BX[t]",";
}
lineF=lineF lineD"\n";
}
}
}
}
print lineF
}
$ cat tst.awk
# Expand records of the second input by substituting words with the
# synonyms listed in the first input. Needs GNU awk, because synonyms is
# an array of arrays.
#
# The first input is split on semicolons.
BEGIN { FS=";" }
# First input only (NR==FNR): under the key ($2,$3), record both $2 itself
# and $1 as subscripts of the sub-array.
NR==FNR { synonyms[$2,$3][$2]; synonyms[$2,$3][$1]; next }
# First record of a later input: switch FS and OFS to the
# quote-comma-quote sequence and re-split the current record with it.
FNR==1 { FS=OFS="\",\""; $0=$0 }
{
# Remove the double quote at the very start and very end of the record.
gsub(/^"|"$/,"")
# Visit fields 2, 4, 6, ... and pair each with the field after it.
for (i=2;i<NF;i+=2) {
if ( ($i,$(i+1)) in synonyms) {
# Put every recorded subscript for that pair into field i in turn.
for (synonym in synonyms[$i,$(i+1)]) {
$i = synonym
# With field i replaced, visit the even fields again.
for (j=2;j<NF;j+=2) {
if ( ($j,$(j+1)) in synonyms) {
for (synonym in synonyms[$j,$(j+1)]) {
# Save the record, substitute field j, print the result the first time
# it is seen, then restore the saved record.
orig = $0
$j = synonym
if (!seen[$0]++) {
# Put back the outer quotes removed by the gsub above.
print "\"" $0 "\""
}
$0 = orig
}
}
}
# NOTE(review): field i is not reset to its original value after this
# loop, and the inner loop reuses the variable name synonym - confirm both
# are intended.
}
}
}
}
.
$ awk -f tst.awk fileA fileB
"A001","carro","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","excelente","adj","ocorrer","adv","bom","adj"
"A001","carro","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","carro","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","veículo","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","automóvel","sust","excepcional","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","maravilhoso","adj","ocorrer","adv","bom","adj"
"A001","viatura","sust","excepcional","adj","ocorrer","adv","bom","adj"
The above uses GNU awk for multi-dimensional arrays, with other awks it's a simple tweak to use synonyms[$2,$3] = synonyms[$2,$3] " " $2 etc. or similar and then split() later instead of synonyms[$2,$3][$2] and in.
BEGIN { FS="[,;]"; OFS="," }
NR == FNR { key = "\"" $2 "\""; synonym[key] = synonym[key] "," $1; next }
{
print;
if ($2 in synonym) {
count = split(substr(synonym[$2], 2), choices)
for (i = 1; i <= count; i++) {
$2 = "\"" choices[i] "\""
print
}
}
}

Awk input variable as a rule

Good day!
I have the next code:
BLOCK=`awk '
/\/\* R \*\// {
level=1
count=0
}
level {
n = split($0, c, "");
for (i = 1; i <= n; i++)
{
printf(c[i]);
if (c[i] == ";")
{
if(level==1)
{
level = 0;
if (count != 0)
printf("\n");
};
}
else if (c[i] == "{")
{
level++;
count++;
}
else if (c[i] == "}")
{
level--;
count++;
}
}
printf("\n")
}' $i`
That code cuts the piece of the file from /* R */ mark to the ';' symbol with taking into account the details like braces etc. But that isn't important. I want to replace the hard-coded /* R */ by the variable:
RECORDSEQ="/* R */"
...
BLOCK=`awk -v rec="$RECORDSEQ" '
rec {
level=1
count=0
}
But that doesn't work.
How can I fix it?
Thank you in advance.
Found the solution:
RECORDSEQ="/* R */"
# Construct regexp for awk
RECORDSEQREG=`echo "$RECORDSEQ" | sed 's:\/:\\\/:g;s:\*:\\\*:g'`
# Cycle for files
for i in $SOURCE;
do
# Find RECORDSEQ and cut out the block
BLOCK=`awk -v rec="$RECORDSEQREG" '
$0 ~ rec {
level=1
count=0
}
...
Many thanks to people who helped.

awk: add a line if missing

I've an awk script which processes .ICS calendar files.
I need to add the ATTENDEE line if it's missing.
I already have a script which parses all the events, taking into consideration only the ones I need according to a CHECKPARM criterion. I need to add the ATTENDEE line if it's not already present.
/BEGIN:VEVENT/ { cache = 1; }
/CHECKPARM/ {
if( index( $0, var ) )
printf( "%s", cached_lines );
else
drop = 1;
cached_lines = "";
cache = 0;
}
# this doesn't work
#!~ /ATTENDEE/ {
# printf ("ATTENDEE: %s", organizer);
#}
cache {
cached_lines = cached_lines $0 "\n";
next;
};
!drop { print; }
/END:VEVENT/ { drop = 0; }
Try using a flag, if line is present, set it, if not, add line.
Something like this:
# Track, per VEVENT, whether an ATTENDEE line has been seen, and add one
# just before END:VEVENT when it is missing. Place these rules ahead of
# the rule that prints the END:VEVENT line so the added line lands inside
# the event.
/BEGIN:VEVENT/ { att = 0 }      # new event: nothing seen yet
/^ATTENDEE/    { att = 1 }      # the event already has an attendee
# drop is the flag the CHECKPARM rule in the question sets for events that
# are being discarded; when it is never set it is empty and has no effect.
/END:VEVENT/ && !att && !drop {
    printf("ATTENDEE: %s\n", organizer)
}