DL Blog: 09/2008

In part 1, we had some awk code that could parse the stat file and show the PNUMs with the source and destination servers and file names, tab delimited.

To illustrate why anybody would be interested in doing this, here is what some stats look like without a parse script:

Not very convenient to gather information from, especially if you have thousands of transmissions and detailed analysis to do. Let's add some features to the awk code from part 1. First, we can wrap the awk into a shell script, and accept the fields we want on the command line. Some fields, such as RMTP, may have equal signs in the values. Since we are splitting on equal signs we need to put the values back together, so let's fix that, also.

#!/bin/ksh
# parse - front end for the awk program below: chooses which stat-record
# fields are printed.
#
# Usage: parse [-f FIELD1,FIELD2,...]
#   -f LIST   comma-separated field IDs to show instead of the defaults
#
# /usr/xpg4/bin comes first so that Solaris picks the standards-compliant
# awk; on other systems the extra directory is simply not found.
PATH=/usr/xpg4/bin:/usr/bin
# default field IDs
FIELDIDS=PNUM,PNOD,SNOD,SFIL,DFIL,CCOD
# get different field IDs from command line, if they are specified
# (two separate tests instead of the obsolescent, ambiguous -a operator)
if [ "$1" = "-f" ] && [ -n "$2" ]; then
  FIELDIDS=$2
fi

I name this script parse. Now let's use it to show the stats. This shows me a neat, tab-delimited list of what files I transmitted outbound yesterday, with destination node name and IP, filenames, and completion codes:

$ cat S20080924.001 |grep PNOD=john |egrep "CTRC|SSTR" |parse -f PNUM,RECI,SNOD,DFIL,RMTP,CCOD
123     SSTR    paul            192.168.11.132, PORT=1364
123     CTRC    paul    bill.udot.mktb.200809241130             0
124     SSTR    george          192.168.11.145, PORT=1364
124     CTRC    george bill.udot.mktb.200809241130             0
125     SSTR    paul            192.168.11.132, PORT=1364
125     CTRC    paul    bill.udot.mkta.200809241137             0
126     SSTR    george          192.168.11.145, PORT=1364
126     CTRC    george bill.udot.mkta.200809241137             0
127     SSTR    paul            192.168.11.132, PORT=1364
127     CTRC    paul    bill.udot.mkta.200809241153             0
128     SSTR    george          192.168.11.145, PORT=1364
128     CTRC    george bill.udot.mkta.200809241153             0
129     SSTR    paul            192.168.11.132, PORT=1364
129     CTRC    paul    bill.udot.mktb.200809241156             0
130     SSTR    george          192.168.11.145, PORT=1364
130     CTRC    george bill.udot.mktb.200809241156             0
131     SSTR    paul            192.168.11.132, PORT=1364
131     CTRC    paul    bill.udot.mktb.200809241230             0
132     SSTR    george          192.168.11.145, PORT=1364
132     CTRC    george bill.udot.mktb.200809241230             0
133     SSTR    paul            192.168.11.132, PORT=1364
133     CTRC    paul    bill.udot.mkta.200809241237             0
134     SSTR    george          192.168.11.145, PORT=1364
134     CTRC    george bill.udot.mkta.200809241237             0
135     SSTR    paul            192.168.11.132, PORT=1364
135     CTRC    paul    bill.udot.mkta.200809241253             0
136     SSTR    george          192.168.11.145, PORT=1364
136     CTRC    george bill.udot.mkta.200809241253             0
137     SSTR    paul            192.168.11.132, PORT=1364
137     CTRC    paul    bill.udot.mktb.200809241256             0
138     SSTR    george          192.168.11.145, PORT=1364
138     CTRC    george bill.udot.mktb.200809241256             0
139     SSTR    paul            192.168.11.132, PORT=1364
139     CTRC    paul    todtrigger.200809241722.input           0
140     SSTR    paul            192.168.11.132, PORT=1364
140     CTRC    paul    todtrigger.200809241723.input           0
141     SSTR    paul            192.168.11.132, PORT=1364
141     CTRC    paul    todtrigger.200809241724.input           0
142     SSTR    paul            192.168.11.132, PORT=1364
142     CTRC    paul    todtrigger.200809241725.input           0
143     SSTR    paul            192.168.11.132, PORT=1364
143     CTRC    paul    todtrigger.200809241726.input           0
144     SSTR    paul            192.168.11.132, PORT=1364
144     CTRC    paul    todtrigger.200809241727.input           0

What if I wanted to clean up this output a little bit more? I really just wanted the destination IP address, but the RMTP field has the IP address and port number in it, and a comma. This is where we can derive fields from existing information inside other fields.

# Read pipe-delimited stat records (NAME=VALUE fields) from stdin and print
# the fields listed in FIELDIDS, each followed by a tab, one line per record.
# Derived fields available in addition to the ones present in the record:
#   LOCIP / RMTIP     - the part of LCLP / RMTP before the first comma
#   LOCPORT / RMTPORT - the part of LCLP / RMTP after its first equal sign
awk -F"|" '{
# populate array B with all values using field names as subscripts
for (i=1;i<=NF;i++) {
    SS=split($i,A,"=");SUB=A[1]; B[SUB]=A[2];delete A[1];delete A[2]
    # if the field has a second = in it, that means $i was split
    # into more than 2 pieces, gather the pieces
    for (j=3;j<=SS;j++) {
      B[SUB]=B[SUB]"="A[j];delete A[j]
    }
}
# go through all the fields requested and show values
NE=split(FIELDIDS,F,",")
for (IX=1;IX<=NE;IX++) {
    FLD=F[IX]
    # derived fields are computed on demand from LCLP / RMTP
    if (FLD=="LOCIP") {
      split(B["LCLP"],A,",")
      B["LOCIP"]=A[1]
      delete A[1]; delete A[2]
    }
    if (FLD=="LOCPORT") {
      split(B["LCLP"],A,"=")
      B["LOCPORT"]=A[2]
      delete A[1]; delete A[2]
    }
    if (FLD=="RMTIP") {
      split(B["RMTP"],A,",")
      B["RMTIP"]=A[1]
      delete A[1]; delete A[2]
    }
    if (FLD=="RMTPORT") {
      split(B["RMTP"],A,"=")
      B["RMTPORT"]=A[2]
      delete A[1]; delete A[2]
    }
    # a field that is absent from this record prints as an empty column
    printf "%s\t",B[FLD]
    delete F[IX]
}
print ""
# clear array B so values do not leak into the next record
for (SUB in B) delete B[SUB]
}' FIELDIDS="$FIELDIDS" -

Now we have additional derived fields to choose from, besides the regular fields inside the stat records.

$ cat S20080924.001 |grep PNOD=john |egrep "CTRC|SSTR" |parse -f PNUM,RECI,SNOD,DFIL,RMTIP,CCOD

123     SSTR    paul            192.168.11.132
123     CTRC    paul    bill.udot.mktb.200809241130             0
124     SSTR    george          192.168.11.145
124     CTRC    george bill.udot.mktb.200809241130             0
125     SSTR    paul            192.168.11.132
125     CTRC    paul    bill.udot.mkta.200809241137             0
126     SSTR    george          192.168.11.145
126     CTRC    george bill.udot.mkta.200809241137             0
127     SSTR    paul            192.168.11.132
127     CTRC    paul    bill.udot.mkta.200809241153             0
128     SSTR    george          192.168.11.145
128     CTRC    george bill.udot.mkta.200809241153             0
129     SSTR    paul            192.168.11.132
129     CTRC    paul    bill.udot.mktb.200809241156             0
130     SSTR    george          192.168.11.145
130     CTRC    george bill.udot.mktb.200809241156             0
131     SSTR    paul            192.168.11.132
131     CTRC    paul    bill.udot.mktb.200809241230             0
132     SSTR    george          192.168.11.145
132     CTRC    george bill.udot.mktb.200809241230             0
133     SSTR    paul            192.168.11.132
133     CTRC    paul    bill.udot.mkta.200809241237             0
134     SSTR    george          192.168.11.145
134     CTRC    george bill.udot.mkta.200809241237             0
135     SSTR    paul            192.168.11.132
135     CTRC    paul    bill.udot.mkta.200809241253             0
136     SSTR    george          192.168.11.145
136     CTRC    george bill.udot.mkta.200809241253             0
137     SSTR    paul            192.168.11.132
137     CTRC    paul    bill.udot.mktb.200809241256             0
138     SSTR    george          192.168.11.145
138     CTRC    george bill.udot.mktb.200809241256             0
139     SSTR    paul            192.168.11.132
139     CTRC    paul    todtrigger.200809241722.input           0
140     SSTR    paul            192.168.11.132
140     CTRC    paul    todtrigger.200809241723.input           0
141     SSTR    paul            192.168.11.132
141     CTRC    paul    todtrigger.200809241724.input           0
142     SSTR    paul            192.168.11.132
142     CTRC    paul    todtrigger.200809241725.input           0
143     SSTR    paul            192.168.11.132
143     CTRC    paul    todtrigger.200809241726.input           0
144     SSTR    paul            192.168.11.132
144     CTRC    paul    todtrigger.200809241727.input           0

This makes a beautiful and almost effortless import into Excel for further analysis:

Note: In the previous posting's comments I mentioned that you should use nawk instead of awk in Solaris. In the script I put /usr/xpg4/bin in the PATH before /usr/bin. So, if you run this in Solaris it will pick the newer, standards-compliant version of awk, which is like nawk. On other systems such as HPUX or Linux, the extra directory in the PATH is harmless, so the script stays portable.

When you look at the stat files in the work directory (S20080912.001, for example), they are not very human readable because you have to hunt through the text to find the fields you want by name. And if you want to compare one transmission to another, it is difficult. If you go into the direct prompt and run a select stat detail=yes, the output is very readable but not very machine readable, meaning you can't feed it into a script or spreadsheet. Wouldn't it be great to be able to parse information out of the stat files and get exactly what you want?

I started out wanting to look at stats for a specific set of files that were transmitted but I didn't know the process numbers. The full file name may be something like procfeed.20080912_140814.input, but I want to see a list of PNUMs for all the files similar to that.

cat S200809* | grep RECI=CTRC | grep procfeed.200809 |
awk -F"|" '{
# walk the pipe-separated fields until the PNUM one turns up
for (n = 1; n <= NF; n++) {
    split($n, KV, "=")
    if (KV[1] != "PNUM")
      continue
    # found it: show the value and move on to the next record
    print KV[2]
    break
}
}'

So I cat the stat files, grep for just the CTRC copy records, and grep for the first part of the file name. Then I pipe that through a simple awk script. The -F"|" means consider a pipe character to be the field separator. The script counts through all the fields, splitting each one at the equals sign. If the part before the equals sign is PNUM, we've found the field: print the value and break out of the for loop to go to the next record. The above gives an output like this:

22505
22675
22802
23216
23289

Then I went into the direct prompt and did a "select stat detail=yes pnum=(22505,22675,22802,23216,23289);" to get the details about the transmissions in a human-readable form.

I soon found myself doing this sort of thing repeatedly, usually looking for just a couple of pieces of info from the stats. I wanted to view more than just the PNUM field, so that I could skip the step of going into the direct prompt to do my select stat command.

cat S200809* | grep RECI=CTRC | grep procfeed.200809 |
awk -F"|" '{
    # load every NAME=VALUE field of the record into VAL, keyed by NAME
    for (n = 1; n <= NF; n++) {
      split($n, KV, "=")
      VAL[KV[1]] = KV[2]
    }
    # print the wanted columns in order, each followed by a tab
    WANT = split("PNUM,PNOD,SNOD,SFIL,DFIL,CCOD", COL, ",")
    for (c = 1; c <= WANT; c++)
      printf "%s\t", VAL[COL[c]]
    print ""
    # empty VAL again before the next record is read
    for (K in VAL)
      delete VAL[K]
}'

Now this gives me a nice tab-delimited list of PNUMs with the source and destination servers and file names. What the above code does is grep through the stat files for the copy records with the first part of the transmitted file name in them, and feed just those lines into the awk program. In the awk program we go through all of the fields, as separated by pipe characters, and populate an array with the info: one array element for each field, with the field name used for the subscript. Then we go through a list of just the field names we want and display those array elements. We separate the output with tabs, print a trailing newline, and clear the array for housekeeping's sake.

DL Blog

9/26/2008

Parsing Connect:Direct stats (part 2)

9/13/2008

Parsing Connect:Direct stats (part 1)

Blog Archive

Links

About Me