sexta-feira, 29 de agosto de 2014

Bash Script to parse access log - keywords: parse, awk, bash, statistics, time taken

1. Introduction

Guilherme Junqueira's script to parse access.log

2. Script


#!/bin/bash

#######################################################################################################
# Description: reads an access log on standard input and aggregates the time-taken field per
#              time bucket (second, minute, hour or day). For every bucket it prints the summed
#              time taken, the number of requests, and the maximum and minimum time taken.
#
# Usage: <script> DATE_FIELD AGG_OPTION TIMETAKEN_FIELD [ID] [SEP] < access.log
#
# Parameters (in order):
#    DATE_FIELD      - index of the field that contains the date. The first character of that
#                      field is dropped (presumably the '[' of a common-log-format timestamp)
#                      and the remainder is split on ':'.
#    AGG_OPTION      - either second, minute, hour or day; any other value is aggregated by day
#                      (a warning is written to stderr)
#    TIMETAKEN_FIELD - index of the field of the time taken
#    ID              - optional label printed on every output row (default: unspecified)
#    SEP             - optional separator between fields, handed to awk -F (default: ' ')
#
# Output: a header row followed by one ';'-terminated row per bucket, in no particular order.
#         Numeric columns are formatted with %d, so fractional values are truncated.
# Exit status: 1 when mandatory parameters are missing or a field index is not a positive
#              integer; otherwise the exit status of awk.
#######################################################################################################

# Print the usage text to stderr so it never mixes with the data written to stdout.
usage() {
  {
    echo
    echo "Not all parameters have been provided or set properly."
    echo "Usage: ${0##*/} DATE_FIELD AGG_OPTION TIMETAKEN_FIELD [ID] [SEP] < access.log"
    echo
  } >&2
}

# Succeed only when $1 is a positive integer, i.e. a usable awk field index.
is_field_index() {
  [[ "$1" =~ ^[1-9][0-9]*$ ]]
}

# Validate the parameters and run the aggregation over standard input.
# Returns (rather than exits) so that the status of the final `main "$@"` is the script status.
main() {
  local date_field=${1:-}
  local agg_option=${2:-}
  local timetaken_field=${3:-}
  local id=${4:-unspecified}
  local sep=${5:-" "}

  if [[ -z "$date_field" || -z "$agg_option" || -z "$timetaken_field" ]]; then
    usage
    return 1
  fi

  # A non-numeric index would make awk evaluate $0 (the whole line) instead of a field.
  if ! is_field_index "$date_field" || ! is_field_index "$timetaken_field"; then
    echo "DATE_FIELD and TIMETAKEN_FIELD must be positive integers." >&2
    usage
    return 1
  fi

  case "$agg_option" in
    second | minute | hour | day) ;;
    *) echo "Warning: unknown AGG_OPTION '$agg_option'; aggregating by day." >&2 ;;
  esac

  # Every -v value is quoted: an ID containing whitespace would otherwise be split into
  # several words and shift the awk program out of its argument position.
  awk -F "$sep" \
    -v dateField="$date_field" \
    -v aggOpt="$agg_option" \
    -v ttField="$timetaken_field" \
    -v vID="$id" '
# Build the bucket key for the current record from field number df.
# parts and bucket are awk-style locals (extra parameters) so no globals leak.
function getDate(df, agg,    parts, bucket) {
   split($df, parts, ":")
   bucket = substr(parts[1], 2)            # drop the leading character of the date
   if (agg == "second" || agg == "minute" || agg == "hour") {
      bucket = bucket " " parts[2]
      if (agg != "hour")   bucket = bucket ":" parts[3]
      if (agg == "second") bucket = bucket ":" parts[4]
   }
   return bucket
}

{
   data = getDate(dateField, aggOpt)
   t = $ttField + 0                        # force a number so comparisons are never string-based

   if (!(data in req)) {                   # first record of this bucket seeds both extremes
      max[data] = t
      min[data] = t
   } else if (t > max[data]) {
      max[data] = t
   } else if (t < min[data]) {
      min[data] = t
   }

   req[data]++
   total[data] += t
}

END {
    printf("%15s;%25s;%20s;%15s;%15s;%15s;\n", "ID", "DATA", "SUM TIME TAKEN", "REQS", "MAX", "MIN");

    for (i in req) {
       printf("%15s;%25s;%20d;%15d;%15d;%15d;\n", vID, i, total[i], req[i], max[i], min[i]);
    }
}'
}

main "$@"


3. References


  • Special thanks to Guilherme Junqueira

Nenhum comentário:

Postar um comentário