1. Introduction
Guilherme Junqueira's script to parse access.log.
2. Script
#!/bin/bash
#######################################################################################################
# Description: aggregates the time-taken field of an access log read from standard input,
#              grouped by time bucket (second, minute, hour or day).
#              For every bucket it prints one row with the sum of the time taken, the number
#              of requests, and the maximum and minimum time taken.
#
# Usage: <script> DATE_FIELD AGG_OPTION TIMETAKEN_FIELD [ID] [SEP] < access.log
#
# Parameters (in order):
#   DATE_FIELD      - index of the field that contains the date (usually 5)
#   AGG_OPTION      - second, minute or hour; any other value aggregates by day
#   TIMETAKEN_FIELD - index of the field that contains the time taken
#   ID              - optional label printed on every output row (usually the server name);
#                     defaults to 'unspecified'
#   SEP             - optional separator between fields (defaults to ' ')
#
# Exit status: 1 when a required parameter is missing or a field index is not a number.
#######################################################################################################
DATE_FIELD=${1:-}
AGG_OPTION=${2:-}
TIMETAKEN_FIELD=${3:-}
ID="${4:-unspecified}"
SEP="${5:- }"

# Prints the usage text; callers redirect it to stderr.
usage() {
  echo
  echo "Not all parameters have been provided or set properly."
  echo "Usage: $(basename -- "$0") DATE_FIELD AGG_OPTION TIMETAKEN_FIELD [ID] [SEP]"
  echo "Alternate usage: $(basename -- "$0") (with parameters set internally)"
  echo
}

######################################################################################################
############################ Non configurable section below this line ################################
if [[ -z "$DATE_FIELD" || -z "$AGG_OPTION" || -z "$TIMETAKEN_FIELD" ]]; then
  usage >&2
  exit 1
fi

# A non-numeric index would make awk evaluate $0 (the whole line) and silently
# produce meaningless statistics, so reject it up front.
if [[ ! "$DATE_FIELD" =~ ^[0-9]+$ || ! "$TIMETAKEN_FIELD" =~ ^[0-9]+$ ]]; then
  echo "DATE_FIELD and TIMETAKEN_FIELD must be numeric field indexes." >&2
  usage >&2
  exit 1
fi

# Every -v value is quoted so that an ID or separator containing whitespace or
# glob characters reaches awk as a single assignment.
# Note: awk still interprets backslash escapes inside -v values.
awk -F "$SEP" \
  -v dateField="$DATE_FIELD" \
  -v aggOpt="$AGG_OPTION" \
  -v ttField="$TIMETAKEN_FIELD" \
  -v vID="$ID" '
# Builds the aggregation key for the current record.
#   df  - index of the date field
#   agg - aggregation option (second, minute, hour; anything else means day)
# The date field is split on ":" and the first character of the first piece is
# dropped (presumably the opening bracket of an access-log timestamp).
# parts and key are extra parameters so that they stay local to the function.
function getDate(df, agg,    parts, key) {
  split($df, parts, ":")
  key = substr(parts[1], 2)
  if (agg == "second") {
    key = key " " parts[2] ":" parts[3] ":" parts[4]
  } else if (agg == "minute") {
    key = key " " parts[2] ":" parts[3]
  } else if (agg == "hour") {
    key = key " " parts[2]
  }
  return key
}

# Per record: update the request count, total, maximum and minimum of the bucket.
{
  data = getDate(dateField, aggOpt)
  tt = $ttField

  req[data]++
  time[data] += tt

  if (tt > max[data]) {
    max[data] = tt
  }

  # An empty minimum means no value has been stored for this bucket yet.
  if (min[data] == "" || tt < min[data]) {
    min[data] = tt
  }
}

# One header row followed by one row per bucket (awk iteration order is unspecified).
END {
  printf("%15s;%25s;%20s;%15s;%15s;%15s;\n", "ID", "DATA", "SUM TIME TAKEN", "REQS", "MAX", "MIN");
  for (i in req) {
    printf("%15s;%25s;%20d;%15d;%15d;%15d;\n", vID, i, time[i], req[i], max[i], min[i]);
  }
}'
3. References
- Special thanks to Guilherme Junqueira