Creating a script with options to access different directories and files
If I was implementing this in bash, I'd do the following. I won't comment on it much: feel free to ask specific questions though -- check the bash man page first if you don't know how a particular command works.
#!/bin/bash
# Interactive population statistics for a single year.
# Files are looked up below ./<year>/ relative to the current directory, and
# the last line of each file is taken as that file's population figure.

# prompt the user for the year
read -rp "What year? " year

# Collect the population figure of every file that belongs to that year.
# Only plain decimal integers are accepted: the figures are used inside (( ))
# further down, and bash would evaluate any other text as an expression.
int_re='^[[:space:]]*([0-9]+)[[:space:]]*$'
populations=()
while IFS= read -d '' -r filename; do
  # files outside ./<year>/ are never used, so do not even read them
  [[ $filename == ./"$year"/* ]] || continue
  last_line=$(tail -n 1 "$filename")
  if [[ $last_line =~ $int_re ]]; then
    # 10# forces base 10, so a figure with a leading zero is not read as octal
    populations+=("$(( 10#${BASH_REMATCH[1]} ))")
  else
    printf "Skipping '%s': last line is not a whole number\n" "$filename" >&2
  fi
done < <(find . -type f -print0)

# nothing usable for that year: report on stderr and fail
if (( ${#populations[@]} == 0 )); then
  echo "No files for year '$year'" >&2
  exit 1
fi

PS3="Select a function to calculate: "
select func in minimum maximum average quit; do
  case $func in
    minimum)
      min=${populations[0]}
      for figure in "${populations[@]}"; do
        (( figure < min )) && min=$figure
      done
      echo "Minimum for $year is $min"
      ;;
    maximum)
      max=${populations[0]}
      for figure in "${populations[@]}"; do
        (( figure > max )) && max=$figure
      done
      echo "Maximum for $year is $max"
      ;;
    average)
      sum=0
      for figure in "${populations[@]}"; do
        (( sum += figure ))
      done
      # bash arithmetic is integer-only, so the average is truncated
      echo "Average for $year is $(( sum / ${#populations[@]} ))"
      ;;
    quit)
      exit
      ;;
    *)
      # select leaves $func empty for anything that is not a menu number
      echo "Invalid choice '$REPLY'" >&2
      ;;
  esac
done
I wrote a simple awk script which does the same as what you are doing:
# read 'year' & 'option' from user
# or you can pass as argument to the command $1<-->$year & $2<-->$option
#
# Prints a statistic over the 4th line of every file under /path/to/$year:
#   option 1 -> minimum, 2 -> maximum, 3 -> average, 4 -> all three (one per line)
# '{} +' passes many files to one awk run; with a very large number of files
# find may start several runs, each of which prints its own result.
find "/path/to/$year" -type f -exec \
  awk -v select="$option" '
    # FNR restarts at 1 for every input file, so this rule sees exactly one
    # line per file; min and max are updated only here, never on other lines.
    FNR == 4 {
      sum += $0
      count++
      # the first value seeds both extremes
      if (count == 1 || $0 < min) min = $0
      if (count == 1 || $0 > max) max = $0
    }
    END {
      if (count == 0) {
        print "no file with a 4th line found" > "/dev/stderr"
        exit 1
      }
      avg = sum / count
      # indexed directly instead of joining and splitting on commas, so a
      # value that itself contains a comma cannot shift the elements
      to_print[1] = min
      to_print[2] = max
      to_print[3] = avg
      to_print[4] = min "\n" max "\n" avg
      print to_print[select]
    }' {} +
Explanation inline:
# read 'year' & 'option' from user
# or you can pass as argument to the command $1<-->$year & $2<-->$option
#
# find: walk all regular files under "/path/to/$year". $year is substituted with
# the value of the 'year' variable read from user input, or replace it with "$1"
# to use the first argument to the command.
# awk -v select="$option": copy the shell 'option' variable into the awk variable
# 'select'; replace it with "$2" to use the second argument to the command.
# These notes sit above the command because a shell comment placed right after
# the line-continuation backslash would cut the find command short.
find "/path/to/$year" -type f -exec \
  awk -v select="$option" '
    # NOTE: no comment inside this program may contain a single quote, since
    # that would end the shell string that holds the awk program.

    # FNR is the line number within the current file, so this rule runs once
    # per input file, on its 4th line.
    FNR == 4 {
      # add the value to "sum" and count how many files supplied one
      sum += $0
      count++
      # the first value seen seeds both extremes; after that keep the smaller
      # value in "min" and the larger value in "max". Doing both inside this
      # rule keeps lines other than the 4th from ever being compared.
      if (count == 1 || $0 < min) min = $0
      if (count == 1 || $0 > max) max = $0
    }
    END {
      # nothing to report when no file had a 4th line
      if (count == 0) {
        print "no file with a 4th line found" > "/dev/stderr"
        exit 1
      }
      # the average is computed once, after all files have been read
      avg = sum / count
      # one array element per option; element 4 is the "All" option and joins
      # the three results with newlines
      to_print[1] = min
      to_print[2] = max
      to_print[3] = avg
      to_print[4] = min "\n" max "\n" avg
      # print the element selected by the user input:
      #   input 1: prints min
      #   input 2: prints max
      #   input 3: prints avg
      #   input 4: prints min, max and avg on separate lines
      print to_print[select]
    }' {} +