view stuff/wordfreq @ 59:9b01685b5ee2

applied language and formatting requests by PersBib
author markus schnalke <meillo@marmaro.de>
date Sun, 26 Oct 2014 10:34:33 +0100
parents eb0815f21f04
children
line wrap: on
line source

#!/bin/sh
#
# wordfreq - print word frequency
#
# Usage: wordfreq [file ...]
#
# All arguments are handed to deroff(1) unchanged.
#
# Output: one line per distinct (lower-cased) word, in the form
#	word<TAB>fraction<TAB>count
# where fraction is count divided by the total number of words, printed
# with two decimals.  Lines are sorted by descending count.
#
# NOTE(review): the non-ASCII letters below are only treated as whole
# characters if tr(1) is multibyte-aware in the current locale -- confirm
# on the target platforms.

deroff "$@" |
	# Put every word on its own line: anything that is not a letter
	# or a hyphen becomes a newline.
	tr -c 'A-Za-zÄÖÜäöüß-' '\n' |
	# Fold to lower case so that "Word" and "word" are counted together.
	tr A-ZÄÖÜ a-zäöü |
	# Drop empty lines and tokens made up of hyphens only; the hyphen is
	# kept by the first tr for compound words, but a free-standing dash
	# ("foo - bar", "--") is not a word and must not be counted.
	sed '/^-*$/d' |
	sort |
	uniq -c |
	awk '
		# uniq -c emits "<count> <word>": remember each count and the total.
		{ total += $1; count[$2] = $1 }
		END {
			for (word in count)
				printf("%s\t%.2f\t%4d\n", word, count[word] / total, count[word])
		}
	' |
	# The count is the third field; highest count first.
	sort -nr -k 3