diff stuff/wordfreq @ 57:eb0815f21f04

added some auxiliary files: e.g. statistics
author markus schnalke <meillo@marmaro.de>
date Mon, 20 Oct 2014 07:09:57 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stuff/wordfreq	Mon Oct 20 07:09:57 2014 +0200
@@ -0,0 +1,10 @@
+#!/bin/sh
+#
+# wordfreq: list every word of the deroff-ed input, lowercased, as
+# tab-separated "word, share of all words, count", highest count first
+deroff "$@" | tr -c 'A-Za-zÄÖÜäöüß-' '\n' | tr A-ZÄÖÜ a-zäöü |
+	sed '/^ *$/d' | sort | uniq -c |
+	awk '
+		{ total += $1; cnt[$2] = $1 }
+		END { for (w in cnt) printf("%s\t%.2f\t%4d\n", w, cnt[w]/total, cnt[w]) }
+	' | sort -nr -k 3