#!/bin/bash
#
# ablexicon - Compare an input list of words against a dictionary and
# optional lexicon.  If any words are in neither the dictionary nor the
# lexicon, log them to stdout.
#
# errexit: stop at the first command that fails outside a condition.
# noglob:  never expand wildcard characters coming from unquoted words.
set -o errexit -o noglob

#
# usage - write the help text to stdout, then terminate the script with
# exit status 1.  Any arguments passed to the function are ignored.
#
usage() {
    local -a help_lines=(
        "Find occurrences of non-dictionary/lexicon words"
        ""
        "Usage:"
        " ${0##*/} [options]"
        ""
        "Options:"
        " -f, --file         source text (defaults to /dev/fd/0)"
        " -l, --lexicon      lexicon file (one word per line)"
        " -h, --help         display this help"
    )

    # One array element per output line.
    printf '%s\n' "${help_lines[@]}"
    exit 1
}

#
# Verify that required commands are present
#
REQUIRED=( "spell" "getopt" )
for i in "${REQUIRED[@]}"
do
    # Test the lookup directly in the "if".  The script runs with errexit
    # (set -e), so a bare failing "command -v" would end the script before
    # the diagnostic below could be printed.
    if ! command -v "$i" >/dev/null
    then
        echo "'$i' must be installed, exiting..." >&2
        exit 1
    fi
done

#
# parse_args - parse the command line into the globals INFILE and LEXICON.
#
# Arguments: the script's command-line arguments.
# Globals:   INFILE  (written) input path, defaults to /dev/fd/0
#            LEXICON (written) lexicon path, defaults to /dev/null
# Exits with status 1 when getopt rejects the arguments; -h/--help hands
# control to usage.
#
function parse_args () {
    local parsed

    # Run getopt inside the "if": with errexit (set -e) a failing plain
    # assignment would end the script before the message could be printed.
    if ! parsed=$(getopt -o hf:l: --long help,file:,lexicon: -n "${0##*/}" -- "$@")
    then
        echo "Exiting..." >&2
        exit 1
    fi

    # getopt prints a shell-quoted argument list; eval restores the original
    # word boundaries (e.g. file names containing spaces).
    eval set -- "$parsed"

    INFILE=/dev/fd/0
    LEXICON=/dev/null
    while true; do
        case "$1" in
            -h | --help ) usage "$0" ;;
            -f | --file ) INFILE="$2"; shift 2 ;;
            -l | --lexicon ) LEXICON="$2"; shift 2 ;;
            -- ) shift; break ;;
            * ) break ;;
        esac
    done
}

parse_args "$@"

#
# Accept only a regular file or the stdin default.  The path is quoted so
# that names containing whitespace are tested as a single word.
#
if [[ ! -f "$INFILE" && "$INFILE" != /dev/fd/0 ]]
then
    echo "Invalid input file" >&2
    usage
fi
#
# Read the lexicon into an array
#
# Quote the path so that a lexicon file name containing whitespace reaches
# the redirection as a single word.
readarray -t lexicon < "$LEXICON"
lexicon_size="${#lexicon[@]}"

#
# report_unknown_words - read words from stdin (one per line) and print
# those that are not an entry of the global "lexicon" array.
#
# Globals: lexicon (read)
# Outputs: unknown words on stdout, one per line
# Returns: 0
#
function report_unknown_words () {
    local -A known=()
    local entry word key

    #
    # Index the lexicon once so each word costs a single lookup instead of
    # a scan over the whole array.  The "w:" prefix keeps every key
    # non-empty, because bash rejects an empty associative-array subscript
    # (a blank lexicon line would otherwise be an error).
    #
    for entry in "${lexicon[@]}"
    do
        key="w:$entry"
        known[$key]=1
    done

    while IFS= read -r word
    do
        [[ -n "$word" ]] || continue
        key="w:$word"
        if [[ -z "${known[$key]+set}" ]]
        then
            #
            # The word is neither in the dictionary nor the lexicon, send
            # it to stdout.  printf prints words such as "-n" verbatim.
            #
            printf '%s\n' "$word"
        fi
    done
    return 0
}

#
# Let spell drop every word found in the dictionary, sort and de-duplicate
# what is left, then filter those words against the lexicon.
#
spell < "$INFILE" | sort -u | report_unknown_words
