#!/bin/bash
#
# Dialogue ellipsis statistics.
#
# Usage: <script> [DIR]
#
# Scans every regular file in DIR (default: current directory) whose name
# contains "ENUS" -- this assumes that *all* English dialogue filenames carry
# an ENUS language tag -- and extracts the quoted text on each line (from the
# first '"' to the last '"' on that line). It then reports:
#   - total characters inside the quotes (the quotes themselves and line
#     terminators are not counted)
#   - total number of quoted lines
#   - number of '...' ellipses and number of lines containing at least one
#   - both ellipsis figures as percentages (computed with bc, scale=2)

# Totals accumulated across all processed files.
S_LEN=0      # characters between the quotes
E_LEN=0      # characters belonging to '...' matches (3 per ellipsis)
LINE_CNT=0   # quoted lines seen
ELINE_CNT=0  # quoted lines containing at least one '...'

# Print (part * 100) / whole with two decimals, as formatted by bc.
# Prints "0" when whole is 0 so that empty input does not trigger a
# divide-by-zero error in bc.
percent() {
	local part=$1
	local whole=$2

	if (( whole == 0 )); then
		printf '0\n'
		return 0
	fi
	printf 'scale=2; (%s * 100) / %s\n' "${part}" "${whole}" | bc
}

# Add the statistics of one dialogue file ($1) to the global totals.
# Globals written: S_LEN, E_LEN, LINE_CNT, ELINE_CNT
accumulate_file() {
	local file=$1
	local line without_ellipses hits

	# grep -o prints one '"..."' match per line; when nothing matches the
	# loop body simply never runs, so files without quoted text add nothing.
	while IFS= read -r line; do
		(( LINE_CNT += 1 ))
		# Every match starts and ends with '"', hence the -2.
		(( S_LEN += ${#line} - 2 ))

		# Removing every non-overlapping '...' shortens the line by 3
		# characters per ellipsis, which gives the ellipsis count.
		without_ellipses=${line//"..."/}
		hits=$(( (${#line} - ${#without_ellipses}) / 3 ))
		if (( hits > 0 )); then
			(( ELINE_CNT += 1 ))
			(( E_LEN += hits * 3 ))
		fi
	done < <(grep -o '".*"' -- "${file}")
}

# Entry point: $1 is the directory to scan (defaults to ".").
main() {
	local dir=${1:-.}
	local path

	if [[ ! -d ${dir} ]]; then
		printf 'error: %s is not a directory\n' "${dir}" >&2
		exit 1
	fi

	# Glob instead of parsing ls so names with whitespace survive intact.
	for path in "${dir}"/*ENUS*; do
		# Skips directories and the literal pattern left by a non-matching glob.
		[[ -f ${path} ]] || continue
		accumulate_file "${path}"
	done

	echo "Total characters: ${S_LEN}"
	echo "Total lines: ${LINE_CNT}"
	echo "Total ellipses count: $(( E_LEN / 3 ))"
	echo "Lines containing ellipses: ${ELINE_CNT}"
	echo "Ellipses/characters: $(percent "${E_LEN}" "${S_LEN}")%"
	echo "Lines containing ellipses: $(percent "${ELINE_CNT}" "${LINE_CNT}")%"
}

main "$@"
