#!/usr/bin/env bash
#
# Convert an HTML dump of tweets into fortune-style entries.
#
# Usage: this-script [html_file]        (html_file defaults to dril.html)
#
# Stdout: one entry per kept tweet -- the tweet text, an attribution line,
#         then a line holding a single '%'.
# Stderr: a "Processed: i / total" progress indicator, redrawn in place.
# Exit:   non-zero when the input file cannot be read.
#
# `set -e` / `pipefail` are deliberately not enabled: the grep filters exit
# with status 1 when they select nothing, which is not an error here.

# NOTE(review): the literal HTML tags were not recoverable from the reviewed
# copy of this script (they had been stripped from the patterns). The three
# values below are assumptions -- confirm them against the real dril.html.
# TWEET_OPEN / TWEET_CLOSE are used as basic regular expressions inside a
# sed 's%...%...%' command, so they must not contain '%'.
readonly TWEET_OPEN='<p>'
readonly TWEET_CLOSE='</p>'
readonly LINE_BREAK='<br>'

#######################################
# Filter raw HTML down to one tweet body per line.
# Globals:   TWEET_OPEN, TWEET_CLOSE (read)
# Inputs:    HTML on stdin
# Outputs:   kept tweet bodies on stdout, one per line; line-break tags are
#            left in place so each tweet stays on a single line
#######################################
extract_tweets() {
  # war crimes: pick out tweet paragraphs with line-oriented regexes
  grep -- "${TWEET_OPEN}" |
    sed -e "s%${TWEET_OPEN}\(.*\)${TWEET_CLOSE}%\1%" |
    # remove links
    grep -vE 'https?://' |
    # remove RTs and replies (but not inline @'s)
    grep -v -e '^RT @' -e '^@' |
    # unescape html entities; &amp; goes last so that "&amp;lt;" becomes the
    # text "&lt;" instead of being unescaped twice into "<"
    # NOTE(review): the &gt; rule is inferred from a garbled pattern -- confirm
    sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g'
}

#######################################
# Print one fortune entry for a single tweet.
# Globals:   LINE_BREAK (read)
# Arguments: $1 - tweet body (single line, may contain LINE_BREAK tags)
# Outputs:   tweet text (tags turned into newlines), attribution, '%' line
#######################################
emit_fortune() {
  local tweet=$1
  local newline=$'\n'
  # Parameter expansion instead of sed: a "\n" in a sed replacement is a GNU
  # extension, and this also avoids one fork per tweet.
  local text=${tweet//"${LINE_BREAK}"/${newline}}
  # printf rather than echo, so a tweet such as "-n" is printed verbatim.
  printf '%s\n' "${text}"
  printf '%s\n' ' -- dril' '%'
}

#######################################
# Entry point: read the HTML file, emit entries, report progress.
# Arguments: $1 - input HTML file (optional, defaults to dril.html)
# Returns:   0 on success, 1 if the input file is not readable
#######################################
main() {
  local input=${1:-dril.html}
  if [[ ! -r "${input}" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "${input}" >&2
    return 1
  fi

  # Collect the tweets first so the total shown in the progress line is
  # exact, and so that empty input produces no entries at all.
  local -a tweets=()
  local line
  # -r keeps backslashes in tweets literal; the default IFS still trims
  # leading and trailing whitespace from each line.
  while read -r line; do
    tweets+=("${line}")
  done < <(extract_tweets <"${input}")

  local total=${#tweets[@]}
  local processed=0
  local tweet
  for tweet in "${tweets[@]}"; do
    emit_fortune "${tweet}"
    processed=$((processed + 1))
    printf '\rProcessed: %d / %d' "${processed}" "${total}" >&2
  done

  # Finish the in-place progress line so the shell prompt is not drawn over it.
  if ((total > 0)); then
    printf '\n' >&2
  fi
  return 0
}

main "$@"