404urlcleaner v0.3

pull/26/head
gotbletu 5 years ago
parent c6dfb87ecc
commit 903baed919

@ -11,8 +11,8 @@
### gotbleu@gmail.com
###
### Name : 404urlcleaner
### Version : 0.2
### Date : 20190502
### Version : 0.3
### Date : 20190508
### Description : Remove all dead urls with HTTP status code 404 (mainly used with newsboat/newsbeuter)
### Depends On : bash curl coreutils grep moreutils
### Video Demo : https://youtu.be/dTpEuQZRRDM
@ -52,10 +52,6 @@ TMPFILE1=/tmp/urlstatus.txt
# only 404 status code
TMPFILE2=/tmp/urlstatus2nd.txt
# remove temp files
# rm "$TMPFILE1"
# rm "$TMPFILE2"
# create backup file
echo -e "${Green}>>>Creating Backup File${Color_Off}"
cp -aiv "$1" "$1_`date +'%F_%Hh%Ms%S'`"
@ -69,7 +65,7 @@ do
done
# double check 404 links again (sometimes the first check is not accurate)
echo -e "${Blue}>>>Double Checking HTTP Status Code${Color_Off}"
echo -e "${Blue}>>>Double Checking ONLY DEAD HTTP Status Code${Color_Off}"
grep 'C0DE-404' -i "$TMPFILE1" | awk '{print $1}' | while read url
do
urlstatus=$(curl -o /dev/null --silent --head --write-out '%{http_code}' "$url")
@ -77,14 +73,20 @@ do
done
# remove all 404
grep 'C0DE-404' -i "$TMPFILE2" | awk '{print $1}' | while read url
grep 'C0DE-404' -s -i "$TMPFILE2" | awk '{print $1}' | while read url
do
grep -F -v "$url" "$1" | sponge "$1"
echo -e "${Red}>>>REMOVING $url ${Color_Off}"
done
# remove temp files
rm "$TMPFILE1"
rm "$TMPFILE2"
# remove temp files if they exist
if [ -f "$TMPFILE1" ] ; then
rm "$TMPFILE1"
fi
if [ -f "$TMPFILE2" ] ; then
rm "$TMPFILE2"
fi
echo -e "${Green}>>>Scan Completed${Color_Off}"

Loading…
Cancel
Save