|
|
|
@ -11,8 +11,8 @@
|
|
|
|
|
### gotbleu@gmail.com
|
|
|
|
|
###
|
|
|
|
|
### Name : 404urlcleaner
|
|
|
|
|
### Version : 0.2
|
|
|
|
|
### Date : 20190502
|
|
|
|
|
### Version : 0.3
|
|
|
|
|
### Date : 20190508
|
|
|
|
|
### Description : Remove all dead URLs with HTTP status code 404 (mainly used with newsboat/newsbeuter)
|
|
|
|
|
### Depends On : bash curl coreutils grep moreutils
|
|
|
|
|
### Video Demo : https://youtu.be/dTpEuQZRRDM
|
|
|
|
@ -52,10 +52,6 @@ TMPFILE1=/tmp/urlstatus.txt
|
|
|
|
|
# only 404 status code
|
|
|
|
|
TMPFILE2=/tmp/urlstatus2nd.txt
|
|
|
|
|
|
|
|
|
|
# remove temp files
|
|
|
|
|
# rm "$TMPFILE1"
|
|
|
|
|
# rm "$TMPFILE2"
|
|
|
|
|
|
|
|
|
|
# create backup file
|
|
|
|
|
echo -e "${Green}>>>Creating Backup File${Color_Off}"
|
|
|
|
|
cp -aiv "$1" "$1_`date +'%F_%Hh%Ms%S'`"
|
|
|
|
@ -69,7 +65,7 @@ do
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
# double check 404 links again (sometimes the first check is not accurate)
|
|
|
|
|
echo -e "${Blue}>>>Double Checking HTTP Status Code${Color_Off}"
|
|
|
|
|
echo -e "${Blue}>>>Double Checking ONLY DEAD HTTP Status Code${Color_Off}"
|
|
|
|
|
grep 'C0DE-404' -i "$TMPFILE1" | awk '{print $1}' | while read url
|
|
|
|
|
do
|
|
|
|
|
urlstatus=$(curl -o /dev/null --silent --head --write-out '%{http_code}' "$url")
|
|
|
|
@ -77,14 +73,20 @@ do
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
# remove all 404
|
|
|
|
|
grep 'C0DE-404' -i "$TMPFILE2" | awk '{print $1}' | while read url
|
|
|
|
|
grep 'C0DE-404' -s -i "$TMPFILE2" | awk '{print $1}' | while read url
|
|
|
|
|
do
|
|
|
|
|
grep -F -v "$url" "$1" | sponge "$1"
|
|
|
|
|
echo -e "${Red}>>>REMOVING $url ${Color_Off}"
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
# remove temp files
|
|
|
|
|
rm "$TMPFILE1"
|
|
|
|
|
rm "$TMPFILE2"
|
|
|
|
|
# remove temp files if they exist
|
|
|
|
|
if [ -f "$TMPFILE1" ] ; then
|
|
|
|
|
rm "$TMPFILE1"
|
|
|
|
|
fi
|
|
|
|
|
if [ -f "$TMPFILE2" ] ; then
|
|
|
|
|
rm "$TMPFILE2"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
echo -e "${Green}>>>Scan Completed${Color_Off}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|