Principalmente lo uso per scaricare tantissime registrazioni live di vari concerti registrati a mio avviso in maniera impeccabile.
Nel sito si trova una guida per scaricare in bulk usando wget e gli strumenti del sito, ma risulta piuttosto prolissa e complicata se si vuole fare un download al volo.
Questo è lo script che uso, modificato a partire da questo script: è scritto in bash e funziona su tutte le distribuzioni sulle quali sono installati wget, tail e sed.
#!/bin/bash
# archivedownload.sh - bulk-download every item of an Archive.org collection.
#
# Usage: archivedownload.sh collectionname
#
# Steps:
#   1. fetch the identifier list of the collection as CSV (advanced search);
#   2. drop the first (header) line and strip the double quotes;
#   3. hand the identifiers to a recursive wget based at /download/.
#
# Downloaded files land in the current directory (wget -nd). The two
# intermediate list files live in a private temporary directory that is
# removed on every exit path.

set -u

# Comma-separated suffixes to accept, for example ".flac,.mp3".
# Leave empty to accept every file type.
filetype=""
# Comma-separated suffixes to reject. Leave empty to reject nothing.
# NOTE: a shell assignment must not have spaces around '='.
fileremove=".null"

collection="${1:-}"
if [[ -z "$collection" ]]; then
  {
    echo "USAGE: archivedownload.sh collectionname"
    echo "See Archive.org entry page for the collection name."
    echo "Collection name must be entered exactly as shown: lower case, with hyphens."
  } >&2
  exit 1
fi

workdir=$(mktemp -d) || {
  echo "Unable to create a temporary directory." >&2
  exit 1
}
trap 'rm -rf -- "$workdir"' EXIT

raw_list="$workdir/identifiers.txt"
id_list="$workdir/processedidentifiers.txt"
search_url="http://archive.org/advancedsearch.php?q=collection%3A${collection}&fl%5B%5D=identifier&sort%5B%5D=identifier+asc&sort%5B%5D=&sort%5B%5D=&rows=9999&page=1&callback=callback&save=yes&output=csv"

echo "Downloading list of entries for collection name $collection..."
if ! wget -nd -q "$search_url" -O "$raw_list"; then
  echo "Unable to download the entry list for collection $collection." >&2
  exit 1
fi

echo "Processing entry list for wget parsing..."
tail -n +2 "$raw_list" | sed 's/"//g' > "$id_list"

# sed '$=' prints the number of the last line, and nothing for an empty file.
count=$(sed -n '$=' "$id_list")
count=${count:-0}
if [[ "$count" -eq 0 ]]; then
  echo "No identifiers found for collection $collection. Check name and try again." >&2
  exit 1
fi

# Build the option list as an array so that every value stays one word.
# '-e' takes the wgetrc command that follows it, so 'robots=off' must come
# immediately after it.
wget_args=(-r -H -nc -np -nH -nd -e robots=off)
if [[ -n "$filetype" ]]; then
  wget_args+=(-A "$filetype")
fi
if [[ -n "$fileremove" ]]; then
  wget_args+=(-R "$fileremove")
fi

echo "Beginning wget download of $count identifiers..."
wget "${wget_args[@]}" -i "$id_list" -B 'http://archive.org/download/'
status=$?
if [[ "$status" -ne 0 ]]; then
  echo "wget exited with status $status; some files may be missing." >&2
  exit "$status"
fi
echo "Complete."
# archivedownload.sh - bulk-download every item of an Archive.org collection.
#
# Usage: archivedownload.sh collectionname
#
# Steps:
#   1. fetch the identifier list of the collection as CSV (advanced search);
#   2. drop the first (header) line and strip the double quotes;
#   3. hand the identifiers to a recursive wget based at /download/.
#
# Downloaded files land in the current directory (wget -nd). The two
# intermediate list files live in a private temporary directory that is
# removed on every exit path.

set -u

# Comma-separated suffixes to accept, for example ".flac,.mp3".
# Leave empty to accept every file type.
filetype=""
# Comma-separated suffixes to reject. Leave empty to reject nothing.
# NOTE: a shell assignment must not have spaces around '='.
fileremove=".null"

collection="${1:-}"
if [[ -z "$collection" ]]; then
  {
    echo "USAGE: archivedownload.sh collectionname"
    echo "See Archive.org entry page for the collection name."
    echo "Collection name must be entered exactly as shown: lower case, with hyphens."
  } >&2
  exit 1
fi

workdir=$(mktemp -d) || {
  echo "Unable to create a temporary directory." >&2
  exit 1
}
trap 'rm -rf -- "$workdir"' EXIT

raw_list="$workdir/identifiers.txt"
id_list="$workdir/processedidentifiers.txt"
search_url="http://archive.org/advancedsearch.php?q=collection%3A${collection}&fl%5B%5D=identifier&sort%5B%5D=identifier+asc&sort%5B%5D=&sort%5B%5D=&rows=9999&page=1&callback=callback&save=yes&output=csv"

echo "Downloading list of entries for collection name $collection..."
if ! wget -nd -q "$search_url" -O "$raw_list"; then
  echo "Unable to download the entry list for collection $collection." >&2
  exit 1
fi

echo "Processing entry list for wget parsing..."
tail -n +2 "$raw_list" | sed 's/"//g' > "$id_list"

# sed '$=' prints the number of the last line, and nothing for an empty file.
count=$(sed -n '$=' "$id_list")
count=${count:-0}
if [[ "$count" -eq 0 ]]; then
  echo "No identifiers found for collection $collection. Check name and try again." >&2
  exit 1
fi

# Build the option list as an array so that every value stays one word.
# '-e' takes the wgetrc command that follows it, so 'robots=off' must come
# immediately after it.
wget_args=(-r -H -nc -np -nH -nd -e robots=off)
if [[ -n "$filetype" ]]; then
  wget_args+=(-A "$filetype")
fi
if [[ -n "$fileremove" ]]; then
  wget_args+=(-R "$fileremove")
fi

echo "Beginning wget download of $count identifiers..."
wget "${wget_args[@]}" -i "$id_list" -B 'http://archive.org/download/'
status=$?
if [[ "$status" -ne 0 ]]; then
  echo "wget exited with status $status; some files may be missing." >&2
  exit "$status"
fi
echo "Complete."
Francesco Mecca