Bulk download and extract ZIP files from URLs
Snippet
#!/bin/bash
# ---------------------------------------------------------------------------
# Settings - edit the three paths below before running the script.
# ---------------------------------------------------------------------------
# Text file that lists the ZIP URLs to fetch.
URLS_FILE='/path/to/urls.txt'
# Directory that receives the extracted contents.
TARGET_DIR='/path/to/target/directory'
# Scratch directory that holds each archive while it is being processed.
# NOTE(review): a fixed name under /tmp is predictable; on a shared host
# consider pointing this at a private directory instead.
TEMP_DIR='/tmp/zip_downloads'

# ANSI color sequences for the log prefixes. They are stored as literal
# backslash sequences and only turned into escape characters when printed.
NC="\033[0m" # reset / no color
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
# Print an informational message to stdout, prefixed with a green [INFO] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
print_status() {
  # %b expands the escape sequences stored in the color variables, while %s
  # prints the message literally, so a backslash inside a path or URL is not
  # mangled the way `echo -e` would mangle it.
  printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
}
# Print a warning to stderr, prefixed with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
print_warning() {
  # Diagnostics go to stderr so they stay visible when stdout is redirected.
  # %s keeps backslashes in the message literal (unlike `echo -e`).
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1" >&2
}
# Print an error to stderr, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
print_error() {
  # Errors belong on stderr so they are not mixed into redirected output.
  # %s keeps backslashes in the message literal (unlike `echo -e`).
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
}
# Make sure the external tools the script relies on can be found.
# A downloader (either wget or curl) and unzip are required.
# Exits the script with status 1, after listing what is missing, when any
# requirement is not met.
check_dependencies() {
  local -a absent=()

  # One downloader is enough; complain only when both are unavailable.
  if ! command -v wget >/dev/null 2>&1 && ! command -v curl >/dev/null 2>&1; then
    absent+=("wget or curl")
  fi

  if ! command -v unzip >/dev/null 2>&1; then
    absent+=("unzip")
  fi

  if [ "${#absent[@]}" -eq 0 ]; then
    return 0
  fi

  print_error "Missing dependencies: ${absent[*]}"
  exit 1
}
# Verify the configured inputs before any work starts.
# Globals: URLS_FILE, TARGET_DIR (read)
# Exits the script with status 1 when the URL list is missing or unreadable,
# or when the target directory does not exist and cannot be created.
validate_inputs() {
  # The URL list has to be an existing regular file ...
  [ -f "$URLS_FILE" ] || {
    print_error "URLs file not found: $URLS_FILE"
    exit 1
  }

  # ... that the current user is allowed to read.
  [ -r "$URLS_FILE" ] || {
    print_error "Cannot read URLs file: $URLS_FILE"
    exit 1
  }

  # Nothing more to do when the destination is already in place.
  if [ -d "$TARGET_DIR" ]; then
    return 0
  fi

  print_warning "Target directory doesn't exist. Creating: $TARGET_DIR"
  if ! mkdir -p "$TARGET_DIR"; then
    print_error "Failed to create target directory: $TARGET_DIR"
    exit 1
  fi
}
# Ensure the scratch directory for downloaded archives exists.
# Globals: TEMP_DIR (read)
# Exits the script with status 1 when the directory cannot be created.
setup_temp_dir() {
  # Already there: nothing to create.
  if [ -d "$TEMP_DIR" ]; then
    return 0
  fi

  if mkdir -p "$TEMP_DIR"; then
    return 0
  fi

  print_error "Failed to create temporary directory: $TEMP_DIR"
  exit 1
}
# Download one URL into the temporary directory.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL to fetch
#            $2 - file name to store the download under, inside TEMP_DIR
# Returns:   0 when the transfer succeeded and the file exists, 1 otherwise.
#            On failure any partially written file is removed.
download_file() {
  local url="$1"
  local filename="$2"
  local filepath="$TEMP_DIR/$filename"
  local rc=0

  print_status "Downloading: $url"

  # Prefer wget and fall back to curl. The exit status is captured right at
  # the call site, and `--` keeps a URL that starts with '-' from being
  # parsed as an option.
  if command -v wget >/dev/null 2>&1; then
    wget -q -O "$filepath" -- "$url" || rc=$?
  elif command -v curl >/dev/null 2>&1; then
    # -f makes curl fail on HTTP errors (404, 500, ...) instead of saving the
    # server's error page as if it were the archive; -S keeps the error
    # message visible even though -s silences the progress meter.
    curl -f -s -S -L -o "$filepath" -- "$url" || rc=$?
  else
    print_error "Neither wget nor curl is available"
    return 1
  fi

  if [ "$rc" -eq 0 ] && [ -f "$filepath" ]; then
    print_status "Downloaded successfully: $filename"
    return 0
  fi

  # wget -O creates the output file before the transfer starts, so a failed
  # download would otherwise leave an empty or truncated file behind.
  rm -f -- "$filepath"
  print_error "Failed to download: $url"
  return 1
}
# Unpack one downloaded archive into the target directory.
# Globals:   TARGET_DIR (read)
# Arguments: $1 - path of the zip file to extract
# Returns:   0 when unzip succeeds, 1 otherwise.
extract_zip() {
  local zipfile="$1"
  local archive_name
  archive_name=$(basename "$zipfile")

  print_status "Extracting: $archive_name"

  # -q: quiet, -o: overwrite existing files without prompting.
  if ! unzip -q -o "$zipfile" -d "$TARGET_DIR"; then
    print_error "Failed to extract: $archive_name"
    return 1
  fi

  print_status "Extracted successfully: $archive_name"
  return 0
}
# Delete a downloaded archive once it is no longer needed.
# Arguments: $1 - path of the zip file to remove
# A failed removal is only reported as a warning.
cleanup_zip() {
  local zipfile="$1"
  local archive_name
  archive_name=$(basename "$zipfile")

  print_status "Cleaning up: $archive_name"

  # rm -f does not complain when the file is already gone.
  if rm -f "$zipfile"; then
    print_status "Cleaned up: $archive_name"
  else
    print_warning "Failed to clean up: $archive_name"
  fi
}
# Download, extract and clean up the archive behind a single URL.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL of the zip file
# Returns:   0 when download and extraction both succeed, 1 otherwise.
process_url() {
  local url="$1"
  local archive_name
  archive_name=$(basename "$url")

  # Store the download under a .zip name even when the URL's last path
  # component does not end in one.
  case "$archive_name" in
    *.zip) ;;
    *) archive_name="${archive_name}.zip" ;;
  esac

  local archive_path="$TEMP_DIR/$archive_name"
  print_status "Processing: $url"

  # Without a download there is nothing to extract or clean up.
  download_file "$url" "$archive_name" || return 1

  local status=1
  if extract_zip "$archive_path"; then
    status=0
  fi

  # The archive is removed whether or not extraction worked.
  cleanup_zip "$archive_path"

  if [ "$status" -eq 0 ]; then
    print_status "Successfully processed: $url"
  fi
  return "$status"
}
# Main entry point: validate the setup, process every URL listed in
# URLS_FILE and print a summary.
# Globals: URLS_FILE, TARGET_DIR, TEMP_DIR (read)
# Returns: 0 when every URL was processed successfully, 1 when at least one
#          URL failed, so callers (cron, CI, wrappers) can detect failures.
main() {
  print_status "Starting zip download and extraction script"
  print_status "URLs file: $URLS_FILE"
  print_status "Target directory: $TARGET_DIR"
  print_status "Temporary directory: $TEMP_DIR"

  # Each of these exits the script on a fatal problem.
  check_dependencies
  validate_inputs
  setup_temp_dir

  local success_count=0
  local failure_count=0
  local total_count=0
  local failed_urls=()
  local url failed_url

  # The list is read on file descriptor 3 rather than stdin, so a tool
  # started inside the loop cannot swallow the remaining lines.
  # `|| [[ -n "$url" ]]` keeps a final line without a trailing newline.
  while IFS= read -r -u 3 url || [[ -n "$url" ]]; do
    # Trim leading and trailing whitespace with parameter expansion. This
    # also drops the carriage return of CRLF files and, unlike piping through
    # xargs, leaves quotes and backslashes inside the URL untouched.
    url="${url#"${url%%[![:space:]]*}"}"
    url="${url%"${url##*[![:space:]]}"}"

    # Skip blank (including whitespace-only) lines and comments.
    if [[ -z "$url" || "$url" == \#* ]]; then
      continue
    fi

    total_count=$((total_count + 1))
    if process_url "$url"; then
      success_count=$((success_count + 1))
    else
      failure_count=$((failure_count + 1))
      failed_urls+=("$url")
    fi
    printf '\n' # Blank line between URLs
  done 3< "$URLS_FILE"

  # Final cleanup: rmdir only succeeds on an empty directory, so leftover
  # files are never deleted.
  if [[ -d "$TEMP_DIR" ]] && rmdir -- "$TEMP_DIR" 2>/dev/null; then
    print_status "Removed empty temporary directory"
  fi

  print_status "Processing complete!"
  print_status "Total URLs processed: $total_count"
  print_status "Successful: $success_count"

  if ((failure_count > 0)); then
    print_warning "Failed: $failure_count"
    printf '\n'
    print_error "Failed URLs:"
    for failed_url in "${failed_urls[@]}"; do
      printf ' - %s\n' "$failed_url"
    done
    printf '\n'
    print_status "You can create a new URLs file with just the failed URLs to retry them."
    return 1
  fi
  return 0
}
# Script entry point: run main, forwarding the command-line arguments
# unchanged. main does not read them; it is driven by the variables set at
# the top of the script.
main "$@"
By
xtream1101
•
•
Updated
2025-08-05 13:35