Bulk download and extract ZIP files from a list of URLs

#!/bin/bash

# Configuration variables - modify these as needed
URLS_FILE="/path/to/urls.txt"              # Path to file containing list of ZIP URLs
TARGET_DIR="/path/to/target/directory"      # Directory where files will be extracted
TEMP_DIR="/tmp/zip_downloads"              # Temporary directory for downloads

# Colors for output
# The escape sequences are only enabled when stdout is a terminal and the
# NO_COLOR environment variable is unset or empty, so redirected output and
# log files are not filled with raw ANSI codes. When colors are disabled every
# variable is the empty string, which the print helpers expand to nothing.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    NC='\033[0m' # No Color
else
    RED=''
    GREEN=''
    YELLOW=''
    NC=''
fi

# Function to print colored output
# Prints an informational message to stdout, prefixed with a "[INFO]" tag.
# Globals:   GREEN, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_status() {
    # %b expands the escapes stored in the color variables; %s prints the
    # message verbatim, so backslashes in URLs or paths are never interpreted
    # as escape sequences (which `echo -e` would do).
    printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}

# Prints a warning message to stdout, prefixed with a "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_warning() {
    # %b renders the color escapes; %s keeps backslashes in the message
    # literal instead of letting `echo -e` interpret them.
    printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "$1"
}

# Prints an error message to stdout, prefixed with an "[ERROR]" tag.
# Globals:   RED, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_error() {
    # %b renders the color escapes; %s keeps backslashes in the message
    # literal instead of letting `echo -e` interpret them.
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1"
}

# Verify that the external tools this script relies on can be found.
# At least one downloader (wget or curl) plus unzip must resolve via
# `command -v`; otherwise every missing item is reported through print_error
# and the script exits with status 1.
check_dependencies() {
    local absent=()

    if ! command -v wget >/dev/null 2>&1 && ! command -v curl >/dev/null 2>&1; then
        absent+=("wget or curl")
    fi

    if ! command -v unzip >/dev/null 2>&1; then
        absent+=("unzip")
    fi

    # Everything present: nothing to report
    if [ "${#absent[@]}" -eq 0 ]; then
        return 0
    fi

    print_error "Missing dependencies: ${absent[*]}"
    exit 1
}

# Sanity-check the configured paths before any work starts.
# Globals: URLS_FILE (read), TARGET_DIR (read)
# Exits with status 1 if the URL list is missing or unreadable, or if the
# target directory is absent and cannot be created.
validate_inputs() {
    # The URL list must be an existing regular file...
    [ -f "$URLS_FILE" ] || {
        print_error "URLs file not found: $URLS_FILE"
        exit 1
    }

    # ...that the current user is allowed to read
    [ -r "$URLS_FILE" ] || {
        print_error "Cannot read URLs file: $URLS_FILE"
        exit 1
    }

    # Nothing more to do when the target directory is already there
    if [ -d "$TARGET_DIR" ]; then
        return 0
    fi

    print_warning "Target directory doesn't exist. Creating: $TARGET_DIR"
    if ! mkdir -p "$TARGET_DIR"; then
        print_error "Failed to create target directory: $TARGET_DIR"
        exit 1
    fi
}

# Ensure the download staging directory exists.
# Globals: TEMP_DIR (read)
# Creates TEMP_DIR (including parents) when it is absent; reports through
# print_error and exits with status 1 if it cannot be created.
setup_temp_dir() {
    # Already present: nothing to create
    [ -d "$TEMP_DIR" ] && return 0

    if ! mkdir -p "$TEMP_DIR"; then
        print_error "Failed to create temporary directory: $TEMP_DIR"
        exit 1
    fi
}

# Function to download file
# Downloads a single URL into TEMP_DIR using wget (preferred) or curl.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL to fetch
#            $2 - file name to store the download under, inside TEMP_DIR
# Returns:   0 when the transfer succeeded and produced a non-empty file,
#            1 otherwise (any partial or empty output file is removed)
download_file() {
    local url="$1"
    local filename="$2"
    local filepath="$TEMP_DIR/$filename"
    local rc=0

    print_status "Downloading: $url"

    # Try wget first, then curl. `--` keeps a URL that starts with '-' from
    # being parsed as an option.
    if command -v wget >/dev/null 2>&1; then
        wget -q -O "$filepath" -- "$url" || rc=$?
    elif command -v curl >/dev/null 2>&1; then
        # -f makes curl fail on HTTP errors (4xx/5xx) instead of saving the
        # server's error page as if it were the archive.
        curl -f -s -L -o "$filepath" -- "$url" || rc=$?
    else
        print_error "Neither wget nor curl is available"
        return 1
    fi

    # -s rather than -f: an empty file is never a valid download
    if [ "$rc" -eq 0 ] && [ -s "$filepath" ]; then
        print_status "Downloaded successfully: $filename"
        return 0
    fi

    # A failed transfer may leave an empty or partial output file behind;
    # drop it so stale files do not pile up in TEMP_DIR.
    rm -f -- "$filepath"
    print_error "Failed to download: $url"
    return 1
}

# Unpack one downloaded archive into the target directory.
# Globals:   TARGET_DIR (read)
# Arguments: $1 - path of the zip file to extract
# Returns:   0 if unzip succeeded, 1 otherwise
extract_zip() {
    local archive="$1"
    local label
    label=$(basename "$archive")

    print_status "Extracting: $label"

    # Unpack straight into the target directory and branch on unzip's status
    if unzip -q -o "$archive" -d "$TARGET_DIR"; then
        print_status "Extracted successfully: $label"
        return 0
    fi

    print_error "Failed to extract: $label"
    return 1
}

# Delete a downloaded archive once it is no longer needed.
# Arguments: $1 - path of the zip file to remove
# Reports the outcome through print_status / print_warning.
cleanup_zip() {
    local target="$1"
    local label
    label=$(basename "$target")

    print_status "Cleaning up: $label"

    if rm -f "$target"; then
        print_status "Cleaned up: $label"
    else
        print_warning "Failed to clean up: $label"
    fi
}

# Function to process a single URL
# Downloads one archive, extracts it into the target directory and removes
# the downloaded file afterwards.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL of the zip archive
# Returns:   0 when download and extraction both succeeded, 1 otherwise
process_url() {
    local url="$1"

    # Derive the local file name from the URL path only: drop any "?query" or
    # "#fragment" suffix and trailing slashes, then keep the last path
    # component. (Plain `basename "$url"` would turn ".../data.zip?token=abc"
    # into "data.zip?token=abc.zip".)
    local path_part="${url%%[?#]*}"
    while [[ "$path_part" == */ ]]; do
        path_part="${path_part%/}"
    done
    local filename="${path_part##*/}"

    # Fall back to a generic name when the URL yields no usable component
    if [[ -z "$filename" ]]; then
        filename="download"
    fi

    # If filename doesn't end in .zip, append .zip
    if [[ ! "$filename" =~ \.zip$ ]]; then
        filename="${filename}.zip"
    fi

    local filepath="$TEMP_DIR/$filename"

    print_status "Processing: $url"

    # Download the file
    if ! download_file "$url" "$filename"; then
        # A failed transfer can leave an empty or partial file behind
        rm -f -- "$filepath"
        return 1
    fi

    # Extract, then remove the downloaded zip whether or not extraction worked
    if extract_zip "$filepath"; then
        cleanup_zip "$filepath"
        print_status "Successfully processed: $url"
        return 0
    fi

    cleanup_zip "$filepath"
    return 1
}

# Main execution function
# Reads URLS_FILE line by line, downloads and extracts every listed archive,
# then prints a summary of successes and failures.
# Globals:   URLS_FILE, TARGET_DIR, TEMP_DIR (read)
# Arguments: none are read
# Returns:   0 when every URL was processed successfully, 1 if any failed
main() {
    print_status "Starting zip download and extraction script"
    print_status "URLs file: $URLS_FILE"
    print_status "Target directory: $TARGET_DIR"
    print_status "Temporary directory: $TEMP_DIR"

    # Check dependencies
    check_dependencies

    # Validate inputs
    validate_inputs

    # Setup temporary directory
    setup_temp_dir

    # Process each URL
    local success_count=0
    local failure_count=0
    local total_count=0
    local failed_urls=()
    local url failed_url

    # The list is read on file descriptor 3 so that commands run inside the
    # loop cannot consume it through stdin. The `|| [[ -n "$url" ]]` clause
    # keeps a final line that has no trailing newline.
    while IFS= read -r -u 3 url || [[ -n "$url" ]]; do
        # Remove leading/trailing whitespace (including the CR of CRLF files)
        # with parameter expansion; unlike piping through xargs this leaves
        # quotes and backslashes in the URL untouched.
        url="${url#"${url%%[![:space:]]*}"}"
        url="${url%"${url##*[![:space:]]}"}"

        # Skip empty (or whitespace-only) lines and comments
        if [[ -z "$url" || "$url" == \#* ]]; then
            continue
        fi

        total_count=$((total_count + 1))

        if process_url "$url"; then
            success_count=$((success_count + 1))
        else
            failure_count=$((failure_count + 1))
            failed_urls+=("$url")
        fi

        echo "" # Add blank line between processes
    done 3< "$URLS_FILE"

    # Final cleanup - rmdir only succeeds on an empty directory, so a
    # non-empty temp directory is silently left in place
    if [[ -d "$TEMP_DIR" ]] && rmdir "$TEMP_DIR" 2>/dev/null; then
        print_status "Removed empty temporary directory"
    fi

    # Print summary
    print_status "Processing complete!"
    print_status "Total URLs processed: $total_count"
    print_status "Successful: $success_count"
    if (( failure_count > 0 )); then
        print_warning "Failed: $failure_count"
        echo ""
        print_error "Failed URLs:"
        for failed_url in "${failed_urls[@]}"; do
            echo "  - $failed_url"
        done
        echo ""
        print_status "You can create a new URLs file with just the failed URLs to retry them."
        # Let callers (cron, CI, wrapper scripts) detect partial failure
        return 1
    fi

    return 0
}

# Run the main function, passing the script's command-line arguments through
main "$@"

Bash Public

Snippet

#!/bin/bash

# Configuration variables - modify these as needed
URLS_FILE="/path/to/urls.txt"              # Path to file containing list of ZIP URLs
TARGET_DIR="/path/to/target/directory"      # Directory where files will be extracted
TEMP_DIR="/tmp/zip_downloads"              # Temporary directory for downloads

# Colors for output
# The escape sequences are only enabled when stdout is a terminal and the
# NO_COLOR environment variable is unset or empty, so redirected output and
# log files are not filled with raw ANSI codes. When colors are disabled every
# variable is the empty string, which the print helpers expand to nothing.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    NC='\033[0m' # No Color
else
    RED=''
    GREEN=''
    YELLOW=''
    NC=''
fi

# Function to print colored output
# Prints an informational message to stdout, prefixed with a "[INFO]" tag.
# Globals:   GREEN, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_status() {
    # %b expands the escapes stored in the color variables; %s prints the
    # message verbatim, so backslashes in URLs or paths are never interpreted
    # as escape sequences (which `echo -e` would do).
    printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$1"
}

# Prints a warning message to stdout, prefixed with a "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_warning() {
    # %b renders the color escapes; %s keeps backslashes in the message
    # literal instead of letting `echo -e` interpret them.
    printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "$1"
}

# Prints an error message to stdout, prefixed with an "[ERROR]" tag.
# Globals:   RED, NC (read) - color escape strings; may be empty
# Arguments: $1 - message text, printed literally
print_error() {
    # %b renders the color escapes; %s keeps backslashes in the message
    # literal instead of letting `echo -e` interpret them.
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1"
}

# Verify that the external tools this script relies on can be found.
# At least one downloader (wget or curl) plus unzip must resolve via
# `command -v`; otherwise every missing item is reported through print_error
# and the script exits with status 1.
check_dependencies() {
    local absent=()

    if ! command -v wget >/dev/null 2>&1 && ! command -v curl >/dev/null 2>&1; then
        absent+=("wget or curl")
    fi

    if ! command -v unzip >/dev/null 2>&1; then
        absent+=("unzip")
    fi

    # Everything present: nothing to report
    if [ "${#absent[@]}" -eq 0 ]; then
        return 0
    fi

    print_error "Missing dependencies: ${absent[*]}"
    exit 1
}

# Sanity-check the configured paths before any work starts.
# Globals: URLS_FILE (read), TARGET_DIR (read)
# Exits with status 1 if the URL list is missing or unreadable, or if the
# target directory is absent and cannot be created.
validate_inputs() {
    # The URL list must be an existing regular file...
    [ -f "$URLS_FILE" ] || {
        print_error "URLs file not found: $URLS_FILE"
        exit 1
    }

    # ...that the current user is allowed to read
    [ -r "$URLS_FILE" ] || {
        print_error "Cannot read URLs file: $URLS_FILE"
        exit 1
    }

    # Nothing more to do when the target directory is already there
    if [ -d "$TARGET_DIR" ]; then
        return 0
    fi

    print_warning "Target directory doesn't exist. Creating: $TARGET_DIR"
    if ! mkdir -p "$TARGET_DIR"; then
        print_error "Failed to create target directory: $TARGET_DIR"
        exit 1
    fi
}

# Ensure the download staging directory exists.
# Globals: TEMP_DIR (read)
# Creates TEMP_DIR (including parents) when it is absent; reports through
# print_error and exits with status 1 if it cannot be created.
setup_temp_dir() {
    # Already present: nothing to create
    [ -d "$TEMP_DIR" ] && return 0

    if ! mkdir -p "$TEMP_DIR"; then
        print_error "Failed to create temporary directory: $TEMP_DIR"
        exit 1
    fi
}

# Function to download file
# Downloads a single URL into TEMP_DIR using wget (preferred) or curl.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL to fetch
#            $2 - file name to store the download under, inside TEMP_DIR
# Returns:   0 when the transfer succeeded and produced a non-empty file,
#            1 otherwise (any partial or empty output file is removed)
download_file() {
    local url="$1"
    local filename="$2"
    local filepath="$TEMP_DIR/$filename"
    local rc=0

    print_status "Downloading: $url"

    # Try wget first, then curl. `--` keeps a URL that starts with '-' from
    # being parsed as an option.
    if command -v wget >/dev/null 2>&1; then
        wget -q -O "$filepath" -- "$url" || rc=$?
    elif command -v curl >/dev/null 2>&1; then
        # -f makes curl fail on HTTP errors (4xx/5xx) instead of saving the
        # server's error page as if it were the archive.
        curl -f -s -L -o "$filepath" -- "$url" || rc=$?
    else
        print_error "Neither wget nor curl is available"
        return 1
    fi

    # -s rather than -f: an empty file is never a valid download
    if [ "$rc" -eq 0 ] && [ -s "$filepath" ]; then
        print_status "Downloaded successfully: $filename"
        return 0
    fi

    # A failed transfer may leave an empty or partial output file behind;
    # drop it so stale files do not pile up in TEMP_DIR.
    rm -f -- "$filepath"
    print_error "Failed to download: $url"
    return 1
}

# Unpack one downloaded archive into the target directory.
# Globals:   TARGET_DIR (read)
# Arguments: $1 - path of the zip file to extract
# Returns:   0 if unzip succeeded, 1 otherwise
extract_zip() {
    local archive="$1"
    local label
    label=$(basename "$archive")

    print_status "Extracting: $label"

    # Unpack straight into the target directory and branch on unzip's status
    if unzip -q -o "$archive" -d "$TARGET_DIR"; then
        print_status "Extracted successfully: $label"
        return 0
    fi

    print_error "Failed to extract: $label"
    return 1
}

# Delete a downloaded archive once it is no longer needed.
# Arguments: $1 - path of the zip file to remove
# Reports the outcome through print_status / print_warning.
cleanup_zip() {
    local target="$1"
    local label
    label=$(basename "$target")

    print_status "Cleaning up: $label"

    if rm -f "$target"; then
        print_status "Cleaned up: $label"
    else
        print_warning "Failed to clean up: $label"
    fi
}

# Function to process a single URL
# Downloads one archive, extracts it into the target directory and removes
# the downloaded file afterwards.
# Globals:   TEMP_DIR (read)
# Arguments: $1 - URL of the zip archive
# Returns:   0 when download and extraction both succeeded, 1 otherwise
process_url() {
    local url="$1"

    # Derive the local file name from the URL path only: drop any "?query" or
    # "#fragment" suffix and trailing slashes, then keep the last path
    # component. (Plain `basename "$url"` would turn ".../data.zip?token=abc"
    # into "data.zip?token=abc.zip".)
    local path_part="${url%%[?#]*}"
    while [[ "$path_part" == */ ]]; do
        path_part="${path_part%/}"
    done
    local filename="${path_part##*/}"

    # Fall back to a generic name when the URL yields no usable component
    if [[ -z "$filename" ]]; then
        filename="download"
    fi

    # If filename doesn't end in .zip, append .zip
    if [[ ! "$filename" =~ \.zip$ ]]; then
        filename="${filename}.zip"
    fi

    local filepath="$TEMP_DIR/$filename"

    print_status "Processing: $url"

    # Download the file
    if ! download_file "$url" "$filename"; then
        # A failed transfer can leave an empty or partial file behind
        rm -f -- "$filepath"
        return 1
    fi

    # Extract, then remove the downloaded zip whether or not extraction worked
    if extract_zip "$filepath"; then
        cleanup_zip "$filepath"
        print_status "Successfully processed: $url"
        return 0
    fi

    cleanup_zip "$filepath"
    return 1
}

# Main execution function
# Reads URLS_FILE line by line, downloads and extracts every listed archive,
# then prints a summary of successes and failures.
# Globals:   URLS_FILE, TARGET_DIR, TEMP_DIR (read)
# Arguments: none are read
# Returns:   0 when every URL was processed successfully, 1 if any failed
main() {
    print_status "Starting zip download and extraction script"
    print_status "URLs file: $URLS_FILE"
    print_status "Target directory: $TARGET_DIR"
    print_status "Temporary directory: $TEMP_DIR"

    # Check dependencies
    check_dependencies

    # Validate inputs
    validate_inputs

    # Setup temporary directory
    setup_temp_dir

    # Process each URL
    local success_count=0
    local failure_count=0
    local total_count=0
    local failed_urls=()
    local url failed_url

    # The list is read on file descriptor 3 so that commands run inside the
    # loop cannot consume it through stdin. The `|| [[ -n "$url" ]]` clause
    # keeps a final line that has no trailing newline.
    while IFS= read -r -u 3 url || [[ -n "$url" ]]; do
        # Remove leading/trailing whitespace (including the CR of CRLF files)
        # with parameter expansion; unlike piping through xargs this leaves
        # quotes and backslashes in the URL untouched.
        url="${url#"${url%%[![:space:]]*}"}"
        url="${url%"${url##*[![:space:]]}"}"

        # Skip empty (or whitespace-only) lines and comments
        if [[ -z "$url" || "$url" == \#* ]]; then
            continue
        fi

        total_count=$((total_count + 1))

        if process_url "$url"; then
            success_count=$((success_count + 1))
        else
            failure_count=$((failure_count + 1))
            failed_urls+=("$url")
        fi

        echo "" # Add blank line between processes
    done 3< "$URLS_FILE"

    # Final cleanup - rmdir only succeeds on an empty directory, so a
    # non-empty temp directory is silently left in place
    if [[ -d "$TEMP_DIR" ]] && rmdir "$TEMP_DIR" 2>/dev/null; then
        print_status "Removed empty temporary directory"
    fi

    # Print summary
    print_status "Processing complete!"
    print_status "Total URLs processed: $total_count"
    print_status "Successful: $success_count"
    if (( failure_count > 0 )); then
        print_warning "Failed: $failure_count"
        echo ""
        print_error "Failed URLs:"
        for failed_url in "${failed_urls[@]}"; do
            echo "  - $failed_url"
        done
        echo ""
        print_status "You can create a new URLs file with just the failed URLs to retry them."
        # Let callers (cron, CI, wrapper scripts) detect partial failure
        return 1
    fi

    return 0
}

# Run the main function, passing the script's command-line arguments through
main "$@"

By xtream1101 • Created 2025-08-05 13:35 • Updated 2025-08-05 13:35