#!/bin/bash
#################################################################
# scan_2.sh - Michael Karr
# Scans accounts/servers for malware/shells via clamav
#
# https://stash.endurance.com/projects/HGADMIN/repos/scan/browse
# Confluence: https://confluence.endurance.com/display/HGS/Scan
#
# Please submit all bug reports at https://jira.endurance.com/
#
# (c) 2012 - HostGator.com, LLC.
#################################################################

# make everything this script creates (logs, downloads, file lists)
# accessible to the owner only

umask 0077

# distribution urls

DISTURL="https://cleanacct.soc.newfold.com/clamav/clamav-dist"
# NOTE(review): the download key is stored in plain text here; consider
# reading it from a root-only file or the environment instead.
DISTKEY="R3k0sKsH96GNH8"

DIST64="$DISTURL/clamav-64bit.tar.gz"
DISTRHEL9="$DISTURL/clamav-el9-64bit.tar.gz"

DEFS="$DISTURL/defs.tar.gz"
MDEFS="$DISTURL/defs-malware.tar.gz"
SDEFS="$DISTURL/defs-submitsh.tar.gz"

# default configuration values

DEBUG=0 # display extra debugging info
QUIET=0 # suppress output
MALWARE=0 # use extra malware definitions
SUBMITSH=1 # use submitsh definitions
SYMLINKS=0 # follow symlinks when scanning
RESELLERMODE=0 # scan an entire reseller
LISTMODE=0 # scan a given list of files
NOLOG=0 # don't write to scanreport.txt
MAXSIZE=1 # don't scan files larger than 1 MB

SCANSHM=1 # scan /dev/shm by default when scanning for a user
SCANTMP=1 # scan /tmp by default when scanning for a user

TEMPFOLDER="/root/tmp" # folder where we are going to store our temporary data
TEMPCLAM="$TEMPFOLDER/scan" # folder where clamav will live after extraction

# base clamav arguments
#
# This string is expanded unquoted when clamscan is invoked, so any quote
# character stored inside it would be handed to clamscan literally and become
# part of the pattern. The patterns therefore carry no quoting. --exclude-dir
# takes a regular expression, so "/home/backup-" covers every path containing
# that prefix.

CLAMAV_ARGUMENTS="-i -r --max-recursion=200 --max-dir-recursion=200 --exclude-dir=/access-logs/ \
--exclude-dir=/home/backup- --exclude-dir=/mail/ --exclude-dir=/awstats/ \
--exclude-dir=/webalizer/ --exclude-dir=/analog/ --exclude-dir=/.security/ --exclude-dir=/cpbandwidth/ \
--exclude-dir=/webalizerftp/ --exclude-dir=/MYSQL_DATA/ --scan-mail=no --scan-pe=no \
--scan-archive=no --cross-fs=no --bytecode-timeout=5 --phishing-sigs=no --phishing-scan-urls=no \
--scan-ole2=no --scan-pdf=no --block-encrypted=yes --exclude-dir=/cpeasyapache/ --exclude-dir=/virtfs/"

# architecture and server information

SHORTHOST=$(hostname -s)
CWD=$(pwd)

# every run logs to its own file, named after the start time (epoch seconds)

RUNDATE=$(date +%s)
LOGS=("$TEMPFOLDER/scanlog.$RUNDATE")

# space separated list of temporary directories searched for a user's files;
# the two find calls add whichever of the optional /opt locations exist

TMPDIRS="/tmp /etc/proxy_conf /etc/cpanel/proxy_conf $(find /opt/*/*/root/{tmp,var/tmp} -maxdepth 0 -perm 1777 2>/dev/null) $(find /opt/cpanel/ea-ruby*/root/var/run/passenger-instreg/*/apps.s -maxdepth 0 2>/dev/null)"

# utility subroutines

# Append a message to every active log file.
# Globals:   LOGS (read) - array of log file paths
# Arguments: message words; they are joined with spaces and backslash
#            escapes are interpreted (echo -e), so callers may embed "\n"
output_log() {
    local log # keep the loop variable out of the callers' scope

    for log in "${LOGS[@]}"; do
        echo -e "$*" >> "$log"
    done
}

# Print a host- and time-stamped message and record it in the logs.
# Globals:   SHORTHOST (read), QUIET (read; unset or empty counts as 0)
# Arguments: message words, joined with spaces
# Outputs:   the stamped message on stdout unless QUIET is non-zero; the
#            message is always passed to output_log
print() {
    local outstr
    outstr="$SHORTHOST: $(date): $*"

    if [ "${QUIET:-0}" -eq 0 ]; then echo -e "$outstr"; fi
    output_log "$outstr"
}

# Forward the message to print, but only while debugging is switched on.
# Globals:   DEBUG (read)
# Arguments: message words, joined with spaces
print_debug() {
    # anything other than DEBUG=1 means there is nothing to report
    [ $DEBUG -eq 1 ] || return 0

    print "$*"
}

# Ask the operator a question and log both the question and the answer.
# Globals:   SHORTHOST (read), REPLY (written - callers inspect the answer)
# Arguments: question words, joined with spaces
prompt() {
    local outstr
    outstr="$SHORTHOST: $(date): $*"

    # -r keeps backslashes in the answer literal; with no variable name given
    # the answer is stored in the global REPLY
    read -r -p "$outstr"
    output_log "$outstr$REPLY"
}

# Report a fatal problem through print and stop the script with status 1.
# Arguments: message words, joined with spaces
exit_error() {
    local message="$*"

    print "$message"
    exit 1
}

# Print the directory that holds all accounts: the Plesk vhosts folder when
# the Plesk marker file /etc/psa/.psa.shadow exists, '/home' otherwise.
home_base_path() {
    local base="/home"

    [ -e "/etc/psa/.psa.shadow" ] && base="/var/www/vhosts"
    echo "$base"
}

# Print the account name that a filesystem path belongs to.
# Arguments: $1 - path to inspect
# Outputs:   the path component that follows the accounts folder
#            ('/var/www/vhosts' on Plesk, '/home' or '/homeN' otherwise);
#            nothing when the path is not below such a folder
user_from_path() {
    local repath="$1"
    local cwdre

    if [ -e "/etc/psa/.psa.shadow" ]; then
        # same vhosts location that home_base_path and path_from_user use
        cwdre="/var/www/vhosts/([^/]*)"
    else
        cwdre="/home[0-9]*/([^/]*)"
    fi

    if [[ "$repath" =~ $cwdre ]]; then
        printf '%s\n' "${BASH_REMATCH[1]}"
    fi
}

# Print the resolved home directory of an account.
# Arguments: $1 - account name
# Outputs:   absolute path with symlinks resolved; nothing when the name is
#            empty, unknown, or its home directory does not exist
path_from_user() {
    local user="$1"
    local homedir=""
    local homedirabs

    # without a name there is no account to look up; on Plesk the vhosts
    # folder itself would otherwise be reported as a home directory
    if [ -z "$user" ]; then
        return 0
    fi

    if [ -e "/etc/psa/.psa.shadow" ]; then
        homedir="/var/www/vhosts/$user"
    else
        # exact match on the login name; field 6 of passwd is the home directory
        homedir=$(awk -F: -v user="$user" '$1 == user { print $6; exit }' /etc/passwd)
    fi

    if [ -n "$homedir" ] && [ -d "$homedir" ]; then
        homedirabs=$(readlink -f -- "$homedir") # account for symlinks/strange paths
        echo "$homedirabs"
    fi
}

# worker subroutines

# Print the command line help on stdout and exit with status 1.
# The text lists the options accepted by the argument parser of this script;
# the short flags shown must match the ones handled there.
usage() {
    cat <<EOF
Usage:

$0 [options] [username]

Options:

--all
-a

    Scan all of '/home' (cPanel) or '/var/www/vhosts' (Plesk).

--clean
-c

    Remove detected threats.

--cwd

    Scan the current working directory.

--debug
-d

    Display extra debugging information.

--follow-symlinks
-f

    Follow symlinks when scanning/cleaning. Use with caution.

--malware
-m

    Use the extra malware definitions (sometimes can be very slow).

--notmp

    Do not scan '/tmp'.

--noshm

    Do not scan '/dev/shm'.

--path (path)
-p (path)

    Scan the given path.

--reseller (user)
-r (user)

    All sub-accounts for the given reseller user.

--simple
-s

    Display only detected threats. All other output is suppressed.

--usage
-u
--help
-h

    Display this usage message.

--nolog

    Don't write to scanreport.txt

--large

    Scan files as large as 30 MB

--list (file)
-l (file)

    Scan the files listed in the given file.

--nosubmitsh

    Do not use the submitsh definitions.

EOF

    exit 1
}

# Build the final clamscan option string for the next scan.
# Globals:   CLAMAV_ARGUMENTS (rewritten), CLAMAV_BASE_ARGUMENTS (set once),
#            MAXSIZE, CLEAN, BACKUPDIR, SYMLINKS, QUIET, REPLY (read)
# Exits:     via exit_error when the operator does not confirm removal
#
# The function may run once per account (reseller mode). The options are
# therefore rebuilt from the arguments as they were on the first call, so
# --move/--max-filesize from an earlier account never pile up.
prepare_clamav_opts() {
    if [ -z "${CLAMAV_BASE_ARGUMENTS+set}" ]; then
        CLAMAV_BASE_ARGUMENTS="$CLAMAV_ARGUMENTS"
    fi

    CLAMAV_ARGUMENTS="${CLAMAV_BASE_ARGUMENTS} --max-filesize=${MAXSIZE}M"

    if [ "$CLEAN" == "nuke" ]; then
        print "WARNING: Detected risks will be removed entirely, forever."
        print "WARNING: This is permanent. If you answer \"y\" and destroy something, it is on you."
        prompt "WARNING: Are you sure you want to completely remove all matched files (y/n)? "

        # only a single 'y' or 'Y' counts as consent
        if [[ $REPLY != [yY] ]]; then
            exit_error "Bailing out because the user is not sure what they want."
        fi

        CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --remove"
    elif [ "$CLEAN" == "move" ]; then
        # NOTE(review): the option string is word-split when clamscan is run,
        # so a backup path containing whitespace would not survive.
        mkdir -p "$BACKUPDIR"
        CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --move=$BACKUPDIR"
    fi

    if [ "$SYMLINKS" -eq 1 ]; then
        CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --follow-dir-symlinks=1"
    else
        CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --follow-dir-symlinks=0"
    fi

    if [ "$QUIET" -eq 1 ]; then
        CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --no-summary"
    fi
}

# Prepare the temporary folder, change into it and remove leftovers of an
# earlier ClamAV download.
# Globals:   TEMPFOLDER, TEMPCLAM (read)
# Exits:     via exit_error when the temporary folder cannot be entered
# Note:      the working directory of the script is $TEMPFOLDER afterwards
cleanup() {
    # make sure temp folders exist
    mkdir -p "$TEMPCLAM" &>/dev/null
    mkdir -p "$TEMPFOLDER" &>/dev/null

    # if we were started from anywhere but /root/bin, put this file in /root/tmp so it's cleaned up.

    if [[ ! $0 =~ "/root/bin/scan" ]]; then
        # if we are not a real file, then do not do anything, otherwise strange things happen

        if [ -f "$0" ]; then
            print "Removing self."

            # move to temp folder so is removed later
            mv -- "$0" "$TEMPCLAM/" &>/dev/null
        fi
    fi

    # move to working dir; the removals below use relative paths, so they
    # must never run from whatever directory we happened to start in

    print_debug "Temporary folder: $TEMPFOLDER"
    cd "$TEMPFOLDER" || exit_error "Unable to enter temporary folder '$TEMPFOLDER'. Exiting."

    if [[ -z "$(pgrep clamscan)" ]]; then
        # if clamscan isnt already running, clean up possible old versions of clamav

        print "Cleaning up."

        rm -fr ./clamav &>/dev/null
        rm -f ./clamav.t* &>/dev/null
        rm -f ./clamav-dist.* &>/dev/null
        rm -f ./clamav-*bit.tar.gz &>/dev/null
        rm -f ./defs*.tar.gz &>/dev/null
        rm -f ./temp.tar.gz &>/dev/null
    else
        # another scan may still be using these files, leave them alone
        print_debug "Skipping cleanup."
    fi
}

# Download and unpack ClamAV plus the selected definitions into the current
# directory, then change into clamav/bin.
# Globals:   DIST64, DISTRHEL9, DEFS, MDEFS, SDEFS, DISTKEY, MALWARE,
#            SUBMITSH (read)
# Exits:     via exit_error on an unsupported system or when clamav/bin is
#            missing afterwards
# Note:      expects to be called from the temporary folder (see cleanup)
prepare_clamav() {
    local os_ver distpkg arch
    local -a extarchives

    if [[ -z "$(pgrep clamscan)" ]]; then
        # if clamscan isnt running already, download the appropriate clamav packages

        print "Preparing ClamAV."
        if [[ -f '/etc/redhat-release' ]]; then
            os_ver=$(/bin/rpm --eval='%centos_ver')
            # a value that is not a number makes this test fail, which
            # selects the el9 package
            if [[ "${os_ver}" -lt 9 ]]; then
                distpkg=$DIST64
            else
                distpkg=$DISTRHEL9
            fi
        else
            exit_error "no ClamAV package available this operating system"
        fi

        extarchives=("$distpkg" "$DEFS")

        # add in the malware archive, if wanted

        if [ "$MALWARE" -eq 1 ]; then
            print_debug "Using malware definitions."
            extarchives+=("$MDEFS")
        fi
        if [ "$SUBMITSH" -eq 1 ]; then
            print_debug "Using submitsh definitions."
            extarchives+=("$SDEFS")
        fi

        /bin/chmod 700 .

        # download and extract archives
        # NOTE(review): --no-check-certificate disables TLS verification for
        # code that is later executed as root; confirm whether it is still needed.

        for arch in "${extarchives[@]}"; do
            print_debug "Downloading and extracting: $arch"
            if wget --no-check-certificate -q -O temp.tar.gz "$arch?key=$DISTKEY" &>/dev/null; then
                if ! tar -zxf temp.tar.gz &>/dev/null; then
                    print "WARNING: Failed to extract: $arch"
                fi
            else
                print "WARNING: Failed to download: $arch"
            fi
            rm -f ./temp.tar.gz &>/dev/null
        done

        # correct ownership on files; executables become 700, the rest 600

        chown -R root:root clamav
        find clamav/ ! -perm /100 ! -type l -exec /bin/chmod 600 {} +
        find clamav/ -perm /100 ! -type l -exec /bin/chmod 700 {} +

        print "Preparation completed."
    else
        print_debug "Skipping prepare."
    fi

    # the scan functions run ./clamscan, so failing to get here must be fatal
    cd clamav/bin || exit_error "ClamAV was not found in '$PWD/clamav/bin'. Exiting."
}

# Write the lists of /dev/shm and temporary-directory files that the scan
# should cover into the .security folder of the scan directory.
# Globals:   SCANDIR, NOLOG, SCANSHM, SCANTMP, MYUSER, TMPDIRS, RUNDATE (read)
#            SHMFILES, TMPFILES (written - paths of the generated lists)
# With MYUSER set only files owned by that user are listed.
prepare_tmpshm() {
    local security_dir="$SCANDIR/.security"

    # the lists below are stored in .security, so the folder is needed
    # whenever a list is written, not only when logging is enabled
    if [ "$NOLOG" -eq 0 ] || [ "$SCANSHM" -eq 1 ] || [ "$SCANTMP" -eq 1 ]; then
        if [[ ! -d "$security_dir" ]]; then
            mkdir -p "$security_dir"
        fi
    fi

    if [ "$SCANSHM" -eq 1 ]; then
        SHMFILES="$security_dir/shmfiles.$RUNDATE"

        if [ -n "$MYUSER" ]; then
            find /dev/shm -type f -user "$MYUSER" > "$SHMFILES" 2> /dev/null
        else
            find /dev/shm -type f > "$SHMFILES" 2> /dev/null
        fi
    fi

    if [ "$SCANTMP" -eq 1 ]; then
        TMPFILES="$security_dir/tmpfiles.$RUNDATE"

        # TMPDIRS is a space separated list and is split into words on purpose
        if [ -n "$MYUSER" ]; then
            # shellcheck disable=SC2086
            find $TMPDIRS -type f -user "$MYUSER" > "$TMPFILES" 2> /dev/null
        else
            # shellcheck disable=SC2086
            find $TMPDIRS -type f > "$TMPFILES" 2> /dev/null
        fi
    fi
}

# Scan SCANDIR (plus the prepared /dev/shm and temp file lists) with the
# clamscan binary in the current directory, copying the output to the logs
# and, unless NOLOG is set, to scanreport.txt inside SCANDIR.
# Globals:   SCANDIR, BACKUPDIR, CLAMAV_ARGUMENTS, NOLOG, LOGS, QUIET,
#            SCANSHM, SCANTMP, SHMFILES, TMPFILES, MYUSER (read)
# Note:      ends by calling cleanup
clam_av_scan() {
    local user_file log
    local -a tee_args

    # NOTE(review): all_opt is not assigned anywhere in this script, so this
    # only triggers when it is inherited from the environment.
    if [[ ${all_opt} == "True" ]]; then
        SCANDIR="/home"
    fi

    print "Scan directory: $SCANDIR"
    print "Backup directory: $BACKUPDIR"
    print_debug "ClamAV arguments: $CLAMAV_ARGUMENTS"

    if [ "$NOLOG" -eq 1 ]; then
        user_file="/dev/null"
    else
        user_file="$SCANDIR/scanreport.txt"
        if [ -L "$user_file" ]; then
            # the report lives in a folder the account owner controls; never
            # write or chown through a symlink planted there
            rm -f -- "$user_file"
        elif [ -s "$user_file" ]; then
            # keep the previous report, suffixed with its modification time
            mv -f -- "$user_file" "$user_file.$(stat -c '%Y' "$user_file")"
        fi
    fi

    # tee writes to the report and appends (-a) to every log file
    tee_args=("$user_file")

    for log in "${LOGS[@]}"; do
        tee_args+=(-a "$log")
    done

    print_debug "Tee arguments: ${tee_args[*]}"
    print "Scanning.\n"

    if ! [ "$NOLOG" -eq 1 ]; then
        touch "$user_file"
        # hand the report to whoever owns the scanned folder
        chown -h "$(stat -c '%U' "$SCANDIR"):" "$user_file"
    fi

    # CLAMAV_ARGUMENTS is a space separated option string and is split on purpose
    # shellcheck disable=SC2086
    ./clamscan $CLAMAV_ARGUMENTS "$SCANDIR" 2> /dev/null | tee "${tee_args[@]}"

    if [ "$SCANSHM" -eq 1 ]; then
        if [ "$QUIET" -eq 0 ]; then echo | tee "${tee_args[@]}"; fi
        print "Scanning '/dev/shm'.\n"
        # shellcheck disable=SC2086
        ./clamscan $CLAMAV_ARGUMENTS -f "$SHMFILES" 2> /dev/null | tee "${tee_args[@]}"
    fi

    if [ "$SCANTMP" -eq 1 ]; then
        if [ "$QUIET" -eq 0 ]; then echo | tee "${tee_args[@]}"; fi
        print "Scanning '/tmp'.\n"
        # shellcheck disable=SC2086
        ./clamscan $CLAMAV_ARGUMENTS -f "$TMPFILES" 2> /dev/null | tee "${tee_args[@]}"
    fi

    if [ "$QUIET" -eq 0 ]; then echo | tee "${tee_args[@]}"; fi

    if ! [ "$NOLOG" -eq 1 ]; then
        # only possible when the scan is tied to an account
        if [ -n "$MYUSER" ] && [ -s "$user_file" ]; then
            chown -h "$MYUSER:" "$user_file"
        fi
    fi

    print "Scan Complete."

    cleanup
}

# Scan the files named in LISTFILE with the clamscan binary in the current
# directory and append the output to every log file.
# Globals:   LISTFILE, CLAMAV_ARGUMENTS, LOGS, QUIET (read)
# Note:      ends by calling cleanup
clam_av_list_scan() {
    local log
    local -a tee_args=()

    print "Scan list: $LISTFILE"
    print_debug "ClamAV arguments: $CLAMAV_ARGUMENTS"

    # one "-a <log>" pair per log file; an array keeps each path in one piece
    for log in "${LOGS[@]}"; do
        tee_args+=(-a "$log")
    done

    print_debug "Tee arguments: ${tee_args[*]}"
    print "Scanning.\n"

    # CLAMAV_ARGUMENTS is a space separated option string and is split on purpose
    # shellcheck disable=SC2086
    ./clamscan $CLAMAV_ARGUMENTS -f "$LISTFILE" 2> /dev/null | tee "${tee_args[@]}"

    if [ "$QUIET" -eq 0 ]; then echo | tee "${tee_args[@]}"; fi
    print "Scan Complete."

    cleanup
}

# Delete the lock file named by LOCKFILE; a missing file is not an error.
clear_lock() {
    local lock_path="$LOCKFILE"

    print_debug "Clearing Lock File: $lock_path"
    rm -f "$lock_path" 2> /dev/null
}

# Create the lock file named by LOCKFILE unless it already exists.
# Globals:   LOCKFILE, STY (read); LOCKDIR (written)
# A new lock stores the value of STY and installs a trap that removes the
# lock and exits with status 1 on INT, TERM, QUIT or HUP.
# NOTE(review): an existing lock is only reported at debug level; the caller
# is not stopped from scanning.
set_lock() {
    print_debug "Setting Lock File: $LOCKFILE"
    LOCKDIR=$(dirname "$LOCKFILE")

    # somebody else holds the lock: mention it and leave everything untouched
    if [ -e "$LOCKFILE" ]; then
        print_debug "Scan already in progress for directory '$LOCKDIR'."
        return
    fi

    trap 'clear_lock; exit 1' INT TERM QUIT HUP
    mkdir -p "$LOCKDIR"
    echo -n "$STY" > "$LOCKFILE"
}

# main routine

# Stop with the usage text when an option that needs a value is the last
# word on the command line.
#   $1 - option name
#   $2 - number of command line words left, the option itself included
require_value() {
    if [ "$2" -lt 2 ]; then
        echo "$0: option '$1' requires an argument" >&2
        usage
    fi
}

# Walk the command line. Each option sets the matching configuration
# variable; a word that is not an option is taken as the account to scan.
while [ "$#" -gt 0 ]; do
    case "$1" in
        --all | -a )
            SCANDIR=$(home_base_path) ;;
        --large )
            MAXSIZE=30 ;;
        --clean | -c )
            CLEAN="move" ;;
        --cwd )
            # scanning a plain path: skip the per-user shm/tmp lists
            SCANSHM=0
            SCANTMP=0
            SCANDIR=$(readlink -f -- "$CWD") ;;
        --debug | -d )
            DEBUG=1 ;;
        --follow-symlinks | -f )
            SYMLINKS=1 ;;
        --help | -h | --usage | -u )
            usage ;;
        --database )
            require_value "$1" "$#"
            shift # junk the already read argument
            CLAMAV_ARGUMENTS="${CLAMAV_ARGUMENTS} --normalize=no --database=$(readlink -f -- "$1")" ;;
        --nolog )
            NOLOG=1 ;;
        --list | -l )
            require_value "$1" "$#"
            shift # junk the already read argument
            LISTMODE=1
            LISTFILE=$(readlink -f -- "$1") ;;
        --malware | -m )
            MALWARE=1 ;;
        --nosubmitsh )
            SUBMITSH=0 ;;
        --noshm )
            SCANSHM=0 ;;
        --notmp )
            SCANTMP=0 ;;
        --omg | --ohmygod )
            CLEAN="nuke" ;;
        --path | -p )
            require_value "$1" "$#"
            shift # junk the already read argument
            SCANSHM=0
            SCANTMP=0
            SCANDIR=$(readlink -f -- "$1") ;;
        --reseller | -r )
            require_value "$1" "$#"
            shift # junk the already read argument
            RESELLERMODE=1
            RESELLER=$1 ;;
        --simple | -s )
            QUIET=1 ;;
        -* )
            # a mistyped option must not be mistaken for an account name
            echo "$0: unknown option '$1'" >&2
            usage ;;
        * )
            MYUSER=$1
            SCANDIR=$(path_from_user "$1") ;;
    esac
    shift
done

# Dispatch on the selected mode: reseller scan, file-list scan, or the
# default scan of a single account / path.
if [ "$RESELLERMODE" -eq 1 ]; then
    # accounts whose /etc/trueuserowners line ends in ": <reseller>"
    mapfile -t RESOLDUSRS < <(grep -P ": $RESELLER\$" /etc/trueuserowners | cut -d: -f1 | sort)

    if [ "${#RESOLDUSRS[@]}" -gt 0 ]; then
        RESELLERDIR=$(path_from_user "$RESELLER")

        # lock and log files live below the reseller's home; without one
        # they would end up in '/.security'
        if [ -z "$RESELLERDIR" ]; then
            exit_error "No home directory found for specified reseller. Exiting."
        fi

        # set lock on reseller dir

        LOCKFILE="$RESELLERDIR/.security/.scan_in_progress"
        set_lock

        # setup logging for main reseller user

        RESELLLOG="$RESELLERDIR/.security/scanlog.reseller.$RUNDATE"
        RESELLLOGPATH=$(dirname "$RESELLLOG")
        mkdir -v -p "$RESELLLOGPATH"
        LOGS=("${LOGS[@]}" "$RESELLLOG")

        print "Processing reseller: $RESELLER"

        for log in "${LOGS[@]}"; do
            print "Logging to: $log"
        done

        cleanup # cleanup
        prepare_clamav # prepare the environment for scanning and download clamav

        # every account starts from the same option string; prepare_clamav_opts
        # appends per-account options to it

        RESELLER_BASE_ARGS=$CLAMAV_ARGUMENTS

        # process resold users

        for resold in "${RESOLDUSRS[@]}"; do
            MYUSER=$resold
            USERDIR=$(path_from_user "$MYUSER")

            if [ -z "$USERDIR" ]; then
                print "Skipping user without a home directory: $MYUSER"
                continue
            fi

            SCANDIR=$USERDIR
            CLAMAV_ARGUMENTS=$RESELLER_BASE_ARGS

            BACKUPDIR="$SCANDIR/.security/scanbackup.$RUNDATE"

            # setup logging for specific user
            USERLOG="$SCANDIR/.security/scanlog.$RUNDATE"
            USERLOGPATH=$(dirname "$USERLOG")
            mkdir -v -p "$USERLOGPATH"
            LOGS=("${LOGS[@]}" "$USERLOG")

            print "Processing user: $MYUSER"
            print "Logging to: $USERLOG"

            # clam_av_scan finishes with cleanup, which changes to the
            # temporary folder and may delete the extracted ClamAV tree; get
            # back to a usable clamscan before scanning the next account

            if [ -x "$TEMPFOLDER/clamav/bin/clamscan" ]; then
                cd "$TEMPFOLDER/clamav/bin" || exit_error "Unable to enter '$TEMPFOLDER/clamav/bin'. Exiting."
            else
                cleanup
                prepare_clamav
            fi

            prepare_tmpshm # prepare tmp and shm searches
            prepare_clamav_opts # prepare clamav options
            clam_av_scan # go ahead and scan

            # remove logging for user (the entry appended above is the last one)

            unset "LOGS[${#LOGS[@]}-1]"
        done

        clear_lock # remove lock for reseller dir
    else
        exit_error "No accounts found for specified reseller. Exiting."
    fi
elif [ "$LISTMODE" -eq 1 ]; then
    if [ -f "$LISTFILE" ]; then
        # we don't need to setup any additional log directories
        # we don't know where they would need to go

        for log in "${LOGS[@]}"; do
            print "Logging to: $log"
        done

        cleanup # cleanup
        prepare_clamav # prepare the environment for scanning and download clamav

        clam_av_list_scan
    else
        exit_error "File '$LISTFILE' does not appear to exist. Exiting."
    fi
else
    # if we weren't given any username or arbitrary path to scan, try to detect it from the cwd

    if [ ! -d "$SCANDIR" ]; then
        MYUSER=$(user_from_path "$CWD")
        USERDIR=$(path_from_user "$MYUSER")

        if [ -d "$USERDIR" ]; then
            SCANDIR=$USERDIR
        else
            exit_error "Failed to detect user from CWD or invalid user given. Exiting."
        fi
    fi

    # check to see if the path we are scanning is in a user's homedir

    MYUSER=$(user_from_path "$SCANDIR")
    USERDIR=$(path_from_user "$MYUSER")

    if [ -n "$MYUSER" ] && [ -d "$USERDIR" ]; then # if we are, setup logging and backups in the users .security folder
        LOCKFILE="$USERDIR/.security/.scan_in_progress"
        BACKUPDIR="$USERDIR/.security/scanbackup.$RUNDATE"
        LOGS=("${LOGS[@]}" "$USERDIR/.security/scanlog.$RUNDATE")
    else # otherwise, place in /root/tmp
        LOCKFILE="/root/tmp/.security$SCANDIR/.scan_in_progress"
        BACKUPDIR="/root/tmp/.security$SCANDIR/scanbackup.$RUNDATE"
    fi

    for log in "${LOGS[@]}"; do
        LOGPATH=$(dirname "$log")
        mkdir -v -p "$LOGPATH"
    done

    for log in "${LOGS[@]}"; do
        print "Logging to: $log"
    done

# Inode pre-check. The only call to it, further down, is commented out.
# NOTE(review): 'user' is not set anywhere in this script, '[[ a > b ]]'
# compares strings rather than numbers, and the final 'return' does not stop
# the scan - resolve these before re-enabling the call.
inode_count () {

print "Counting inodes"
    MAX=300000
    quotaout=$(/usr/bin/quota -l $user 2> /dev/null)
    if echo -e "$quotaout" | grep -Pq ': none$';
    then
       print "Quotas disabled"
       slowcheck=1
    else
        quotainodes=$(echo -e $quotaout | tail -n +3 | awk '{print $5}' | sort -nr | head -n1)
        if [[ $quotainodes  > $MAX ]]
        then
           print "Account may have too many inodes under working directory"
           slowcheck=1
        else
           print "Account is under inode limit, proceeding"
        fi
    fi

    if [[ $slowcheck = 1 ]]
    then
        print "Performing slow inode count. Please wait"
        slowlimit=$(($MAX+1))
        slowcount=$(find $SCANDIR -type f -printf '.' | head -c $slowlimit| wc -c)
        if (( $slowcount > $MAX ))
        then
           print "Directory is over inode limit, aborting"
           return;
        fi
    fi
}


    set_lock # make sure we aren't already running
    cleanup # cleanup
    prepare_clamav # prepare the environment for scanning and download clamav
    #inode_count #count inode usage (don't: this never aborted and doesn't work now)
    prepare_tmpshm # prepare tmp and shm searches
    prepare_clamav_opts # prepare clamav options
    clam_av_scan # go ahead and scan
    clear_lock # remove lock on scan dir
fi
