#!/bin/sh

# Abort on the first failing command (-e) and print every command before it is
# executed (-x).
set -ex

# Number of the issue that triggered the run, read from the event payload file.
ISSUE_NUMBER=$(jq ".issue.number" "${GITHUB_EVENT_PATH}")
# Name of the branch used for this issue; it only depends on the issue number.
BRANCH_NAME="cve-vex-issue-${ISSUE_NUMBER}"
# jq is called without -r, so the body is kept JSON-encoded: a single line,
# wrapped in double quotes, with line breaks still written as escape sequences.
ISSUE_BODY=$(gh issue view --json body "${ISSUE_NUMBER}" | jq ".body")
# Info string of the fenced block that carries the VEX data.
VEX_STRING="text-vex"
# Extracts only the field with the data that will be VEXed from the issue body.
# The field is inside the strings:
# ```text-vex
# the CVE data
# ...
# more CVE data
# ```
# This makes the parsing easier, so anything outside of the field can be safely
# ignored.
# The expression looks for the literal characters \r\n after the opening fence,
# which is how the line break appears in the JSON-encoded body. When the fence
# is not found, sed prints its input unchanged, so VEX_BODY ends up equal to
# ISSUE_BODY; check_issue_body() uses that to detect a malformed issue.
VEX_BODY=$(echo "${ISSUE_BODY}" | sed "s/.*\`\`\`${VEX_STRING}\\\r\\\n\(.*\)\`\`\`.*/\1/")
# Title of the PR created for this issue; also used to search for an open PR.
PR_TITLE="[VEX CVE] Issue #${ISSUE_NUMBER}"
# Number of the open PR whose search matches the title; empty when none exists.
PR_NUMBER=$(gh pr list -S "is:pr is:open \"${PR_TITLE}\"" --json number --jq ".[] | .number")
# Label that marks both the issues handled by this script and the PRs it opens.
PR_LABEL="cve-vex-automated"

# The script relies too much on subshells, so we use trap to catch when a
# subshell errored and needs to exit.
# NOTE(review): signal 9 is SIGKILL, which a shell cannot trap, so this handler
# most likely never runs. Confirm how fatal (not defined in this file)
# terminates the script before relying on the "exit 1" status.
trap "exit 1" 9

# check_go_modpath() prints the Go mod path recorded in an affected Go binary
# that ships inside a container image.
# The first parameter is the image name.
# The second is the affected Go binary, with the full path inside the image, as
# reported by Trivy.
# Only the mod path is written to stdout, because callers capture the output
# with $(...); every diagnostic goes to stderr. When no mod path can be read, a
# comment is added to the issue and fatal is called.
check_go_modpath() {
    local image="${1}"
    local go_bin="${2}"
    local tmp_dir
    local wkd_dir
    local layers
    local layer
    local go_modpath

    tmp_dir=$(mktemp -d)
    wkd_dir=$(pwd)

    skopeo copy -q docker://"${image}" dir://"${tmp_dir}"
    cd "${tmp_dir}"

    # Untar all the layers, as we don't know in which one the binary might be.
    # The layer files are expected to be named after the digests listed in
    # manifest.json, without the "sha256:" prefix.
    layers=$(jq -r '.layers[].digest' manifest.json | sed "s/^sha256://")
    for layer in ${layers}; do
        tar -xf "${layer}"
    done

    # 'go version -m' prints the build info as tab separated lines that start
    # with a tab; the mod path is the value right after the "mod" key.
    go_modpath=$(go version -m "${go_bin}" | awk -F '\t' '$1 == "" && $2 == "mod" { print $3 }')

    # Nothing below needs the unpacked image, so clean up before reporting. This
    # way the temporary directory is removed on the error path as well.
    cd "${wkd_dir}"
    rm -rf -- "${tmp_dir:?}"

    if [ -z "${go_modpath}" ]; then
        local msg="Unable to find a Go mod path for the binary ${go_bin} in image ${image}"
        # stdout is captured by the caller, so the error has to go to stderr to
        # be visible in the logs.
        echo "[ERROR] ${msg}" >&2
        issue_add_comment "${msg}." >&2
        fatal
    fi

    echo "${go_modpath}"
}

# get_csv_field() retrieves a given field from a CSV formatted string.
# The first param is the string from where to extract.
# The second param is the field number to extract (awk numbering, so the first
# field is 1).
# The field is printed to stdout.
get_csv_field() {
    local csv_line="${1}"
    local csv_field="${2}"

    # The field number is handed to awk as a variable instead of being spliced
    # into the program text, so it is always treated as data and never as awk
    # code, and it cannot be word-split by the shell.
    printf '%s\n' "${csv_line}" | awk --csv -v field="${csv_field}" '{ print $field }'
}

# get_product() generates the PURL formatted product id for the VEX entry.
# The first parameter is the CVE type.
# The second is the image name.
# The third is the affected binary.
# The product id is printed to stdout.
get_product() {
    local cve_type="${1}"
    local image="${2}"
    local target="${3}"
    local product_id=""

    # If the CVE affects a Go binary, then the product is the Go mod path of the
    # binary.
    if [ "${cve_type}" = "gobinary" ]; then
        local pid
        pid=$(check_go_modpath "${image}" "${target}")
        product_id="pkg:golang/${pid}"
    else
        # Remove any digest related data ("@sha256:...") from the image.
        local image_no_tag="${image%%@*}"
        # Remove any tag related data from the image. Only a ':' found after the
        # last '/' starts a tag; an earlier one belongs to the registry port
        # (e.g. registry.example.com:5000/team/app) and must be kept.
        case "${image_no_tag##*/}" in
            *:*) image_no_tag="${image_no_tag%:*}" ;;
        esac
        # Extract only the name of the image, removing any information about the
        # registry or repo.
        local image_basename
        image_basename=$(basename "${image_no_tag}")

        # If the image doesn't have a dot '.', that indicates that it doesn't
        # contain the registry address, so we add the Prime registry as its
        # source.
        case "${image_no_tag}" in
            *.*) ;;
            *) image_no_tag="${PRIME_REGISTRY}/${image_no_tag}" ;;
        esac

        # Replace all instances of '/' with its corresponding URL encoding '%2F'.
        # This is needed for the PURL format.
        image_no_tag=$(printf '%s\n' "${image_no_tag}" | sed 's|/|%2F|g')
        product_id="pkg:oci/${image_basename}?repository_url=${image_no_tag}"
    fi

    echo "${product_id}"
}

# get_subcomponent() builds the PURL formatted subcomponent id of a VEX entry
# and prints it to stdout.
# Parameters, in order: the CVE type, the affected package name, the affected
# package version and the affected binary (target).
# An empty line is printed when the CVE type matches none of the known kinds.
get_subcomponent() {
    local cve_type="${1}"
    local pkg_name="${2}"
    local pkg_version="${3}"
    local target="${4}"
    local purl=""

    if [ "${cve_type}" = "gobinary" ]; then
        # Go binaries are identified by the package name and version.
        purl="pkg:golang/${pkg_name}@${pkg_version}"
    else
        # Distros are recognised by a case-insensitive substring of the type.
        # The order matters: "opensuse" has to be tried before "suse".
        # Other languages and OS distros must be matched here if needed.
        case "$(echo "${cve_type}" | tr '[:upper:]' '[:lower:]')" in
            *opensuse*) purl="pkg:rpm/opensuse.leap/${target}" ;;
            *suse*)     purl="pkg:rpm/sles/${target}" ;;
            *alpine*)   purl="pkg:apk/alpine/${target}" ;;
            *redhat*)   purl="pkg:rpm/redhat/${target}" ;;
            *debian*)   purl="pkg:deb/debian/${target}" ;;
            *ubuntu*)   purl="pkg:deb/ubuntu/${target}" ;;
        esac
    fi

    echo "${purl}"
}


# check_issue_body() decides whether the issue can be processed.
# Issues that don't carry the automation label are not meant for this script,
# so the script ends successfully. Labelled issues whose body doesn't contain
# the expected VEX field get a comment and the script fails.
check_issue_body() {
    local labels=$(gh issue view "${ISSUE_NUMBER}" --json labels --jq ".labels[].name")

    # One label per line; the whole line has to be equal to the label.
    echo "${labels}" | grep -qx "${PR_LABEL}" || {
        echo "[INFO] Exiting, this is not a VEX CVE issue"
        exit 0
    }

    # The extraction leaves the body untouched when the VEX field is missing,
    # so a difference between both values means that the field was found.
    [ "${ISSUE_BODY}" != "${VEX_BODY}" ] && return

    local msg="The issue body is not formatted as expected, please check and correct"
    echo "[ERROR] ${msg}"
    issue_add_comment "${msg}."
    fatal
}

# check_issue_state() reacts to the current state of the issue.
# When the issue was deleted, transferred or is 'CLOSED', the remote branch of
# the issue is deleted (if it exists), the PR is closed (if one is open) and the
# script ends successfully. When the issue is 'OPEN' nothing is done and the
# caller proceeds. Any other state is reported and the script fails.
check_issue_state() {
    local event_action=$(jq -r ".action" "${GITHUB_EVENT_PATH}")
    local current_state=$(gh issue view "${ISSUE_NUMBER}" --json state --jq ".state")
    local issue_is_gone="no"

    case "${event_action}" in
        deleted|transferred) issue_is_gone="yes" ;;
    esac
    if [ "${current_state}" = "CLOSED" ]; then
        issue_is_gone="yes"
    fi

    # The issue is still around: only the 'OPEN' state is accepted.
    if [ "${issue_is_gone}" = "no" ]; then
        if [ "${current_state}" != "OPEN" ]; then
            echo "[ERROR] Exiting due to an unknown issue state ${current_state} for issue ${ISSUE_NUMBER}"
            fatal
        fi
        return
    fi

    echo "[INFO] Issue ${ISSUE_NUMBER} was ${event_action}"

    # ls-remote prints nothing when the branch doesn't exist in the remote.
    if [ -n "$(git ls-remote origin "${BRANCH_NAME}")" ]; then
        echo "[INFO] Deleting remote branch ${BRANCH_NAME}"
        git push -d origin "${BRANCH_NAME}"
    fi

    if [ -n "${PR_NUMBER}" ]; then
        echo "[INFO] Closing PR ${PR_NUMBER}"
        gh pr close "${PR_NUMBER}" -c "Issue closed."
    fi

    exit 0
}

# check_status_justification() verifies if the passed status or justification is
# one of the accepted in the config file.
# The first param must be the field to check - 'status' or 'justification'.
# The second param is the value to check.
# When the value is not accepted, a comment is added to the issue and fatal is
# called.
check_status_justification() {
    local key="${1}"
    local value="${2}"

    # The value comes from the issue body, so it is matched as a fixed string
    # (-F). Used as a regular expression, a value like '.*' would match any
    # entry of the list and pass the validation.
    if ! yq ".cves.${key}" "${CONFIG_FILE}" | grep -qF -- "\"${value}\""; then
        local msg="The status or justification isn't according the config file, please check and correct"
        echo "[ERROR] $msg"
        issue_add_comment "${msg}."
        fatal
    fi
}

# issue_add_comment() posts a comment in the issue being processed.
# The first param is the text of the comment.
# gh is run from WORKING_DIR; the directory the caller was in is restored
# afterwards.
issue_add_comment() {
    local comment_body="${1}"
    local previous_dir="${PWD}"

    cd "${WORKING_DIR}"
    gh issue comment "${ISSUE_NUMBER}" -b "${comment_body}"
    cd "${previous_dir}"
}

# git_commit() commits the updated CVE file and creates the PR. The branch is
# deterministic and based on the originating issue number. Always create the
# branch locally, even if remote already has one, and then force-push it. This
# makes the process easier, as it doesn't require to deal with possible
# conflicts. If there is already a PR opened, then we update it. The PR is
# linked to the originating issue, so the issue can be automatically closed once
# the PR is merged.
# Nothing is done when 'git status' doesn't mention the CVE file.
git_commit() {
    local cve_basename
    cve_basename=$(basename "${CVE_FILE}")

    # The file name is matched as a fixed string (-F) and passed as a single,
    # quoted argument, so names with spaces or regex characters work as well.
    if ! git status | grep -qF -- "${cve_basename}"; then
        return 0
    fi

    git checkout -b "${BRANCH_NAME}"
    git add "${CVE_FILE}" "${REPORTS_VEX_DIR}"
    git commit -m "VEX issue #${ISSUE_NUMBER}"
    git push -f origin "${BRANCH_NAME}"

    # Update the PR when one is already open for the issue, otherwise create it.
    if [ -n "${PR_NUMBER}" ]; then
        gh pr edit "${PR_NUMBER}" --add-label "${PR_LABEL}" -b "Closes #${ISSUE_NUMBER}" -t "${PR_TITLE}"
    else
        gh pr create -l "${PR_LABEL}" -b "Closes #${ISSUE_NUMBER}" -t "${PR_TITLE}"
    fi
}

# vex_cve() VEXes the requested CVEs with the right VEX status and
# justification.
# It reads the entries from VEX_BODY, validates each one, looks it up in
# CVE_FILE, appends the matching VEX rows to a temporary CSV file and hands that
# file to merge_cve_csv. Afterwards merge_cve_files and git_commit are called.
# merge_cve_csv and merge_cve_files are not defined in this file.
vex_cve() {
    # VEX_BODY is taken from the JSON-encoded issue body, so the line breaks are
    # still written as escape sequences. printf is used to expand them, with
    # the body as the format string. Every '%' is doubled first, otherwise text
    # typed in the issue (e.g. "100% safe") would be read as a printf
    # conversion and mangled.
    local escaped_vex_body
    escaped_vex_body=$(printf '%s\n' "${VEX_BODY}" | sed 's/%/%%/g')
    # The lines of the issue body end with CR LF (see the \r\n expected by the
    # extraction of VEX_BODY); the CR is dropped so that it can't end up in the
    # last field of a line.
    local formatted_vex_body
    formatted_vex_body=$(printf "${escaped_vex_body}" | tr -d '\r')
    local tmp_csv_file
    tmp_csv_file=$(mktemp)

    # The body is expected to be in the following format:
    # - One CVE entry from the CVE CSV file per line.
    # - Each entry in CSV format with the following fields:
    #   - image_version,vuln_id,pkg_name,pkg_version,target,status,justification,note
    #     - image = image name and version
    #     - vuln_id = the vulnerability ID
    #     - pkg_name = the name of the affected pkg
    #     - pkg_version = the version of the affected pkg
    #     - target = the binary or OS pkg that is affected by the vulnerability
    #     - status = the VEX status
    #     - justification = the VEX justification
    #     - note = extra info about how the false-positive was identified
    # The loop is the last stage of a pipeline, so it runs in a subshell.
    # NOTE(review): read is used without -r, so a backslash left in a line is
    # consumed by read. Confirm that this is intended before changing it.
    echo "${formatted_vex_body}" | while IFS= read line; do
        # Check and fail immediately in case any status or justification is not
        # in the specified list.
        local status=$(get_csv_field "${line}" 6)
        check_status_justification "status" "$status"
        local justification=$(get_csv_field "${line}" 7)
        check_status_justification "justification" "${justification}"

        local image=$(get_csv_field "${line}" 1)
        local vuln_id=$(get_csv_field "${line}" 2)
        local pkg_name=$(get_csv_field "${line}" 3)
        local pkg_version=$(get_csv_field "${line}" 4)
        local target=$(get_csv_field "${line}" 5)
        local note=$(get_csv_field "${line}" 8)

        # First we try to match the line with the target included, which will be
        # positive in case it's a binary package. If it fails, then we remove
        # the target and try to match without it, which will succeed if it's an
        # OS level package. Otherwise, we fail without a valid match.
        # A note about OS level pkgs, in the current scan with Trivy, the target
        # isn't the full pkg name with its path inside the container, like what
        # happens with binaries, it's actually the OS distro.
        local cve_line=""
        if ! cve_line=$(grep "^${image},.*,${pkg_name},${pkg_version},.*,${vuln_id},.*,.*,${target}," "${CVE_FILE}"); then
            if ! cve_line=$(grep "^${image},.*,${pkg_name},${pkg_version},.*,${vuln_id}," "${CVE_FILE}"); then
                local msg="No matching CVE line found for \`${line}\`"
                echo -e "[ERROR] ${msg}"
                issue_add_comment "${msg}."
                fatal
            fi
        fi
        echo -e "[INFO] Found a matching CVE line\n${cve_line}\n"

        # Used to find if the CVE is for a Go binary or a specific distro
        # package. It is the fifth field of the line found in the CVE file.
        local cve_type=$(get_csv_field "${cve_line}" 5)
        local product_id=$(get_product "${cve_type}" "${image}" "${target}")
        local subcomponent_id=$(get_subcomponent "${cve_type}" "${pkg_name}" "${pkg_version}" "${target}")
        echo "\"${vuln_id}\",\"\",\"${product_id}\",\"${subcomponent_id}\",\"${status}\",\"\",\"${justification}\",\"${note}\",\"\"" >> "${tmp_csv_file}"

        # If the product_id is for an image in the Prime registry, then we add
        # another VEX entry for the same image in DockerHub, because it needs to
        # match the exact registry address in order for the CVE scanner to match
        # the VEXed entry.
        if echo "${product_id}" | grep -q "${PRIME_REGISTRY}"; then
            product_id=$(echo "${product_id}" | sed "s/${PRIME_REGISTRY}/index.docker.io/")
            echo "\"${vuln_id}\",\"\",\"${product_id}\",\"${subcomponent_id}\",\"${status}\",\"\",\"${justification}\",\"${note}\",\"\"" >> "${tmp_csv_file}"
        fi

        # The temporary file keeps the rows of the previous lines, so it is
        # merged again, with one more entry, on every iteration.
        merge_cve_csv "${VEX_CVES_MANUAL_CSV}" "${tmp_csv_file}"
    done

    merge_cve_files
    git_commit

    # The rows were already merged, the temporary file is not needed anymore.
    rm -f -- "${tmp_csv_file}"
}

# main() loads the files that sit next to this script and runs the steps in
# order: issue state check, issue body check and the VEX processing.
# env.sh and helper.sh are not part of this file; presumably they provide the
# configuration variables and the helpers (e.g. fatal) used above - verify
# against those files.
main() {
    # '.' is the portable spelling of 'source', which is not available in every
    # /bin/sh.
    . "${WORKING_DIR}/env.sh"
    . "${WORKING_DIR}/helper.sh"

    check_issue_state
    check_issue_body
    vex_cve
}

# Run from the directory that holds this script (with symlinks resolved), so
# that the files next to it can be loaded by main. Every expansion is quoted so
# that a path with spaces is handled correctly.
WORKING_DIR=$(dirname "$(realpath "${0}")")
cd "${WORKING_DIR}"
main

