#!/usr/bin/env bash
#
# Requires bash: the functions below use `[[ ]]`, `local` and `source`, which
# a plain POSIX sh is not required to provide.

# Abort on the first failing command (-e) and trace every command (-x).
set -ex

# get_repo_tags() prints, one per line, the tags (releases) of a given repo
# that will be scanned. Tags whose name contains "alpha", "debug" or "patch"
# are ignored, and release candidates are collapsed: for every base version
# only the final release(s) are kept or, when there is none yet, the newest RC.
# The first parameter is the repo, in the format org_name/repo_name, from which
# the tags are pulled.
get_repo_tags() {
    local repo="${1}"
    local raw_tags_file tags_file days_delta base
    raw_tags_file=$(mktemp)
    tags_file=$(mktemp)
    # By default we scan only the last 60 days of tags.
    days_delta=$(date -d "-60 days" +"%Y-%m-%d")

    # First we retrieve all the tags that we can from the repo, then we filter
    # based on the amount of days to scan. We must specify a limit, as high as
    # possible, because the repo might have generated dozens of RCs and the GH
    # default limit is only 30, so we would miss tags.
    # Only the tag name is needed downstream, so jq emits it directly; the
    # creation date is used solely for the filter inside jq.
    # `|| true`: grep exits non-zero when nothing is left after filtering,
    # which is handled by the fallback below.
    gh release -R "${repo}" list --limit 1000 --json tagName,createdAt,publishedAt | \
        jq -r --arg since "${days_delta}" '.[] | select(.createdAt > $since) | .tagName' | \
        grep -vE "alpha|debug|patch" > "${raw_tags_file}" || true

    # In case the filter per days above produced nothing, it means that the
    # repo in question doesn't generate tags too often, so we simply retrieve
    # and scan the last 10 tags.
    if ! [[ -s "${raw_tags_file}" ]]; then
        gh release -R "${repo}" list --limit 10 --json tagName,createdAt,publishedAt | \
            jq -r '.[] | .tagName' | \
            grep -vE "alpha|debug|patch" > "${raw_tags_file}" || true
    fi

    # Sort the tags, drop duplicates and remove possible empty lines.
    sort -u "${raw_tags_file}" | sed "/^$/d" > "${tags_file}"

    # Remove redundant tags by removing RC versions in case there is already a
    # final released tag. If there is no final tag yet, then we keep only the
    # last RC.
    # The base versions are the tags without any RC related information. RCs
    # are specified after the '-' sign. RKE2 and K3s versions also have the '+'
    # sign for their specific builds.
    # The comparisons below are literal prefix/equality checks (not regexes),
    # so the dots of a version never act as wildcards and a base only matches
    # tags that start with it.
    sed "s/[-+].*//" "${tags_file}" | sort -u | while IFS= read -r base; do
        [[ -n "${base}" ]] || continue
        # If there is a final released version (the base itself, or the base
        # followed by a '+build' suffix), we keep it. awk exits non-zero when
        # it printed nothing.
        if ! awk -v base="${base}" \
            '$0 == base || index($0, base "+") == 1 { print; found = 1 } END { exit !found }' \
            "${tags_file}"; then
            # Otherwise, we sort all related RCs and keep only the last.
            awk -v base="${base}" 'index($0, base "-") == 1' "${tags_file}" | \
                sort -Vr | head -n 1
        fi
    done

    rm -f "${raw_tags_file}" "${tags_file}"
}

# govulncheck_scan() scans a local repo and generates its VEX report with the
# identified false positives. It must be called from the root of the checked
# out repo, because it reads ./go.mod to find the module name.
# The first parameter is the name of the repo being scanned in the format
# org_name/repo_name.
# Outputs the parsed data from the VEX JSON report to CSV format. Prints
# nothing and returns 0 when the package is in the deny list or when the scan
# itself fails.
govulncheck_scan() {
    local repo="${1}"
    # Default scan path for govulncheck.
    local govulncheck_scan_path="./..."
    local package tmp_govulncheck_report

    # Module path declared in go.mod (second field of the "module" line). A
    # missing or unreadable go.mod is not fatal here; it leaves the name empty.
    package=$(awk '$1 == "module" { print $2; exit }' go.mod) || package=""

    # Skip if package is in the deny list.
    if check_pkg_deny_list "${package}"; then
        return
    fi
    package="pkg:golang/${package}"

    # This is needed specifically due to the way that K3s is compiled, otherwise
    # govulncheck will fail. Here we can add other checks if needed.
    if [[ "${repo}" == "k3s-io/k3s" ]]; then
        govulncheck_scan_path="./cmd/server"
    fi

    tmp_govulncheck_report=$(mktemp)

    # If the scan fails, which can happen when the code is broken, we simply
    # ignore it and return successfully, to avoid failing the entire pipeline.
    # The (possibly partial) report is discarded instead of being parsed.
    if ! "${BIN_GOVULNCHECK}" -format openvex \
        "${govulncheck_scan_path}" > "${tmp_govulncheck_report}"; then
        rm -f "${tmp_govulncheck_report}"
        return 0
    fi

    # Transform govulncheck's default JSON output into a CSV file, by filtering
    # only the fields that we need and only when the status is "not_affected",
    # because we will only present false positives in our VEX files.
    # The package is handed to jq as a variable so that its content can never
    # be interpreted as part of the jq program.
    jq -r --arg pkg "${package}" \
        '.statements[]? | select(.status == "not_affected") | [.vulnerability.name,(.vulnerability.aliases | join(",")),$pkg,.products[]."@id",.status,"",.justification,.impact_statement,""] | @csv' \
        "${tmp_govulncheck_report}"

    rm -f "${tmp_govulncheck_report}"
}

# vex_scan() clones every repo listed in TARGETS_FILE into a temporary
# directory, checks out each tag printed by get_repo_tags and appends the
# output of govulncheck_scan for it to a temporary CSV file. That file is then
# passed, together with VEX_CVES_AUTOMATED_CSV, to merge_cve_csv.
# Lines of TARGETS_FILE that are exactly "repo" or that start with '#' are
# skipped. On completion the current directory is WORKING_DIR and the
# temporary files are removed.
vex_scan() {
    # The loop variables are local so that they do not overwrite globals of
    # the same name.
    local tmp_csv_file tmp_scan_dir repo repo_dir tag

    # Declared separately from the assignment so that a mktemp failure is not
    # masked by `local`; continuing with an empty path would make the clones
    # and the `rm -rf` below operate in the wrong directory.
    tmp_csv_file=$(mktemp) || return 1
    tmp_scan_dir=$(mktemp -d) || return 1

    cd "${tmp_scan_dir}" || return 1

    # Entries are org_name/repo_name and are split on whitespace.
    for repo in $(grep -vE "^repo$|^#" "${TARGETS_FILE}"); do
        repo_dir=$(basename "${repo}")

        git clone "https://github.com/${repo}" "${repo_dir}"
        cd "${repo_dir}" || return 1

        for tag in $(get_repo_tags "${repo}"); do
            git checkout "${tag}"
            govulncheck_scan "${repo}" >> "${tmp_csv_file}"
        done

        # Drop the clone before moving on to the next repo.
        cd "${tmp_scan_dir}" || return 1
        rm -rf "${repo_dir}"
    done

    merge_cve_csv "${VEX_CVES_AUTOMATED_CSV}" "${tmp_csv_file}"

    cd "${WORKING_DIR}" || return 1
    rm -rf "${tmp_csv_file}" "${tmp_scan_dir}"
}

# main() is the entry point of the script. It reads env.sh and then helper.sh
# from WORKING_DIR into the current shell, and afterwards calls vex_scan
# followed by merge_cve_files.
main() {
    # Load the definitions that the steps below rely on.
    . "${WORKING_DIR}/env.sh"
    . "${WORKING_DIR}/helper.sh"

    # Run the scan first, then the merge step.
    vex_scan
    merge_cve_files
}

# Resolve the directory that contains this script and run from there; main
# sources env.sh and helper.sh relative to WORKING_DIR. The expansions are
# quoted so that a script path containing whitespace is handled correctly.
WORKING_DIR=$(dirname "$(realpath "${0}")")
cd "${WORKING_DIR}"
main

