#include <assert.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <strings.h>

#include <Rmath.h>

#include "cgeneric.h"

#define Malloc(n_, type_) (type_ *) malloc((n_) * sizeof(type_))

#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458176568
#endif

/*
 * Numerically stable inverse-logit.
 *
 * The naive implementation
 *
 *     1 / (1 + exp(-eta))
 *
 * overflows when eta is a large negative number because exp(-eta) can exceed
 * the floating-point range. Splitting by sign keeps the intermediate exponent
 * bounded:
 *
 * - for eta >= 0, exp(-eta) is safe and small;
 * - for eta < 0, exp(eta) is safe and small.
 *
 * Both branches are algebraically identical, but the two-form implementation
 * avoids unnecessary Inf/NaN propagation in extreme tails.
 */
static double inv_logit(double eta)
{
	if (eta >= 0.0) {
		double z = exp(-eta);
		return 1.0 / (1.0 + z);
	}

	double z = exp(eta);
	return z / (1.0 + z);
}

/*
 * Stable evaluation of log(1 + exp(x)).
 *
 * This quantity appears repeatedly when converting between the logit scale and
 * log-probabilities. The direct form is unstable:
 *
 * - when x is large and positive, exp(x) overflows;
 * - when x is large and negative, exp(x) underflows to 0 and we lose precision.
 *
 * Rewriting by sign avoids both issues:
 *
 * - x + log1p(exp(-x)) for positive x;
 * - log1p(exp(x)) for non-positive x.
 *
 * log1p(.) is used because it is accurate when its argument is close to zero.
 */
static double stable_log1pexp(double x)
{
	if (x > 0.0) {
		return x + log1p(exp(-x));
	}

	return log1p(exp(x));
}

/*
 * log(sigmoid(eta)) = -log(1 + exp(-eta))
 *
 * Keeping this as a dedicated helper makes the binomial log-likelihood easier
 * to read and centralizes the numerically stable form in one place.
 */
static double log_inv_logit(double eta)
{
	/* log(sigmoid(eta)) = -log(1 + exp(-eta)), delegated to the stable
	 * log1pexp helper. */
	double log1p_exp_neg = stable_log1pexp(-eta);

	return -log1p_exp_neg;
}

/*
 * log(1 - sigmoid(eta)) = -log(1 + exp(eta))
 *
 * This mirrors log_inv_logit(.) above and is the stable form for the failure
 * probability on the logit scale.
 */
static double log1m_inv_logit(double eta)
{
	/* log(1 - sigmoid(eta)) = -log(1 + exp(eta)); the failure-probability
	 * counterpart of log_inv_logit(.). */
	double log1p_exp_pos = stable_log1pexp(eta);

	return -log1p_exp_pos;
}

/*
 * Stable addition on the log scale.
 *
 * This is used when a censored observation contributes a left-tail probability
 * P(Y <= q), which we evaluate as a sum of Binomial point masses. Summing on
 * the log scale avoids catastrophic underflow when the tail probability is
 * extremely small.
 */
static double stable_logspace_add(double log_x, double log_y)
{
	if (log_x == -INFINITY) {
		return log_y;
	}
	if (log_y == -INFINITY) {
		return log_x;
	}
	if (log_x < log_y) {
		double tmp = log_x;
		log_x = log_y;
		log_y = tmp;
	}

	return log_x + log1p(exp(log_y - log_x));
}

/*
 * INLA passes y as doubles, but this likelihood expects count data.
 *
 * The conversion is intentionally strict:
 *
 * - round to the nearest integer;
 * - assert that the original value is essentially integer-valued.
 *
 * That catches accidental misuse early during development instead of silently
 * truncating or accepting malformed inputs.
 */
static int as_count(double value)
{
	int ivalue = (int) lround(value);
	assert(fabs(value - (double) ivalue) < 1e-8);
	return ivalue;
}

/*
 * Same idea as as_count(.), but specialized for a 0/1 indicator.
 *
 * The third response component y[2], when present, flags whether the observed
 * count is exact or censored. Restricting it to {0, 1} avoids ambiguous states.
 */
static int as_binary_flag(double value)
{
	/* Reuse the strict count conversion, then restrict to {0, 1}: y[2] is
	 * the censoring indicator and any other value is ambiguous. */
	int flag = as_count(value);

	assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Ordinary binomial log-likelihood for:
 *
 *     Y ~ Binomial(trials, p),   logit(p) = eta
 *
 * where "successes" is the realized count Y.
 *
 * The combinatorial term is written with lgamma(.) so the expression remains
 * valid and stable for moderately large trial counts:
 *
 *     log choose(trials, successes)
 *
 * plus the success and failure contributions on the log-probability scale.
 *
 * Returning -Inf for invalid inputs lets the caller treat impossible parameter
 * combinations as zero probability mass without additional branching.
 */
static double binomial_loglike(int successes, int trials, double eta)
{
	/*
	 * Log-likelihood of Y = successes for Y ~ Binomial(trials, p) with
	 * logit(p) = eta. Returns -Inf for impossible inputs so callers can
	 * treat them as zero probability mass, and 0 for the empty experiment.
	 *
	 * Fix vs the previous version: the success/failure terms are only
	 * accumulated when their count is positive. Otherwise a zero count
	 * multiplied by an infinite log-probability (eta = +/-Inf) produced
	 * 0 * (-Inf) = NaN instead of the correct finite contribution.
	 */
	if (trials < 0 || successes < 0 || successes > trials) {
		return -INFINITY;
	}

	if (trials == 0) {
		return 0.0;
	}

	int failures = trials - successes;

	/* log choose(trials, successes), stable for large trial counts. */
	double ll = lgamma((double) trials + 1.0)
		- lgamma((double) successes + 1.0)
		- lgamma((double) failures + 1.0);

	if (successes > 0) {
		ll += successes * log_inv_logit(eta);
	}
	if (failures > 0) {
		ll += failures * log1m_inv_logit(eta);
	}

	return ll;
}

/*
 * Stable log P(Y <= q) for Y ~ Binomial(trials, inv_logit(eta)).
 *
 * Rmath::pbinom(..., log_p = 1) is usually fine, but for the censored cancer
 * application we routinely see left tails like P(Y <= 10) with trial counts in
 * the tens or hundreds of thousands. In that regime R's incomplete-beta path
 * can underflow to -Inf even though the log-probability is still finite.
 *
 * The censoring threshold is small in this model, so a direct recurrence over
 * the first q + 1 point masses is both exact and numerically stable:
 *
 *   P(Y = 0)     = (1 - p)^n
 *   P(Y = k + 1) = P(Y = k) * ((n - k) / (k + 1)) * p / (1 - p)
 *
 * Accumulating those terms with stable_logspace_add(.) preserves finite log tails in
 * cases where pbinom() would otherwise drop to -Inf.
 */
static double binomial_logcdf_small_q(int q, int trials, double eta)
{
	if (trials < 0) {
		return NAN;
	}
	if (q < 0) {
		return -INFINITY;
	}
	/* q >= trials covers the whole support (and subsumes trials == 0 once
	 * q >= 0), so the CDF is exactly 1, i.e. log 1 = 0. */
	if (q >= trials || trials == 0) {
		return 0.0;
	}

	/* log P(Y = 0) = n * log(1 - p), evaluated on the logit scale. */
	double log_pmf = trials * log1m_inv_logit(eta);
	double log_cdf = log_pmf;

	/*
	 * Recurrence over the point masses:
	 *
	 *   P(Y = k + 1) = P(Y = k) * ((n - k) / (k + 1)) * p / (1 - p)
	 *
	 * where log(p / (1 - p)) is simply eta.
	 */
	for (int k = 0; k < q; k++) {
		log_pmf += log((double) (trials - k)) - log((double) (k + 1)) + eta;
		log_cdf = stable_logspace_add(log_cdf, log_pmf);
	}

	return log_cdf;
}

/*
 * General log-CDF helper.
 *
 * For small censoring thresholds we always use the exact recurrence above.
 * Otherwise we fall back to Rmath::pbinom and only reuse the recurrence as a
 * rescue path if Rmath still reports a non-finite log probability.
 */
/*
 * Thresholds at or below this value always use the exact O(q) recurrence;
 * larger thresholds trust Rmath first and only fall back to the recurrence
 * when pbinom() reports a non-finite log probability. Named constant instead
 * of a magic 64 so the cutoff is self-describing and easy to tune.
 */
enum { BINOM_LOGCDF_SMALL_Q_MAX = 64 };

static double binomial_logcdf(int q, int trials, double eta)
{
	/* Same edge-case conventions as binomial_logcdf_small_q(.). */
	if (trials < 0) {
		return NAN;
	}
	if (q < 0) {
		return -INFINITY;
	}
	if (q >= trials || trials == 0) {
		return 0.0;
	}

	if (q <= BINOM_LOGCDF_SMALL_Q_MAX) {
		return binomial_logcdf_small_q(q, trials, eta);
	}

	double probability = inv_logit(eta);
	double log_cdf = pbinom((double) q, (double) trials, probability, 1, 1);
	if (isfinite(log_cdf)) {
		return log_cdf;
	}

	/* Rescue path: Rmath's incomplete-beta evaluation underflowed to -Inf
	 * even though the true log tail is finite. */
	return binomial_logcdf_small_q(q, trials, eta);
}

/*
 * Log-likelihood contribution for an observation that may be censored.
 *
 * Interpretation of inputs:
 *
 * - censoring_point = observed count threshold;
 * - trials          = total number of Bernoulli trials;
 * - eta             = linear predictor on the logit scale;
 * - censored        = 0 means the count is exact, 1 means the count is
 *                     right-censored in the sense used by the surrounding R
 *                     code / INLA model.
 *
 * Behavior:
 *
 * - if censored == 0, this reduces to the exact binomial log-likelihood at
 *   Y = censoring_point;
 * - if censored == 1, the observed datum only tells us that Y <=
 *   censoring_point, so the likelihood contribution is the cumulative
 *   probability P(Y <= censoring_point). We evaluate that on the log scale
 *   with binomial_logcdf(.), which stays finite for the small-threshold,
 *   large-trial regime used in the cancer model.
 *
 */
static double censored_binomial_loglike(int censoring_point, int trials, double eta,
					int censored)
{
	/* Exact observation: ordinary binomial log-likelihood at the count. */
	if (censored == 0) {
		return binomial_loglike(censoring_point, trials, eta);
	}

	/* Censored observation: the datum only says Y <= censoring_point, so
	 * the contribution is the (log) left-tail probability. */
	int invalid = (trials < 0) || (censoring_point < 0) || (censoring_point > trials);

	return invalid ? -INFINITY : binomial_logcdf(censoring_point, trials, eta);
}

/*
 * CDF corresponding to the same observation model.
 *
 * For INLA's CDF callback we return the binomial CDF at the supplied
 * censoring_point. The "censored" flag is intentionally ignored here because
 * the CDF of the latent/count distribution itself does not change; only the
 * likelihood contribution changes depending on whether an observation is exact
 * or carries interval/censoring information.
 *
 * Edge-case conventions:
 *
 * - point < 0   -> CDF is 0;
 * - trials < 0  -> invalid input, return NaN;
 * - trials == 0 -> degenerate distribution at 0, so CDF is 1;
 * - point > n   -> treated as invalid in this implementation and returns NaN.
 *
 * The final branch converts the stable log-CDF back to the natural
 * probability scale and clamps the result away from exact 0/1. That avoids
 * downstream Inf/NaN propagation when INLA uses the CDF callback to build
 * initial values for extreme censored rows.
 */
static double censored_binomial_cdf(int censoring_point, int trials, double eta,
				    int censored)
{
	/* The count distribution itself does not depend on censoring; only
	 * the likelihood contribution does. */
	(void) censored;

	/* Edge cases, checked in this order: clamp below-support points to a
	 * tiny positive probability, reject negative trial counts, treat the
	 * degenerate n = 0 distribution as (almost) certain, and reject
	 * points above the support. */
	if (censoring_point < 0) {
		return DBL_MIN;
	}
	if (trials < 0) {
		return NAN;
	}
	if (trials == 0) {
		return 1.0 - DBL_EPSILON;
	}
	if (censoring_point > trials) {
		return NAN;
	}

	double log_cdf = binomial_logcdf(censoring_point, trials, eta);
	if (!isfinite(log_cdf)) {
		/* Map -Inf to "essentially zero" and anything else to
		 * "essentially one" so INLA never sees exact 0/1. */
		return (log_cdf < 0.0) ? DBL_MIN : 1.0 - DBL_EPSILON;
	}

	/* Back-transform and clamp strictly inside (0, 1). */
	double cdf = exp(log_cdf);
	if (cdf <= 0.0) {
		cdf = DBL_MIN;
	} else if (cdf >= 1.0) {
		cdf = 1.0 - DBL_EPSILON;
	}

	return cdf;
}

/*
 * INLA cgeneric entry point.
 *
 * The function is multiplexed by "cmd", which tells the shared library which
 * operation INLA is requesting. The important conventions are:
 *
 * - INITIAL:   return an initial value for hyperparameters;
 * - LOG_PRIOR: return the log-prior contribution for hyperparameters;
 * - LOGLIKE:   fill "result" with the log-likelihood evaluated at each x[i];
 * - CDF:       fill "result" with the observation CDF evaluated at each x[i];
 * - QUIT:      shutdown hook, unused here.
 *
 * Inputs used by this likelihood:
 *
 * - y[0] = observed count / censoring threshold;
 * - y[1] = number of trials;
 * - y[2] = optional 0/1 censoring indicator.
 *
 * "x" is the vector of linear predictor (eta) values supplied by INLA. Each
 * callback must therefore loop over nx and compute the requested quantity at
 * every x[i].
 *
 * No hyperparameters are used in this model, so theta and LOG_PRIOR/INITIAL
 * are trivial and return zeros.
 */
/*
 * The exported symbol name must match the `model=` argument passed to
 * `inla.cloglike.define()` on the R side.
 */
double *inla_cloglike_censored_binomial(inla_cloglike_cmd_tp cmd, double *theta,
					inla_cgeneric_data_tp *data, int ny, double *y,
					int nx, double *x, double *result)
{
	double *ret = NULL;

	(void) theta;			       /* no hyperparameters in this model */
	(void) data;
	assert(ny >= 2);

	if (cmd == INLA_CLOGLIKE_INITIAL || cmd == INLA_CLOGLIKE_LOG_PRIOR) {
		/*
		 * No hyperparameters: both callbacks return a single zero —
		 * a placeholder initial value and a zero log-prior
		 * contribution respectively.
		 */
		ret = Malloc(1, double);
		ret[0] = 0.0;
	} else if (cmd == INLA_CLOGLIKE_LOGLIKE || cmd == INLA_CLOGLIKE_CDF) {
		/*
		 * Decode the observation once (shared by both callbacks):
		 * y[0] = count / censoring threshold, y[1] = trials,
		 * y[2] = optional 0/1 censoring indicator.
		 */
		int cases = as_count(y[0]);
		int trials = as_count(y[1]);
		int censored = (ny >= 3) ? as_binary_flag(y[2]) : 0;

		if (cmd == INLA_CLOGLIKE_LOGLIKE) {
			for (int i = 0; i < nx; i++) {
				result[i] = censored_binomial_loglike(cases, trials, x[i], censored);
			}
		} else {
			for (int i = 0; i < nx; i++) {
				result[i] = censored_binomial_cdf(cases, trials, x[i], censored);
			}
		}
	}
	/* INLA_CLOGLIKE_QUIT: nothing was allocated globally, nothing to do. */

	return ret;
}
