/*
 * An implementation of TMSR RSA
 * S.MG, 2018
 */

#include "smg_rsa.h"
#include <assert.h>

void gen_keypair( RSA_secret_key *sk ) {
	/* precondition: sk is not null */
	assert(sk != NULL);

	/* precondition: enough memory allocated, corresponding to key size */
	int noctets_pq = KEY_LENGTH_OCTETS / 2;
	unsigned int nlimbs_pq = mpi_nlimb_hint_from_nbytes( noctets_pq);
	unsigned int nlimbs_n = mpi_nlimb_hint_from_nbytes( KEY_LENGTH_OCTETS);
	assert( mpi_get_alloced( sk->n) >= nlimbs_n);
	assert( mpi_get_alloced( sk->p) >= nlimbs_pq);
	assert( mpi_get_alloced( sk->q) >= nlimbs_pq);

	/* helper variables for calculating Euler's totient phi=(p-1)*(q-1) */
	MPI p_minus1 = mpi_alloc(nlimbs_pq);
	MPI q_minus1 = mpi_alloc(nlimbs_pq);
	MPI phi = mpi_alloc(nlimbs_n);

	/* generate 2 random primes, p and q*/
	/* gen_random_prime sets top 2 bits to 1 so p*q will have KEY_LENGTH bits */
	/* in the extremely unlikely case that p = q, discard and generate again */
	do {
		gen_random_prime( noctets_pq, sk->p);
		gen_random_prime( noctets_pq, sk->q);
	} while ( mpi_cmp( sk->p, sk->q) == 0);

	/* swap if needed, to ensure p < q for calculating u */
	if ( mpi_cmp( sk->p, sk->q) > 0)
		mpi_swap( sk->p, sk->q);

	/* calculate helper for Chinese Remainder Theorem:
			u = p ^ -1 ( mod q )
		 this is used to speed-up decryption.
	*/
	mpi_invm( sk->u, sk->p, sk->q);

	/* calculate modulus n = p * q */
	mpi_mul( sk->n, sk->p, sk->q);

	/* calculate Euler totient: phi = (p-1)*(q-1) */
	mpi_sub_ui( p_minus1, sk->p, 1);
	mpi_sub_ui( q_minus1, sk->q, 1);
	mpi_mul( phi, p_minus1, q_minus1);

	/* choose random prime e, public exponent, with 3 < e < phi */
	/* because e is prime, gcd(e, phi) is always 1 so no need to check it */
	do {
		gen_random_prime( noctets_pq, sk->e);
	} while ( (mpi_cmp_ui(sk->e, 3) < 0) || (mpi_cmp(sk->e, phi) > 0));

	/* calculate private exponent d, 1 < d < phi, where e * d = 1 mod phi */
	mpi_invm( sk->d, sk->e, phi);

	/*	tidy up: free locally allocated memory for helper variables */
	mpi_free(phi);
	mpi_free(p_minus1);
	mpi_free(q_minus1);
}

void public_rsa( MPI output, MPI input, RSA_public_key *pk ) {

	/* mpi_powm can't handle output and input being same */
	assert (output != input);

	mpi_powm( output, input, pk->e, pk->n );
}

void secret_rsa( MPI output, MPI input, RSA_secret_key *skey ) {
	/* at its simplest, this would be input ^ d (mod n), hence:
	 *    mpi_powm( output, input, skey->d, skey->n );
	 * for faster decryption though, we'll use CRT and Garner's algorithm, hence:
	 *        u = p ^ (-1) (mod q) , already calculated and stored in skey
	 *       dp = d mod (p-1)
	 *       dq = d mod (q-1)
	 *       m1 = input ^ dp (mod p)
	 *       m2 = input ^ dq (mod q)
	 *        h = u * (m2 - m1) mod q
	 *   output = m1 + h * p
   * Note that same CRT speed up isn't available for encryption because at 
encryption time not enough information is available (only e and n are known).
	 */

	/* allocate memory for all local, helper MPIs */
	MPI p_minus1 = mpi_alloc( mpi_get_nlimbs( skey->p) );
	MPI q_minus1 = mpi_alloc( mpi_get_nlimbs( skey->q) );
	int nlimbs   = mpi_get_nlimbs( skey->n ) + 1;
	MPI dp       = mpi_alloc( nlimbs );
	MPI dq       = mpi_alloc( nlimbs );
	MPI m1       = mpi_alloc( nlimbs );
	MPI m2       = mpi_alloc( nlimbs );
	MPI h        = mpi_alloc( nlimbs );

	/* p_minus1 = p - 1 */
	mpi_sub_ui( p_minus1, skey->p, 1 );

	/* dp = d mod (p - 1) aka remainder of d / (p - 1) */
	mpi_fdiv_r( dp, skey->d, p_minus1 );

	/* m1 = input ^ dp (mod p) */
	mpi_powm( m1, input, dp, skey->p );

	/* q_minus1 = q - 1 */
	mpi_sub_ui( q_minus1, skey->q, 1 );

	/* dq = d mod (q - 1) aka remainder of d / (q - 1) */
	mpi_fdiv_r( dq, skey->d, q_minus1 );

	/* m2 = input ^ dq (mod q) */
	mpi_powm( m2, input, dq, skey->q );

	/* h = u * ( m2 - m1 ) mod q */
	mpi_sub( h, m2, m1 );
	if ( mpi_is_neg( h ) )
		mpi_add ( h, h, skey->q );
	mpi_mulm( h, skey->u, h, skey->q );

	/* output = m1 + h * p */
	mpi_mul ( h, h, skey->p );
	mpi_add ( output, m1, h );

	/* tidy up */
	mpi_free ( p_minus1 );
	mpi_free ( q_minus1 );
	mpi_free ( dp );
	mpi_free ( dq );
	mpi_free ( m1 );
	mpi_free ( m2 );
	mpi_free ( h );

}