/***************************************************************************
*
Copyright 2013 CertiVox UK Ltd. *
*
This file is part of CertiVox MIRACL Crypto SDK. *
*
The CertiVox MIRACL Crypto SDK provides developers with an *
extensive and efficient set of cryptographic functions. *
For further information about its features and functionalities please *
refer to http://www.certivox.com *
*
* The CertiVox MIRACL Crypto SDK is free software: you can *
redistribute it and/or modify it under the terms of the *
GNU Affero General Public License as published by the *
Free Software Foundation, either version 3 of the License, *
or (at your option) any later version. *
*
* The CertiVox MIRACL Crypto SDK is distributed in the hope *
that it will be useful, but WITHOUT ANY WARRANTY; without even the *
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
See the GNU Affero General Public License for more details. *
*
* You should have received a copy of the GNU Affero General Public *
License along with CertiVox MIRACL Crypto SDK. *
If not, see <http://www.gnu.org/licenses/>. *
*
You can be released from the requirements of the license by purchasing *
a commercial license. Buying such a license is mandatory as soon as you *
develop commercial activities involving the CertiVox MIRACL Crypto SDK *
without disclosing the source code of your own applications, or shipping *
the CertiVox MIRACL Crypto SDK with a closed source product. *
*
***************************************************************************/
/*
* MIRACL fast fourier multiplication routine, using 3 prime method.
* mrfast.c - only faster for very high precision multiplication
* of numbers > about 4096 bits (see below)
* See "The Fast Fourier Transform in a Finite Field" by J.M. Pollard,
* Mathematics of Computation, Vol. 25, No. 114, April 1971, pp365-374
* Also Knuth Vol. 2, Chapt. 4.3.3
*
* Takes time proportional to 9+15N+9N.lg(N) to multiply two different
* N digit numbers. This reduces to 6+18N+6N.lg(N) when squaring.
* The classic method takes N.N and N(N+1)/2 respectively
*
* Fast Polynomial arithmetic
*
* See "A New Polynomial Factorisation Algorithm and its Implementation"
* by Victor Shoup, Jl. Symbolic Computation 1996
* Uses FFT method for fast arithmetic of large degree polynomials
*
*/
#include <stdlib.h>
#include "miracl.h"
#ifdef MR_FP
#include <math.h>
#endif
#ifdef MR_WIN64
#include <intrin.h>
#endif
#ifndef MR_STATIC
/*
 * NOTE(review): this span is damaged in the text as received.  The line
 * beginning "for (i=0;icheck=OFF;" fuses the loop header of twop() with the
 * interior of a second function: the rest of twop(), the second function's
 * signature, its local declarations and its opening statements are missing.
 * Two later "for" headers ("for (i=0;iroots[i]=..." and "for (j=1;jroots...")
 * are truncated the same way.  The code below is left untouched; restore the
 * missing text from a pristine copy before building.
 *
 * twop(): per its own comment, returns 2^n.  The visible part returns 1
 * when n is 0.
 *
 * Second function (presumably mr_fft_init, judging by the four-argument
 * calls made elsewhere in this file - TODO confirm).  The visible part:
 *  - with mr_mip->check set to OFF, forms w5 = m1*m2*(2*newn+1);
 *  - counts in pr how many values p=kk*newn+1 satisfying
 *    spmd(2,p-1,p)==1 have to be divided out of w5 before it reaches zero;
 *  - returns pr at once if logn<=mr_mip->logN and pr==mr_mip->nprimes;
 *  - otherwise calls fft_reset() and allocates the prime, inverse, roots, t,
 *    cr, wa, wb and wc tables, fills them per prime, and records logN/nprimes;
 *  - when cr is non-zero, calls scrt_init() on the prime table;
 *  - returns pr.
 */
static mr_utype twop(int n)
{ /* 2^n */
#ifdef MR_FP
int i;
#endif
mr_utype r=1;
if (n==0) return r;
#ifdef MR_FP
for (i=0;icheck=OFF;
multiply(_MIPP_ m1,m2,mr_mip->w5);
premult(_MIPP_ mr_mip->w5,2*newn+1,mr_mip->w5);
/* starting multiplier for the search; halved while 4*kk*newn exceeds a non-zero base */
kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
pr=0;
while (size(mr_mip->w5)>0)
{ /* find out how many primes will be needed */
do
{
kk--;
p=kk*newn+1;
} while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
#ifdef MR_FP_ROUNDING
mr_sdiv(_MIPP_ mr_mip->w5,p,mr_invert(p),mr_mip->w5);
#else
mr_sdiv(_MIPP_ mr_mip->w5,p,mr_mip->w5);
#endif
pr++;
}
mr_mip->check=ON;
/* if nothing has changed, don't recalculate */
if (logn<=mr_mip->logN && pr==mr_mip->nprimes) return pr;
fft_reset(_MIPPO_ );
/* NOTE(review): none of the mr_alloc() results below is tested here - confirm how mr_alloc reports failure */
mr_mip->prime=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->inverse=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->roots=(mr_utype**)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
mr_mip->t= (mr_utype **)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
mr_mip->cr=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->wa=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->wb=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->wc=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
/* restart the search from the same multiplier used for counting above */
kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
for (i=0;iroots[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->t[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
do
{
kk--;
p=kk*newn+1;
} while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
proot=p-1;
for (j=1;jroots[i][0]=proot; /* build residue table */
for (j=1;jroots[i][j]=smul(mr_mip->roots[i][j-1],proot,p);
mr_mip->inverse[i]=invers((mr_small)newn,p);
mr_mip->prime[i]=p;
}
mr_mip->logN=logn;
mr_mip->nprimes=pr;
/* set up chinese remainder structure */
/* NOTE(review): if scrt_init() fails the tables are released by fft_reset(), yet pr is still returned - confirm callers cope */
if (cr)
if (!scrt_init(_MIPP_ &mr_mip->chin,pr,mr_mip->prime)) fft_reset(_MIPPO_ );
return pr;
}
/*
 * Release every table held for the FFT routines and mark them as absent.
 *
 * Three groups are handled independently:
 *  - if mr_mip->degree is non-zero, the per-prime s1/s2 tables and their
 *    pointer arrays are freed and degree is set back to 0;
 *  - if mr_mip->logN is non-zero, the per-prime roots/t tables, the wa/wb/wc
 *    work arrays and the cr/t/roots/inverse/prime arrays are freed, and
 *    nprimes, logN and same are cleared;
 *  - if the chinese remainder structure is in use (chin.NP non-zero) it is
 *    closed with scrt_end().
 *
 * The s1/s2 tables must be released first: their row count is nprimes,
 * which the second group resets to 0.
 */
void fft_reset(_MIPDO_ )
{ /* reclaim any space used by FFT */
    int i;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    if (mr_mip->degree!=0)
    { /* clear any precomputed tables */
        for (i=0;i<mr_mip->nprimes;i++)
        {
            mr_free(mr_mip->s1[i]);
            mr_free(mr_mip->s2[i]);
        }
        mr_free(mr_mip->s1);
        mr_free(mr_mip->s2);
        mr_mip->degree=0;
    }
    if (mr_mip->logN!=0)
    { /* clear away old stuff */
        for (i=0;i<mr_mip->nprimes;i++)
        {
            mr_free(mr_mip->roots[i]);
            mr_free(mr_mip->t[i]);
        }
        mr_free(mr_mip->wa);
        mr_free(mr_mip->wb);
        mr_free(mr_mip->wc);
        mr_free(mr_mip->cr);
        mr_free(mr_mip->t);
        mr_free(mr_mip->roots);
        mr_free(mr_mip->inverse);
        mr_free(mr_mip->prime);
        mr_mip->nprimes=0;
        mr_mip->logN=0;
        mr_mip->same=FALSE;
    }
    /* clear CRT structure */
    if (mr_mip->chin.NP!=0) scrt_end(&mr_mip->chin);
}
/*
 * In-place decimation-in-frequency transform of data[] (per the comment on
 * the opening brace), working modulo mr_mip->prime[pr] with the root table
 * mr_mip->roots[pr].
 *
 *   logn - presumably log2 of the transform length; the fragment "newn=(1<"
 *          suggests newn is a power of two - TODO confirm
 *   pr   - index selecting the prime and its root table
 *   data - array transformed in place
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "newn=(1<logN-logn);", "for (k=0;k>=1;", "for (i=0;i=prime) ..." and
 * "for (m=1;m=prime) ..." have each lost part of their content, and the
 * "#endif" after the first closing brace has lost its matching "#if".  The
 * code is left untouched; restore it from a pristine copy before building.
 */
void mr_dif_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-frequency fourier transform */
int mmax,m,j,k,istep,i,ii,jj,newn,offset;
mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large iprime;
#endif
prime=mr_mip->prime[pr];
roots=mr_mip->roots[pr];
#ifdef MR_FP_ROUNDING
iprime=mr_invert(prime);
#endif
newn=(1<logN-logn);
mmax=newn;
for (k=0;k>=1;
ii=newn;
jj=newn/istep;
ii-=jj;
for (i=0;i=prime) data[i]-=prime;
data[j]=temp;
}
for (m=1;m=prime) data[i]-=prime;
/* data[j] = w*temp reduced modulo prime, by whichever multiply-and-reduce form the build selects */
#ifdef MR_NOASM
dble=(mr_large)w*temp;
#ifdef MR_FP_ROUNDING
data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
#else
data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
#endif
#else
#ifdef MR_FP_ROUNDING
imuldiv(w,temp,(mr_small)0,prime,iprime,(mr_small *)&data[j]);
#else
muldiv(w,temp,(mr_small)0,prime,(mr_small *)&data[j]);
#endif
#endif
}
#endif
}
}
}
/*
 * In-place decimation-in-time inverse transform of data[] (per the comment
 * on the opening brace), working modulo mr_mip->prime[pr] with the root
 * table mr_mip->roots[pr].
 *
 *   logn - presumably log2 of the transform length - TODO confirm
 *   pr   - index selecting the prime and its root table
 *   data - array transformed in place
 *
 * offset is the difference between the table size exponent mr_mip->logN and
 * logn.  The callers in this file scale the result by an adjusted
 * mr_mip->inverse[pr] afterwards; no such scaling is visible here.
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "newn=(1<=prime) ..." and "for (m=1;m=prime) ..." have lost most of
 * the loop structure, and the "#endif" before the closing braces has lost
 * its matching "#if".  The code is left untouched; restore it from a
 * pristine copy before building.
 */
void mr_dit_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-time inverse fourier transform */
int mmax,m,j,k,i,istep,ii,jj,newn,offset;
mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large iprime;
#endif
prime=mr_mip->prime[pr];
roots=mr_mip->roots[pr];
offset=(mr_mip->logN-logn);
#ifdef MR_FP_ROUNDING
iprime=mr_invert(prime);
#endif
newn=(1<=prime) data[i]-=prime;
}
for (m=1;m=prime) data[i]-=prime;
}
#endif
}
mmax=istep;
}
}
static void modxn_1(_MIPD_ int n,int deg,big *x)
{ /* Fold X (of degree deg) modulo x^n-1: every coefficient at index n..deg is
     added onto the coefficient n places below it and then cleared */
    int lo,hi;
    lo=0;
    hi=n;
    while (hi<=deg)
    {
        nres_modadd(_MIPP_ x[lo],x[hi],x[lo]);
        zero(x[hi]);
        lo++;
        hi++;
    }
}
/*
 * Polynomial remainder using precomputed tables.
 *
 *   dg - degree of G
 *   G  - input polynomial; it is overwritten (modxn_1 is applied to it)
 *   R  - receives the result coefficients
 *
 * Returns FALSE if mr_mip->degree is 0 (the preset tables have been
 * destroyed), TRUE otherwise.  The modulus degree n is read from
 * mr_mip->degree, and the s1/s2 tables used here are the ones
 * mr_polymod_set() in this file allocates and fills.
 *
 * Visible outline: a first pass transforms the coefficients of G from
 * index n upwards, multiplies pointwise by s1, inverse-transforms and
 * recombines through scrt()/redc() into R; a second, half-size pass
 * multiplies R by s2; finally G is folded with modxn_1(newn/2,...) and
 * R[j] is set to G[j]-R[j].
 *
 * NOTE(review): this function is damaged in the text as received.  Every
 * line of the form "for (i=0;iprime[i];", "for (j=...;jt[i][j]..." or
 * "for (j=0;jcr[i]=..." has lost its loop bound, any opening brace and, in
 * places, an "#ifdef MR_FP_ROUNDING" line.  The code is left untouched;
 * restore it from a pristine copy before building.
 */
BOOL mr_poly_rem(_MIPD_ int dg,big *G,big *R)
{ /* G is a polynomial of degree dg - G is overwritten */
int i,j,newn,logn,np,n;
mr_utype p,inv,fac;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large ip;
#endif
n=mr_mip->degree; /* degree of modulus */
if (n==0) return FALSE; /* the preset tables have been destroyed */
np=mr_mip->nprimes;
newn=1; logn=0;
while (2*n>newn) { newn<<=1; logn++; }
for (i=0;iprime[i];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
for (j=n;j<=dg;j++)
mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,ip,mr_mip->w1);
#else
for (j=n;j<=dg;j++)
mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,mr_mip->w1);
#endif
for (j=dg-n+1;jt[i][j]=0;
mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
for (j=0;jt[i][j],mr_mip->s1[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->t[i][j],mr_mip->s1[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
inv=mr_mip->inverse[i];
if (mr_mip->logN > logn)
{ /* adjust 1/N log p for N/2, N/4 etc */
fac=twop(mr_mip->logN-logn);
inv=smul(fac,inv,p);
}
for (j=0;jt[i][j+n-1],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j+n-1]);
#else
muldiv(mr_mip->t[i][j+n-1],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j+n-1]);
#endif
}
mr_mip->check=OFF;
mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
/* w6 = N.R */
for (j=0;jcr[i]=mr_mip->t[i][j+n-1];
scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* R[j] may be too big for redc */
redc(_MIPP_ mr_mip->w7,R[j]);
}
mr_mip->check=ON;
for (i=0;iprime[i];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
for (j=0;jt[i][j]=mr_sdiv(_MIPP_ R[j],p,ip,mr_mip->w1);
#else
for (j=0;jt[i][j]=mr_sdiv(_MIPP_ R[j],p,mr_mip->w1);
#endif
for (j=n;j<1+newn/2;j++) mr_mip->t[i][j]=0;
mr_dif_fft(_MIPP_ logn-1,i,mr_mip->t[i]); /* Note: Half size */
for (j=0;jt[i][j],mr_mip->s2[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->t[i][j],mr_mip->s2[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
mr_dit_fft(_MIPP_ logn-1,i,mr_mip->t[i]);
inv=mr_mip->inverse[i];
if (mr_mip->logN > logn-1)
{
fac=twop(mr_mip->logN-logn+1);
inv=smul(fac,inv,p);
}
for (j=0;jt[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
}
modxn_1(_MIPP_ newn/2,dg,G); /* G=G mod 2^x - 1 */
mr_mip->check=OFF;
mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
/* w6 = N.R */
for (j=0;jcr[i]=mr_mip->t[i][j];
scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* R[j] may be too big for redc */
redc(_MIPP_ mr_mip->w7,R[j]);
/* final coefficient: folded G[j] minus the product term just recovered */
nres_modsub(_MIPP_ G[j],R[j],R[j]);
}
mr_mip->check=ON;
return TRUE;
}
/*
 * Precompute the s1/s2 tables that mr_poly_rem() reads.
 *
 *   n  - degree of f (recorded in mr_mip->degree)
 *   rf - coefficients loaded into the s1 tables; NOTE(review): what rf
 *        represents is not visible here - confirm against the caller
 *   f  - polynomial of degree n; entries may be NULL, which are treated as 0
 *
 * Visible outline: any existing s1/s2 tables are freed; s1 and s2 pointer
 * arrays are allocated for np primes; a scratch copy F of f is made and
 * folded with modxn_1(newn/2,n,F); for each prime, s1[i] (newn entries) is
 * loaded from rf and transformed at size logn, and s2[i] (1+newn/2 entries)
 * is loaded from F and transformed at size logn-1; F is then released.
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "for (i=0;inprimes;i++)", "if (mr_mip->logNmodulus,...",
 * "for (i=0;is1[i]=..." and "for (j=0;js1[i][j]=0;" have lost loop bounds,
 * a comparison, an assignment to np and at least one opening brace.  The
 * code is left untouched; restore it from a pristine copy before building.
 */
void mr_polymod_set(_MIPD_ int n, big *rf,big *f)
{ /* n is degree of f */
int i,j,np,newn,logn,degree;
mr_utype p;
big *F;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large ip;
#endif
degree=2*n;
newn=1; logn=0;
while (degree>newn) { newn<<=1; logn++; }
if (mr_mip->degree!=0)
{
for (i=0;inprimes;i++)
{
mr_free(mr_mip->s1[i]);
mr_free(mr_mip->s2[i]);
}
mr_free(mr_mip->s1);
mr_free(mr_mip->s2);
}
if (mr_mip->logNmodulus,mr_mip->modulus,TRUE);
else np=mr_mip->nprimes;
mr_mip->degree=n;
mr_mip->s1=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
mr_mip->s2=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
/* F: scratch copy of f so that the fold below does not disturb the caller's array */
F=(big *)mr_alloc(_MIPP_ n+1,sizeof(big));
for (i=0;i<=n;i++)
{
F[i]=mirvar(_MIPP_ 0);
if (f[i]!=NULL) copy(f[i],F[i]);
}
modxn_1(_MIPP_ newn/2,n,F);
for (i=0;is1[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->s2[i]=(mr_utype *)mr_alloc(_MIPP_ 1+newn/2,sizeof(mr_utype));
p=mr_mip->prime[i];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
#endif
for (j=0;js1[i][j]=0;
#ifdef MR_FP_ROUNDING
else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,ip,mr_mip->w1);
#else
else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,mr_mip->w1);
#endif
}
mr_dif_fft(_MIPP_ logn,i,mr_mip->s1[i]);
for (j=0;j<=n;j++)
#ifdef MR_FP_ROUNDING
mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,ip,mr_mip->w1);
#else
mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,mr_mip->w1);
#endif
mr_dif_fft(_MIPP_ logn-1,i,mr_mip->s2[i]);
}
for (i=0;i<=n;i++) mr_free(F[i]);
mr_free(F);
}
/*
 * Multiply two coefficient arrays x and y through the FFT tables and store
 * the recombined coefficients in z (each one is written through redc()).
 * Judging by the name and by the sibling mr_ps_big_mul(), this is
 * presumably a power-series product truncated to deg terms - TODO confirm.
 *
 *   deg - sizes the transform: newn is the smallest power of two with
 *         newn >= 2*deg
 *   x,y - inputs; NULL entries are treated as 0
 *   z   - output coefficients
 *
 * Returns np, the number of primes in use.
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "if (mr_mip->logNmodulus,...", "for (i=0;iprime[i];",
 * "for (j=0;jwa[j]=0;", "for (j=deg;jwa[j]=0;", "for (j=0;jt[i][j]...",
 * and "for (j=0;jcr[i]=..." have lost loop bounds, a comparison, an
 * assignment to np, opening braces and "#ifdef MR_FP_ROUNDING" lines.  The
 * code is left untouched; restore it from a pristine copy before building.
 */
int mr_ps_zzn_mul(_MIPD_ int deg,big *x,big *y,big *z)
{
int i,j,newn,logn,np;
mr_utype inv,p,fac;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large ip;
#endif
newn=1; logn=0;
while (2*deg>newn) { newn <<=1; logn++; }
if (mr_mip->logNmodulus,mr_mip->modulus,TRUE);
else np=mr_mip->nprimes;
for (i=0;iprime[i];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
#endif
for (j=0;jwa[j]=0;
#ifdef MR_FP_ROUNDING
else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
#else
else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
#endif
}
for (j=deg;jwa[j]=0;
mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
for (j=0;jt[i][j]=0;
#ifdef MR_FP_ROUNDING
else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1);
#else
else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1);
#endif
}
for (j=deg;jt[i][j]=0;
mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
/* multiply FFTs */
for (j=0;jwa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
inv=mr_mip->inverse[i];
/* tables were built for a larger transform: scale the stored inverse by 2^(logN-logn) mod p */
if (mr_mip->logN > logn)
{
fac=twop(mr_mip->logN-logn);
inv=smul(fac,inv,p);
}
for (j=0;jt[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
}
mr_mip->check=OFF;
mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
for (j=0;jcr[i]=mr_mip->t[i][j];
scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);
redc(_MIPP_ mr_mip->w7,z[j]);
}
mr_mip->check=ON;
return np;
}
/*
 * Multiply two power series whose coefficients are signed big integers
 * (per the comment on the opening brace).
 *
 *   deg - sizes the transform: newn is the smallest power of two with
 *         newn >= 2*deg
 *   x,y - inputs; NULL entries are treated as 0
 *   z   - output coefficients, written directly by scrt()
 *
 * Returns np, the number of primes chosen by mr_fft_init().
 *
 * Visible outline: w2 and w4 collect the largest absolute coefficient of
 * the two series, w4 is doubled to allow for both signs, and mr_fft_init()
 * is always called with those bounds.  A negative coefficient c is loaded
 * as p-(|c| mod p).  w3 accumulates the product of the primes; after
 * recombination any z[j] at or above (w3-1)/2 is replaced by -(w3-z[j]).
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "for (i=0;iw3);", "for (i=0;iprime[i];", "for (j=0;jwa[j]=0;",
 * "for (j=deg;jwa[j]=0;", "for (j=0;jt[i][j]=0;", "for (j=deg;jt[i][j]=0;",
 * "for (j=0;jwa[j],..." , "for (j=0;jt[i][j],inv,..." and
 * "for (j=0;jcr[i]=..." have lost loop bounds, NULL tests, opening braces
 * and "#ifdef MR_FP_ROUNDING" lines.  The code is left untouched; restore
 * it from a pristine copy before building.
 */
int mr_ps_big_mul(_MIPD_ int deg,big *x,big *y,big *z)
{ /* Multiply two power series with large integer parameters */
int i,j,newn,logn,np;
mr_utype inv,p,fac;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large ip;
#endif
newn=1; logn=0;
while (2*deg>newn) { newn <<=1; logn++; }
zero(mr_mip->w2);
zero(mr_mip->w4);
/* find biggest element in each series */
for (i=0;iw3);
if (mr_compare(mr_mip->w3,mr_mip->w2)>0) copy(mr_mip->w3,mr_mip->w2);
}
if (y[i]!=NULL)
{
absol(y[i],mr_mip->w3);
if (mr_compare(mr_mip->w3,mr_mip->w4)>0) copy(mr_mip->w3,mr_mip->w4);
}
}
premult(_MIPP_ mr_mip->w4,2,mr_mip->w4); /* range is +ve and -ve */
/* so extra factor of 2 included */
np=mr_fft_init(_MIPP_ logn,mr_mip->w4,mr_mip->w2,TRUE);
convert(_MIPP_ 1,mr_mip->w3);
/* compute coefficients modulo fft primes */
for (i=0;iprime[i];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
#endif
mr_pmul(_MIPP_ mr_mip->w3,p,mr_mip->w3);
for (j=0;jwa[j]=0;
else
{
if (size(x[j])>=0)
{
copy(x[j],mr_mip->w1);
#ifdef MR_FP_ROUNDING
mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
#else
mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
#endif
}
else
{
/* negative coefficient: load p minus the residue of its absolute value */
negify(x[j],mr_mip->w1);
#ifdef MR_FP_ROUNDING
mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
#else
mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
#endif
}
}
}
for (j=deg;jwa[j]=0;
mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
for (j=0;jt[i][j]=0;
else
{
if (size(y[j])>=0)
{
copy(y[j],mr_mip->w1);
#ifdef MR_FP_ROUNDING
mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
#else
mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
#endif
}
else
{
negify(y[j],mr_mip->w1);
#ifdef MR_FP_ROUNDING
mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
#else
mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
#endif
}
}
}
for (j=deg;jt[i][j]=0;
mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
/* multiply FFTs */
for (j=0;jwa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
inv=mr_mip->inverse[i];
if (mr_mip->logN > logn)
{
fac=twop(mr_mip->logN-logn);
inv=smul(fac,inv,p);
}
for (j=0;jt[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
}
/* w3 is product of chinese primes */
decr(_MIPP_ mr_mip->w3,1,mr_mip->w4);
subdiv(_MIPP_ mr_mip->w4,2,mr_mip->w4); /* find mid-point of range */
for (j=0;jcr[i]=mr_mip->t[i][j];
scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,z[j]); /* N*3*np*np/2 */
if (mr_compare(z[j],mr_mip->w4)>=0)
{ /* In higher half of range, so number is negative */
subtract(_MIPP_ mr_mip->w3,z[j],z[j]);
negify(z[j],z[j]);
}
} /* np*np*N/4 */
return np;
}
/*
 * Multiply two polynomials: z = x*y.
 *
 *   degx - degree of x; x[0..degx] are read (NULL entries count as 0)
 *   degy - degree of y; y[0..degy] are read (NULL entries count as 0)
 *   z    - receives coefficients z[0..degx+degy], each written via redc()
 *
 * Returns degx+degy.  When x and y are the same array the work is handed
 * to mr_poly_sqr().
 *
 * Method: for every FFT prime, load both coefficient vectors reduced
 * modulo that prime, zero-pad them to newn (the smallest power of two
 * holding degree+1 points), transform, multiply pointwise, inverse
 * transform and scale by 1/newn.  The per-prime results are then
 * recombined column by column with scrt().
 *
 * The loop headers and the MR_FP_ROUNDING guard around the pointwise
 * product had been truncated in this copy of the file (text between '<'
 * and '>' was lost); they are restored here from the surrounding code.
 */
int mr_poly_mul(_MIPD_ int degx,big *x,int degy,big *y,big *z)
{
    int i,j,newn,logn,np,degree;
    mr_utype inv,p,fac;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
    mr_large ip;
#endif
    degree=degx+degy;
    if (x==y)
    {
        mr_poly_sqr(_MIPP_ degx,x,z);
        return degree;
    }
    newn=1; logn=0;
    while (degree+1>newn) { newn<<=1; logn++; }
    /* rebuild the tables only if the current ones are too small */
    if (mr_mip->logN<logn) np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
    else np=mr_mip->nprimes;
    /* compute coefficients modulo fft primes */
    for (i=0;i<np;i++)
    {
        p=mr_mip->prime[i];
#ifdef MR_FP_ROUNDING
        ip=mr_invert(p);
#endif
        for (j=0;j<=degx;j++)
        {
            if (x[j]==NULL) mr_mip->wa[j]=0;
#ifdef MR_FP_ROUNDING
            else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1); /* np*np*N/2 muldivs */
#else
            else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1); /* np*np*N/2 muldivs */
#endif
        }
        /* zero-pad up to the transform length */
        for (j=degx+1;j<newn;j++) mr_mip->wa[j]=0;
        mr_dif_fft(_MIPP_ logn,i,mr_mip->wa); /* np*N*lgN */
        for (j=0;j<=degy;j++)
        {
            if (y[j]==NULL) mr_mip->t[i][j]=0;
#ifdef MR_FP_ROUNDING
            else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1); /* np*np*N/2 */
#else
            else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1); /* np*np*N/2 */
#endif
        }
        for (j=degy+1;j<newn;j++) mr_mip->t[i][j]=0;
        mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
        /* multiply FFTs */
        for (j=0;j<newn;j++)
#ifdef MR_FP_ROUNDING
            imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
            muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
        mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
        inv=mr_mip->inverse[i];
        if (mr_mip->logN > logn)
        { /* tables are for a larger N: adjust the stored 1/N mod p */
            fac=twop(mr_mip->logN-logn);
            inv=smul(fac,inv,p);
        }
        for (j=0;j<=degree;j++) /* np*N */
#ifdef MR_FP_ROUNDING
            imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
            muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
    }
    mr_mip->check=OFF;
    mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
    /* w6 = N.R */
    for (j=0;j<=degree;j++)
    { /* apply CRT to each column */
        for (i=0;i<np;i++) mr_mip->cr[i]=mr_mip->t[i][j];
        scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7); /* N*3*np*np/2 */
        divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* z[j] may be too big for redc */
        redc(_MIPP_ mr_mip->w7,z[j]);
    } /* np*np*N/4 */
    mr_mip->check=ON;
    return degree;
}
/*
 * Square a polynomial: z = x*x.
 *
 *   degx - degree of x; x[0..degx] are read (NULL entries count as 0)
 *   z    - receives coefficients z[0..2*degx], each written via redc()
 *
 * Returns 2*degx.
 *
 * Same scheme as mr_poly_mul(), but only one forward transform per prime
 * is needed because the transformed vector is multiplied by itself.
 *
 * The loop headers and the MR_FP_ROUNDING guard around the pointwise
 * product had been truncated in this copy of the file (text between '<'
 * and '>' was lost); they are restored here from the surrounding code.
 * The old header comment ("Multiply two polynomials") was a copy of the
 * one on mr_poly_mul() and has been corrected.
 */
int mr_poly_sqr(_MIPD_ int degx,big *x,big *z)
{
    int i,j,newn,logn,np,degree;
    mr_utype inv,p,fac;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
    mr_large ip;
#endif
    degree=2*degx;
    newn=1; logn=0;
    while (degree+1>newn) { newn<<=1; logn++; }
    /* rebuild the tables only if the current ones are too small */
    if (mr_mip->logN<logn) np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
    else np=mr_mip->nprimes;
    /* compute coefficients modulo fft primes */
    for (i=0;i<np;i++)
    {
        p=mr_mip->prime[i];
#ifdef MR_FP_ROUNDING
        ip=mr_invert(p);
#endif
        for (j=0;j<=degx;j++)
        {
            if (x[j]==NULL) mr_mip->t[i][j]=0;
#ifdef MR_FP_ROUNDING
            else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
#else
            else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
#endif
        }
        /* zero-pad up to the transform length */
        for (j=degx+1;j<newn;j++) mr_mip->t[i][j]=0;
        mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
        /* multiply FFTs */
        for (j=0;j<newn;j++)
#ifdef MR_FP_ROUNDING
            imuldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
            muldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
        mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
        inv=mr_mip->inverse[i];
        if (mr_mip->logN > logn)
        { /* adjust 1/N log p for smaller N */
            fac=twop(mr_mip->logN-logn);
            inv=smul(fac,inv,p);
        }
        for (j=0;j<=degree;j++)
#ifdef MR_FP_ROUNDING
            imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
#else
            muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
#endif
    }
    mr_mip->check=OFF;
    mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
    /* w6 = N.R */
    for (j=0;j<=degree;j++)
    { /* apply CRT to each column */
        for (i=0;i<np;i++) mr_mip->cr[i]=mr_mip->t[i][j];
        scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
        divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* z[j] may be too big for redc */
        redc(_MIPP_ mr_mip->w7,z[j]);
    }
    mr_mip->check=ON;
    return degree;
}
static BOOL init_it(_MIPD_ int logn)
{ /* Build the prime/root/inverse tables for a new transform size and cache
     the constants fft_mult needs. Fails unless exactly 3 primes result. */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
#ifdef MR_ITANIUM
    mr_small tm;
#endif
#ifdef MR_WIN64
    mr_small tm;
#endif
    big bound=mr_mip->w15;

    /* bound is a two-word value: low word 0, high word 1 */
    zero(bound);
    bound->w[0]=0;
    bound->w[1]=1;
    bound->len=2;

    if (mr_fft_init(_MIPP_ logn,bound,bound,FALSE)!=3) return FALSE;

    /* pairwise inverses among the three primes */
    mr_mip->const1=invers(mr_mip->prime[0],mr_mip->prime[1]);
    mr_mip->const2=invers(mr_mip->prime[0],mr_mip->prime[2]);
    mr_mip->const3=invers(mr_mip->prime[1],mr_mip->prime[2]);

    /* prime[0]*prime[1] split into a high part (msw) and a low part (lsw) */
    if (mr_mip->base!=0)
        mr_mip->msw=muldiv(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,mr_mip->base,&mr_mip->lsw);
    else
    {
#ifndef MR_NOFULLWIDTH
        mr_mip->msw=muldvd(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,&mr_mip->lsw);
#endif
    }

    mr_mip->logN=logn;
    return TRUE;
}
BOOL fastmultop(_MIPD_ int n,big x,big y,big z)
{ /* z gets only the top n words of x*y; x and y are assumed to be n words
     long. Returns TRUE when the full product is shorter than 2*n words,
     FALSE otherwise. */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    int words;
    BOOL shorter;

    /* form the full product in w0 with checking turned off */
    mr_mip->check=OFF;
    fft_mult(_MIPP_ x,y,mr_mip->w0);
    mr_mip->check=ON;

    words=mr_lent(mr_mip->w0);
    shorter=(words<2*n)?TRUE:FALSE;

    /* shift by n-words places, then hand the result back */
    mr_shift(_MIPP_ mr_mip->w0,n-words,mr_mip->w0);
    copy(mr_mip->w0,z);
    return shorter;
}
/*
 * "Fast" multiplication z = x*y using three FFT primes.
 *
 * Visible behaviour before the transform:
 *  - returns at once if mr_mip->ERNUM is set;
 *  - sets z to zero if either operand has length 0;
 *  - raises MR_ERR_INT_OP if either operand is not an integer;
 *  - falls back to multiply() when either operand is shorter than 512 words;
 *  - raises MR_ERR_OVERFLOW if the result length xl+yl exceeds mr_mip->nib
 *    while checking is on, or if newn exceeds 2*mr_mip->nib;
 *  - calls init_it() when the tables are too small for this size, raising
 *    MR_ERR_OUT_OF_MEMORY if that fails.
 * The sign of the result is the exclusive-or of the operands' sign bits;
 * z->len is set to sz|zl and leading zeros are stripped with mr_lzero().
 *
 * NOTE(review): this function is damaged in the text as received.  The
 * lines "for (i=0;iw[i],p);", "for (i=xl;isame || !mr_mip->first_one)",
 * "for (i=yl;iprime[1];", "if (mr_mip->base==0) for (i=0;iprime[0],v1,&v1);",
 * "if (c1lsw,v3,c1,&z->w[i]);" and "if (c1prime[0],v1+c1,..." have lost the
 * per-prime load and transform code, the start of the recombination step
 * and most of the carry propagation.  The code is left untouched; restore
 * it from a pristine copy before building.
 */
void fft_mult(_MIPD_ big x,big y,big z)
{ /* "fast" O(n.log n) multiplication */
int i,pr,xl,yl,zl,newn,logn;
mr_small v1,v2,v3,ic,c1,c2,p,fac,inv;
#ifdef MR_ITANIUM
mr_small tm;
#endif
#ifdef MR_WIN64
mr_small tm;
#endif
#ifdef MR_FP
mr_small dres;
#endif
mr_lentype sz;
mr_utype *w[3],*wptr,*dptr,*d0,*d1,*d2,t;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large ip;
#endif
if (mr_mip->ERNUM) return;
if (y->len==0 || x->len==0)
{
zero(z);
return;
}
MR_IN(72)
if (mr_notint(x) || mr_notint(y))
{
mr_berror(_MIPP_ MR_ERR_INT_OP);
MR_OUT
return;
}
/* sign of the product, then the operand lengths with the sign bit masked off */
sz=((x->len&MR_MSBIT)^(y->len&MR_MSBIT));
xl=(int)(x->len&MR_OBITS);
yl=(int)(y->len&MR_OBITS);
zl=xl+yl;
if (xl<512 || yl<512) /* should be 512 */
{ /* not worth it! */
multiply(_MIPP_ x,y,z);
MR_OUT
return;
}
if (zl>mr_mip->nib && mr_mip->check)
{
mr_berror(_MIPP_ MR_ERR_OVERFLOW);
MR_OUT
return;
}
newn=1; logn=0;
while (zl>newn) { newn<<=1; logn++;}
if (logn>mr_mip->logN) /* 2^(N+1) settings can be used for 2^N */
{ /* numbers too big for current settings */
if (!init_it(_MIPP_ logn))
{
mr_berror(_MIPP_ MR_ERR_OUT_OF_MEMORY);
MR_OUT
return;
}
}
if (newn>2*mr_mip->nib)
{
mr_berror(_MIPP_ MR_ERR_OVERFLOW);
MR_OUT
return;
}
d0=mr_mip->t[0]; d1=mr_mip->t[1]; d2=mr_mip->t[2];
w[0]=mr_mip->wa; w[1]=mr_mip->wb; w[2]=mr_mip->wc;
fac=twop(mr_mip->logN-logn);
for (pr=0;pr<3;pr++)
{ /* multiply mod each prime */
p=mr_mip->prime[pr];
inv=mr_mip->inverse[pr];
#ifdef MR_FP_ROUNDING
ip=mr_invert(p);
#endif
if (fac!=1) inv=smul(fac,inv,p); /* adjust 1/N mod p */
dptr=mr_mip->t[pr];
wptr=w[pr];
for (i=0;iw[i],p);
for (i=xl;isame || !mr_mip->first_one)
{
for (i=0;iw[i],p);
for (i=yl;iprime[1];
muldiv(t,mr_mip->const1,(mr_small)0,mr_mip->prime[1],(mr_small *)&d1[i]);
}
if (pr==2)
{
t=d2[i]-d0[i];
while (t<0) t+=mr_mip->prime[2];
muldiv(t,mr_mip->const2,(mr_small)0,mr_mip->prime[2],(mr_small *)&t);
t-=d1[i];
while (t<0) t+=mr_mip->prime[2];
muldiv(t,mr_mip->const3,(mr_small)0,mr_mip->prime[2],(mr_small *)&d2[i]);
}
}
}
mr_mip->first_one=TRUE;
zero(z);
c1=c2=0;
if (mr_mip->base==0) for (i=0;iprime[0],v1,&v1);
c1+=v1;
if (c1lsw,v3,c1,&z->w[i]);
c2=muldvd(mr_mip->msw,v3,ic,&c1);
c1+=v2;
if (c1prime[0],v1+c1,mr_mip->base,mr_mip->inverse_base,&v1);
ic=c2+imuldiv(mr_mip->lsw,v3,v1,mr_mip->base,mr_mip->inverse_base,&z->w[i]);
c2=imuldiv(mr_mip->msw,v3,v2+ic,mr_mip->base,mr_mip->inverse_base,&c1);
#else
v2=muldiv(v2,mr_mip->prime[0],(mr_small)(v1+c1),mr_mip->base,&v1);
ic=c2+muldiv(mr_mip->lsw,v3,v1,mr_mip->base,&z->w[i]);
c2=muldiv(mr_mip->msw,v3,(mr_small)(v2+ic),mr_mip->base,&c1);
#endif
}
z->len=(sz|zl); /* set length and sign of result */
mr_lzero(z);
MR_OUT
}
#endif
/*
main()
{
big x,y,z,w;
int i,j,k;
miracl *mip=mirsys(1024,0);
x=mirvar(0);
y=mirvar(0);
z=mirvar(0);
w=mirvar(0);
mip->IOBASE=16;
bigbits(512*MIRACL,x);
bigbits(512*MIRACL,y);
multiply(x,x,z);
cotnum(z,stdout);
fft_mult(x,x,w);
cotnum(w,stdout);
if (mr_compare(z,w)!=0) printf("Problems\n");
}
*/