--- a/src/goertzel.c
+++ b/src/goertzel.c
@@ -16,10 +16,156 @@
 #endif
 
 
+LTFAT_EXTERN
+LTFAT_NAME(gga_plan) LTFAT_NAME(create_gga_plan)(const double *indVecPtr, const size_t M, const size_t L)
+{
+LTFAT_REAL* cos_term = (LTFAT_REAL*) ltfat_malloc(M*sizeof(LTFAT_REAL));
+LTFAT_COMPLEXH* cc_term = (LTFAT_COMPLEXH*) ltfat_malloc(M*sizeof(LTFAT_COMPLEXH));
+LTFAT_COMPLEXH* cc2_term = (LTFAT_COMPLEXH*) ltfat_malloc(M*sizeof(LTFAT_COMPLEXH));
+
+LTFAT_REAL pik_term_pre = (LTFAT_REAL) (2.0*PI/((double) L));
+LTFAT_REAL _Complex cc2_pre = (LTFAT_REAL _Complex) (-1.0*I*((double)(L-1)));
+LTFAT_REAL _Complex cc_pre =  (LTFAT_REAL _Complex) (-1.0*I*((double)(L)));
+
+for(size_t m=0;m<M;m++)
+{
+   LTFAT_REAL pik_term = pik_term_pre*indVecPtr[m];
+   cos_term[m] = (LTFAT_REAL) cos(pik_term)*2.0;
+   cc_term[m] = (LTFAT_COMPLEXH) cexp(cc_pre*pik_term);
+   cc2_term[m] = (LTFAT_COMPLEXH) cexp(cc2_pre*pik_term);
+}
+
+
+LTFAT_NAME(gga_plan) plan = {.cos_term=cos_term,.cc_term=cc_term,.cc2_term=cc2_term,.M=M,.L=L};
+return plan;
+}
+
+LTFAT_EXTERN
+void LTFAT_NAME(destroy_gga_plan)(LTFAT_NAME(gga_plan) plan)
+{
+   ltfat_free(plan.cos_term);
+   ltfat_free(plan.cc_term);
+   ltfat_free(plan.cc2_term);
+}
+
+
+LTFAT_EXTERN
+void LTFAT_NAME(gga_with_plan)(LTFAT_NAME(gga_plan) p,
+                               const LTFAT_TYPE *fPtr,
+                               LTFAT_COMPLEXH *cPtr,
+                               const size_t W )
+{
+#ifndef GGA_UNROLL
+
+for(int w=0;w<W;w++)
+{
+LTFAT_COMPLEXH *cPtrTmp = (LTFAT_COMPLEXH*) cPtr+w*p.M;
+
+for(int m=0;m<p.M;m++)
+{
+      LTFAT_TYPE s0 =  0.0;
+      LTFAT_TYPE s1 =  0.0;
+      LTFAT_TYPE s2 =  0.0;
+      LTFAT_TYPE *fPtrTmp = (LTFAT_TYPE*) fPtr+w*p.L;
+
+      for(int ii=0;ii<p.L-1;ii++)
+      {
+         s0 = *fPtrTmp++ + p.cos_term[m]*s1 - s2;
+         s2=s1;
+         s1=s0;
+      }
+      s0 = *fPtrTmp + p.cos_term[m]*s1 - s2;
+
+      *cPtrTmp++ = (s0*p.cc2_term[m] - s1*p.cc_term[m]);
+   }
+}
+#else
+for(int w=0;w<W;w++)
+{
+   LTFAT_COMPLEXH *cPtrTmp = (LTFAT_COMPLEXH*) cPtr+w*p.M;
+   int unrollRem = p.M%GGA_UNROLL;
+
+   const LTFAT_REAL* cos_term = p.cos_term;
+   const LTFAT_COMPLEXH* cc_term = p.cc_term;
+   const LTFAT_COMPLEXH* cc2_term = p.cc2_term;
+//#pragma omp parallel for
+   for(int m=0;m<p.M-unrollRem;m+=GGA_UNROLL)
+   {
+      LTFAT_TYPE s0[GGA_UNROLL];
+      LTFAT_TYPE s1[GGA_UNROLL];
+      LTFAT_TYPE s2[GGA_UNROLL];
+
+      for(int un=0;un<GGA_UNROLL;un++)
+      {
+         s0[un] = 0.0;
+         s1[un] = 0.0;
+         s2[un] = 0.0;
+      }
+
+      LTFAT_TYPE *fPtrTmp = (LTFAT_TYPE*) fPtr+w*p.L;
+
+      for(int ii=0;ii<p.L-1;ii++)
+      {
+         for(int un=0;un<GGA_UNROLL;un++)
+         {
+            s0[un] = *fPtrTmp + cos_term[un]*s1[un] - s2[un];
+            s2[un]=s1[un];
+            s1[un]=s0[un];
+         }
+         fPtrTmp++;
+      }
+      for(int un=0;un<GGA_UNROLL;un++)
+      {
+         s0[un] = *fPtrTmp + cos_term[un]*s1[un] - s2[un];
+         cPtrTmp[m+un] = (s0[un]*cc2_term[un] - s1[un]*cc_term[un]);
+      }
+      cos_term+=GGA_UNROLL;
+      cc_term+=GGA_UNROLL;
+      cc2_term+=GGA_UNROLL;
+   }
+
+   int m= p.M-unrollRem;
+
+
+      LTFAT_TYPE s0[GGA_UNROLL];
+      LTFAT_TYPE s1[GGA_UNROLL];
+      LTFAT_TYPE s2[GGA_UNROLL];
+
+   for(int un=0;un<unrollRem;un++)
+   {
+       s0[un] = 0.0;
+       s1[un] = 0.0;
+       s2[un] = 0.0;
+   }
+
+      LTFAT_TYPE *fPtrTmp = (LTFAT_TYPE*) fPtr+w*p.L;
+
+      for(int ii=0;ii<p.L-1;ii++)
+      {
+         for(int un=0;un<unrollRem;un++)
+         {
+            s0[un] = *fPtrTmp + cos_term[un]*s1[un] - s2[un];
+            s2[un]=s1[un];
+            s1[un]=s0[un];
+         }
+         fPtrTmp++;
+      }
+
+      for(int un=0;un<unrollRem;un++)
+      {
+         s0[un] = *fPtrTmp + cos_term[un]*s1[un] - s2[un];
+         cPtrTmp[m+un] = (s0[un]*cc2_term[un] - s1[un]*cc_term[un]);
+      }
+
+}
+#endif
+}
+
+
 
 LTFAT_EXTERN
 void LTFAT_NAME(gga)(const LTFAT_TYPE *fPtr, const double *indVecPtr,
-                const int L, const int W, const int M, LTFAT_COMPLEXH *cPtr)
+                const size_t L, const size_t W, const size_t M, LTFAT_COMPLEXH *cPtr)
 {
 
 double pik_term_pre = 2.0*PI/((double) L);
@@ -151,5 +297,363 @@
 
 }
 
+
+
+LTFAT_EXTERN
+void LTFAT_NAME(chzt)(const LTFAT_TYPE *fPtr, const size_t L, const size_t W, const size_t K,
+                      const double deltao, const double o, LTFAT_COMPLEXH *cPtr)
+{
+LTFAT_NAME(chzt_plan) p = LTFAT_NAME(create_chzt_plan)(K,L,deltao, o,FFTW_ESTIMATE,CZT_NEXTFASTFFT);
+
+LTFAT_NAME(chzt_with_plan)(p, fPtr, W,deltao, o, cPtr);
+
+LTFAT_NAME(destroy_chzt_plan)(p);
+}
+
+
+LTFAT_EXTERN
+void LTFAT_NAME(chzt_with_plan)(LTFAT_NAME(chzt_plan) p, const LTFAT_TYPE *fPtr, const size_t W,
+                        const double deltao, const double o, LTFAT_COMPLEXH *cPtr)
+{
+
+    size_t L = p.L;
+    size_t K = p.K;
+    size_t Lfft = p.Lfft;
+    LTFAT_COMPLEXH* fbuffer = p.fbuffer;
+    LTFAT_FFTW(plan) plan_f = p.plan;
+    LTFAT_FFTW(plan) plan_fi = p.plan2;
+    LTFAT_COMPLEXH* W2 = p.W2;
+    LTFAT_COMPLEXH* Wo = p.Wo;
+    LTFAT_COMPLEXH* chirpF = p.chirpF;
+
+
+    // Temporal storage of the post chirp as a last channel of the output
+    //LTFAT_COMPLEXH *cPtrLast = cPtr+K*(W-1);
+    //memcpy(cPtrLast,W2,K*sizeof(LTFAT_COMPLEXH));
+
+    for(size_t w = 0;w<W;w++)
+    {
+       memset(fbuffer,0,Lfft*sizeof(LTFAT_COMPLEXH));
+       LTFAT_NAME(array2complex)(fPtr+w*L,fbuffer,L);
+
+       //1) Premultiply by a chirp
+
+       for(size_t ii=0;ii<L;ii++)
+       {
+           fbuffer[ii]*=Wo[ii];
+       }
+
+       // 2) FFT of input
+       LTFAT_FFTW(execute)(plan_f);
+
+/*
+       LTFAT_NAME_COMPLEX(conjugate_array)(W2,W2,L);
+       LTFAT_NAME_COMPLEX(reverse_array)(W2,W2,L);
+       memcpy(W2+L,cPtrLast+1,(K-1)*sizeof(LTFAT_COMPLEXH));
+       LTFAT_NAME_COMPLEX(conjugate_array)(W2+L,W2+L,K-1);
+
+       // FFT of the chirp filter
+       LTFAT_FFTW(execute_dft)(plan_f,W2,W2);
+*/
+       // Frequency domain filtering
+       for(size_t ii=0;ii<Lfft;ii++)
+       {
+           fbuffer[ii]*=chirpF[ii];
+       }
+
+
+       // Inverse FFT using forward FFT plan
+       LTFAT_COMPLEXH *fPtrTmp = fbuffer;
+       //LTFAT_NAME_COMPLEX(conjugate_array)(fbuffer,fbuffer,Lfft);
+       LTFAT_FFTW(execute)(plan_fi);
+       //LTFAT_NAME_COMPLEX(conjugate_array)(fPtrTmp,fPtrTmp,K);
+
+
+       // Final chirp multiplication and normalization
+       LTFAT_COMPLEXH *cPtrTmp = cPtr + w*K;
+       for(size_t ii=0;ii<K;ii++)
+       {
+           cPtrTmp[ii]=fPtrTmp[ii]*W2[ii];
+       }
+
+    }
+
+}
+
+LTFAT_EXTERN
+LTFAT_NAME(chzt_plan) LTFAT_NAME(create_chzt_plan)(const size_t K, size_t L, const double deltao, const double o, const unsigned fftw_flags, const unsigned czt_flags)
+{
+    size_t Lfft = L+K-1;
+
+    if(czt_flags == 0)
+     Lfft = nextPow2_st(Lfft);
+    else
+     Lfft = nextfastfft(Lfft);
+
+    LTFAT_COMPLEXH* fbuffer = ltfat_malloc(Lfft*sizeof(LTFAT_COMPLEXH));
+    LTFAT_FFTW(plan) plan_f =  LTFAT_FFTW(plan_dft_1d)(Lfft, (LTFAT_COMPLEX*)fbuffer, (LTFAT_COMPLEX*) fbuffer,
+                                                       FFTW_FORWARD, fftw_flags);
+        LTFAT_FFTW(plan) plan_fi =  LTFAT_FFTW(plan_dft_1d)(Lfft, (LTFAT_COMPLEX*)fbuffer, (LTFAT_COMPLEX*) fbuffer,
+                                                       FFTW_BACKWARD, fftw_flags);
+
+    // Pre and post chirp
+    size_t N = L>K?L:K;
+    LTFAT_COMPLEXH* W2 = ltfat_malloc(Lfft*sizeof(LTFAT_COMPLEXH));
+    LTFAT_COMPLEXH* chirpF = ltfat_malloc(Lfft*sizeof(LTFAT_COMPLEXH));
+    LTFAT_COMPLEXH* Wo = ltfat_malloc(L*sizeof(LTFAT_COMPLEXH));
+
+
+    for(size_t ii=0;ii<N;ii++)
+    {
+        W2[ii] = (LTFAT_COMPLEXH) cexp(-1.0*I*deltao*ii*ii/2.0);
+    }
+
+    for(size_t ii=0;ii<L;ii++)
+    {
+        Wo[ii] = (LTFAT_COMPLEXH) cexp(-1.0*I*o*ii)*W2[ii];
+    }
+    // Set the rest to zero
+    memset(W2+N,0,(Lfft-N)*sizeof(LTFAT_COMPLEXH));
+
+    LTFAT_NAME_COMPLEX(conjugate_array)(W2,chirpF,K);
+    //LTFAT_NAME_COMPLEX(reverse_array)(chirpF,chirpF,L);
+    // memcpy(chirpF+Lfft-L,W2+1,(K-1)*sizeof(LTFAT_COMPLEXH));
+    //LTFAT_NAME_COMPLEX(conjugate_array)(chirpF+L,chirpF+L,K-1);
+    LTFAT_NAME_COMPLEX(conjugate_array)(W2+1,chirpF+Lfft-L+1,L-1);
+    LTFAT_NAME_COMPLEX(reverse_array)(chirpF+Lfft-L+1,chirpF+Lfft-L+1,L-1);
+    memset(chirpF+K,0,(Lfft-(L+K-1))*sizeof(LTFAT_COMPLEXH));
+
+    LTFAT_FFTW(execute_dft)(plan_f,chirpF,chirpF);
+
+
+    for(size_t ii=0;ii<K;ii++)
+    {
+        W2[ii] = (LTFAT_COMPLEXH) cexp(-1.0*I*deltao*ii*ii/2.0)/((LTFAT_REAL) Lfft);
+    }
+
+
+
+    /*
+    We could have shrinked the W2 to length K here.
+    */
+    LTFAT_NAME(chzt_plan) p = {.fbuffer = fbuffer, .plan=plan_f,.plan2=plan_fi,.L=L,
+                               .K=K, .W2 = W2, .Wo=Wo,.chirpF=chirpF,.Lfft=Lfft};
+    return  p;
+}
+
+LTFAT_EXTERN
+void LTFAT_NAME(destroy_chzt_plan)(LTFAT_NAME(chzt_plan) p)
+{
+   ltfat_free(p.fbuffer);
+   ltfat_free(p.W2);
+   ltfat_free(p.Wo);
+   ltfat_free(p.chirpF);
+   LTFAT_FFTW(destroy_plan)(p.plan);
+   LTFAT_FFTW(destroy_plan)(p.plan2);
+}
+
+
+
+
+LTFAT_EXTERN
+void LTFAT_NAME(chzt_fact)(const LTFAT_TYPE *fPtr, const size_t L, const size_t W, const size_t K,
+                        const double deltao, const double o, LTFAT_COMPLEXH *cPtr)
+{
+LTFAT_NAME(chzt_plan) p = LTFAT_NAME(create_chzt_plan_fact)(K,L,deltao, o,FFTW_ESTIMATE, CZT_NEXTFASTFFT);
+
+LTFAT_NAME(chzt_with_plan_fact)(p, fPtr, W,deltao, o, cPtr);
+
+LTFAT_NAME(destroy_chzt_plan)(p);
+}
+
+LTFAT_EXTERN
+void LTFAT_NAME(chzt_with_plan_fact)(LTFAT_NAME(chzt_plan) p, const LTFAT_TYPE *fPtr, const size_t W,
+                        const double deltao, const double o, LTFAT_COMPLEXH *cPtr)
+{
+    size_t L = p.L;
+    size_t K = p.K;
+    size_t Lfft = p.Lfft;
+    LTFAT_COMPLEXH* fbuffer = p.fbuffer;
+    LTFAT_FFTW(plan) plan_f = p.plan;
+    LTFAT_FFTW(plan) plan_fi = p.plan2;
+    LTFAT_COMPLEXH* W2 = p.W2;
+    LTFAT_COMPLEXH* Wo = p.Wo;
+    LTFAT_COMPLEXH* chirpF = p.chirpF;
+
+    LTFAT_COMPLEXH* fBufTmp;
+    size_t q = (size_t) ceil(((double)L)/((double)K));
+
+    size_t lastK = (L/q);
+
+    for(size_t w = 0;w<W;w++)
+    {
+      // *********************************
+      // 1) Read and reorganize input data
+      // *********************************
+       memset(fbuffer,0,q*Lfft*sizeof(LTFAT_COMPLEXH));
+       LTFAT_TYPE* fPtrTmp = fPtr + w*L;
+
+       for(size_t k=0;k<lastK;k++)
+       {
+          LTFAT_TYPE* fTmp = fPtrTmp + k*q;
+          fBufTmp = fbuffer + k;
+          for(size_t jj=0;jj<q;jj++)
+          {
+            *fBufTmp = (LTFAT_COMPLEXH) fTmp[jj];
+            fBufTmp+=Lfft;
+          }
+       }
+
+       LTFAT_TYPE* fTmp = fPtrTmp + lastK*q;
+       fBufTmp = fbuffer + lastK;
+       for(size_t jj=0;jj<L-lastK*q;jj++)
+       {
+         *fBufTmp = (LTFAT_COMPLEXH) fTmp[jj];
+         fBufTmp+=Lfft;
+       }
+
+      // *********************************
+      // 2) Premultiply
+      // *********************************
+
+       fBufTmp = fbuffer;
+       for(size_t jj=0;jj<q;jj++)
+       {
+          for(size_t k=0;k<K;k++)
+          {
+             fBufTmp[k]*=W2[k];
+          }
+          fBufTmp+=Lfft;
+       }
+
+      // *********************************
+      // 3) q ffts of length Lfft
+      // *********************************
+       LTFAT_FFTW(execute)(plan_f);
+
+      // *********************************
+      // 4) Filter
+      // *********************************
+       fBufTmp = fbuffer;
+       // Frequency domain filtering
+       for(size_t jj=0;jj<q;jj++)
+       {
+          for(size_t ii=0;ii<Lfft;ii++)
+          {
+             fBufTmp[ii]*=chirpF[ii];
+          }
+          fBufTmp+=Lfft;
+       }
+
+      // *********************************
+      // 5) q iffts of length Lfft
+      // *********************************
+       LTFAT_FFTW(execute)(plan_fi);
+
+
+      // *********************************
+      // 6) Postmultiply
+      // *********************************
+       fBufTmp = fbuffer;
+       LTFAT_COMPLEXH* Wotmp = Wo;
+       for(size_t jj=0;jj<q;jj++)
+       {
+          for(size_t k=0;k<K;k++)
+          {
+             fBufTmp[k]*=Wotmp[k];
+          }
+          fBufTmp+=Lfft;
+          Wotmp+=K;
+       }
+
+      // *********************************
+      // 7) Sum cols
+      // *********************************
+       LTFAT_COMPLEXH *cPtrTmp = cPtr + w*K;
+       for(size_t k=0;k<K;k++)
+       {
+           fBufTmp = fbuffer + k;
+           cPtrTmp[k] = (LTFAT_COMPLEXH) 0.0;
+           for(size_t jj=0;jj<q;jj++)
+           {
+              cPtrTmp[k]+= *fBufTmp;
+              fBufTmp+=Lfft;
+           }
+       }
+
+    }
+}
+
+LTFAT_EXTERN
+LTFAT_NAME(chzt_plan) LTFAT_NAME(create_chzt_plan_fact)(const size_t K, const size_t L, const double deltao, const double o, const unsigned fftw_flags, const unsigned czt_flags)
+{
+
+    size_t Lfft=2*K-1;
+    if(czt_flags == 0)
+     Lfft = nextPow2_st(Lfft);
+    else
+     Lfft = nextfastfft(Lfft);
+
+    size_t q = (size_t) ceil(((double)L)/((double)K));
+
+    LTFAT_COMPLEXH* fbuffer = ltfat_malloc(q*Lfft*sizeof(LTFAT_COMPLEXH));
+
+    const LTFAT_FFTW(iodim) dims = {.n = Lfft, .is = 1, .os = 1};
+    const LTFAT_FFTW(iodim) howmany_dims = {.n = q,.is = Lfft, .os = Lfft};
+    LTFAT_FFTW(plan) plan_f =  LTFAT_FFTW(plan_guru_dft)(1, &dims, 1, &howmany_dims,
+                                                         (LTFAT_COMPLEX*)fbuffer, (LTFAT_COMPLEX*) fbuffer,
+                                                         FFTW_FORWARD, fftw_flags);
+
+    LTFAT_FFTW(plan) plan_fi =  LTFAT_FFTW(plan_guru_dft)(1, &dims, 1, &howmany_dims,
+                                                         (LTFAT_COMPLEX*)fbuffer, (LTFAT_COMPLEX*) fbuffer,
+                                                         FFTW_BACKWARD, fftw_flags);
+
+    LTFAT_COMPLEXH* W2 = ltfat_malloc(K*sizeof(LTFAT_COMPLEXH));
+    LTFAT_COMPLEXH* chirpF = ltfat_malloc(Lfft*sizeof(LTFAT_COMPLEXH));
+    LTFAT_COMPLEXH* Wo = ltfat_malloc(q*K*sizeof(LTFAT_COMPLEXH));
+
+    LTFAT_FFTW(plan) plan_chirpF =  LTFAT_FFTW(plan_dft_1d)(Lfft, (LTFAT_COMPLEX*)chirpF, (LTFAT_COMPLEX*) chirpF,
+                                                       FFTW_FORWARD, fftw_flags);
+
+
+    // Pre and post chirp
+    //size_t N = L>K?L:K;
+
+    for(size_t k=0;k<K;k++)
+    {
+        W2[k] = (LTFAT_COMPLEXH) cexp(-1.0*q*I*deltao*k*k/2.0);
+    }
+
+    LTFAT_NAME_COMPLEX(conjugate_array)(W2,chirpF,K);
+    LTFAT_NAME_COMPLEX(conjugate_array)(W2+1,chirpF+Lfft-K+1,K-1);
+    LTFAT_NAME_COMPLEX(reverse_array)(chirpF+Lfft-K+1,chirpF+Lfft-K+1,K-1);
+    memset(chirpF+K,0,(Lfft-(2*K-1))*sizeof(LTFAT_COMPLEXH));
+    LTFAT_FFTW(execute)(plan_chirpF);
+    LTFAT_FFTW(destroy_plan)(plan_chirpF);
+
+    LTFAT_REAL oneoverLfft = 1.0/((LTFAT_REAL) Lfft);
+
+    for(size_t jj=0;jj<q;jj++)
+    {
+       LTFAT_COMPLEXH* Wotmp = Wo + jj*K;
+       for(size_t k=0;k<K;k++)
+       {
+          Wotmp[k] = (LTFAT_COMPLEXH) cexp(-1.0*I*jj*(k*deltao+o))*W2[k]*oneoverLfft;
+       }
+    }
+
+    for(size_t k=0;k<K;k++)
+    {
+       W2[k] *= (LTFAT_COMPLEXH) cexp(-1.0*I*k*q*o);
+    }
+
+
+    LTFAT_NAME(chzt_plan) p = {.fbuffer = fbuffer, .plan=plan_f, .plan2=plan_fi,.L=L,
+                               .K=K, .W2 = W2, .Wo=Wo,.chirpF=chirpF,.Lfft=Lfft};
+    return  p;
+}
+
+
+
 #endif // LTFAT_TYPE