//
// Institute for Signal Processing (University of Luebeck, Germany)
// Copyright (c) 2011 by Radoslaw Mazur
//
// Permission to use, copy, modify, and distribute this software without
// fee is hereby granted FOR RESEARCH/EDUCATION PURPOSES only, provided
// that this copyright notice appears in all copies and in all supporting
// documentation, and that the software is not redistributed for any
// fee (except for a nominal shipping charge).
//
// For any other uses of this software, in original or modified form,
// including but not limited to consulting, production or distribution
// in whole or in part, specific prior permission must be obtained
// from the author.
// Signal processing methods and algorithms implemented by this
// software may be claimed by patents owned by others.
//
// The author makes no representation about the suitability of this
// software for any purpose. It is provided "as is" without warranty
// of any kind, either expressed or implied.
// Beware of the bugs.
//
//     Revision history
//
//     Ver     Date         Description
//     -------------------------------------------------------------------
//     0.5     14-10-2011   basic version


#include <iostream>
//#include <stdio.h>
#include <time.h>
using namespace std;


#include "../include/shorteningData.h"
#include "../include/kernels.h"

// Allocates `bytes` bytes of device memory and stores the address in `ptr`.
// If cudaMalloc fails, the error is printed to stderr and `ptr` is set to NULL
// so that a later cudaFree(ptr) is a harmless no-op. Returns true on success.
template <typename T>
static bool allocDeviceBuffer(T *&ptr, size_t bytes, const char *label) {
    const cudaError_t status = cudaMalloc((void**)&ptr, bytes);
    if (status != cudaSuccess) {
        ptr = NULL;
        cerr << "cudaMalloc failed for " << label << " (" << bytes << " bytes): "
             << cudaGetErrorString(status) << endl;
        return false;
    }
    return true;
}

// Stores the host pointers and dimensions, derives the total buffer lengths
// and allocates all device buffers used by this object.
//
//   cc, wwd, wwu, hh : host input buffers, kept as c, wd, wu, h
//   ret_re           : host output buffer, kept as ret
//   cl, wl, hl       : stored as c_len, w_len, h_len
//   fl               : stored as fft_len; all device buffer lengths derive from it
//   spk, mic         : number of speakers / microphones
//
// Allocation or kernel-launch failures are reported on stderr; the
// constructor does not throw or abort.
shorteningData::shorteningData(double *cc, double *wwd, double *wwu, double *hh, double *ret_re, int cl, int wl, int hl , int fl, int spk, int mic):
    c(cc), wd(wwd), wu(wwu), h(hh), ret(ret_re),
    c_len(cl), w_len(wl), h_len(hl), fft_len(fl),
    speakers(spk), microphones(mic) {

    // Total element counts: one fft_len-sized slot per channel combination.
    c_ges_len = fft_len * speakers * microphones;
    w_ges_len = fft_len * microphones;
    h_ges_len = fft_len * speakers;

    // time used for cuda init
    clock_t start,  end;
    start = clock();

    // Device copies of the host inputs (filled later by copyToDevice()).
    allocDeviceBuffer(dev_c,  c_ges_len * sizeof(double), "dev_c");
    allocDeviceBuffer(dev_wd, w_ges_len * sizeof(double), "dev_wd");
    allocDeviceBuffer(dev_wu, w_ges_len * sizeof(double), "dev_wu");
    allocDeviceBuffer(dev_h,  h_ges_len * sizeof(double), "dev_h");

    // Complex work buffers; each is passed through clear_mem (defined
    // elsewhere, presumably zero-fills) right after allocation. The kernel is
    // only launched when the allocation succeeded.
    if (allocDeviceBuffer(C, c_ges_len * sizeof(Complex), "C")) {
        clear_mem<<<(c_ges_len+255)/256,256>>>(c_ges_len, C);
    }
    if (allocDeviceBuffer(Ck, c_ges_len * sizeof(Complex), "Ck")) {
        clear_mem<<<(c_ges_len+255)/256,256>>>(c_ges_len, Ck);
    }

    // Scratch buffers sized for the larger of the w and h layouts.
    int tmp_size = max(w_ges_len,h_ges_len);

    // The element count handed to clear_mem must be tmp_size: these buffers
    // hold only tmp_size elements, so passing c_ges_len (as was done before)
    // lets the kernel run past the end of the allocation.
    if (allocDeviceBuffer(conv_tmp_in, tmp_size * sizeof(Complex), "conv_tmp_in")) {
        clear_mem<<<(tmp_size+255)/256,256>>>(tmp_size, conv_tmp_in);
    }
    if (allocDeviceBuffer(conv_tmp_out, tmp_size * sizeof(Complex), "conv_tmp_out")) {
        clear_mem<<<(tmp_size+255)/256,256>>>(tmp_size, conv_tmp_out);
    }

    // Real-valued work buffers with the w layout.
    allocDeviceBuffer(gk,  w_ges_len * sizeof(double), "gk");
    allocDeviceBuffer(bu,  w_ges_len * sizeof(double), "bu");
    allocDeviceBuffer(bd,  w_ges_len * sizeof(double), "bd");

    allocDeviceBuffer(add_tmp1,  w_ges_len * sizeof(double), "add_tmp1");
    allocDeviceBuffer(add_tmp2,  w_ges_len * sizeof(double), "add_tmp2");

    allocDeviceBuffer(gradient_out,  w_ges_len * sizeof(double), "gradient_out");
    allocDeviceBuffer(F1,  h_ges_len * sizeof(double), "F1");

    // Fixed-size buffer of 256 doubles (presumably per-block partial sums of
    // a reduction -- confirm against the kernels that use it).
    allocDeviceBuffer(add_partial,  256 * sizeof(double), "add_partial");

    // Single-double device scalars.
    allocDeviceBuffer(SAgu,  1 * sizeof(double), "SAgu");
    allocDeviceBuffer(SAgd,  1 * sizeof(double), "SAgd");

    // Kernel launches are asynchronous: pick up launch-configuration errors,
    // then wait for the clear_mem kernels so that execution errors surface
    // here and the reported time covers the complete initialisation.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        cerr << "CUDA init failed: " << cudaGetErrorString(status) << endl;
    }

    end = clock();
    cout << "Time used for CUDA init: " << ((double)(end-start))/((double)(CLOCKS_PER_SEC)) << "s" <<endl;

}



// Releases every device buffer that the constructor allocated with
// cudaMalloc. The buffers are freed in the order in which they were
// allocated; return codes of cudaFree are not inspected.
shorteningData::~shorteningData() {
    void *const owned[] = {
        // device copies of the host inputs
        dev_c, dev_wd, dev_wu, dev_h,
        // complex work buffers
        C, Ck,
        conv_tmp_in, conv_tmp_out,
        // real-valued work buffers
        gk, bu, bd,
        add_tmp1, add_tmp2,
        gradient_out, F1,
        add_partial,
        // single-value device scalars
        SAgu, SAgd
    };

    const size_t owned_count = sizeof(owned) / sizeof(owned[0]);
    for (size_t idx = 0; idx < owned_count; ++idx) {
        cudaFree(owned[idx]);
    }
}



// Uploads the host inputs c, wd, wu and h into the device buffers dev_c,
// dev_wd, dev_wu and dev_h using cudaMemcpy (host to device).
//
// All four transfers are always attempted; a failing transfer is reported on
// stderr and does not stop the remaining ones.
//
// NOTE(review): the byte counts are derived from fft_len (c_ges_len,
// w_ges_len, h_ges_len), not from c_len / w_len / h_len -- confirm that the
// host buffers are at least that large.
void shorteningData::copyToDevice(){
    struct Upload {
        void       *dst;    // device destination
        const void *src;    // host source
        size_t      bytes;  // transfer size in bytes
        const char *label;  // name used in the error message
    };

    const Upload uploads[] = {
        { dev_c,  c,  c_ges_len * sizeof(double), "c"  },
        { dev_wd, wd, w_ges_len * sizeof(double), "wd" },
        { dev_wu, wu, w_ges_len * sizeof(double), "wu" },
        { dev_h,  h,  h_ges_len * sizeof(double), "h"  }
    };
    const size_t upload_count = sizeof(uploads) / sizeof(uploads[0]);

    for (size_t i = 0; i < upload_count; ++i) {
        const cudaError_t status = cudaMemcpy(uploads[i].dst, uploads[i].src,
                                              uploads[i].bytes, cudaMemcpyHostToDevice);
        if (status != cudaSuccess) {
            cerr << "cudaMemcpy (host to device) failed for " << uploads[i].label
                 << ": " << cudaGetErrorString(status) << endl;
        }
    }
}
// Downloads h_ges_len doubles from the device buffer dev_h into the host
// buffer ret using cudaMemcpy (device to host). A failed transfer is reported
// on stderr; in that case the contents of ret must not be trusted.
void shorteningData::copyToHost(){
    const size_t bytes = h_ges_len * sizeof(double);
    const cudaError_t status = cudaMemcpy(ret, dev_h, bytes, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        cerr << "cudaMemcpy (device to host) failed for h: "
             << cudaGetErrorString(status) << endl;
    }
}
