/*#define DOUBLE_CHECK_RESULT*/
/*#define DMA*/


#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>
#ifndef GCC
#include <c_asm.h>
#endif
#include "memmap.h"
#include "phibutil.h"

#define DEFINE_IN_GRAPPE6UTIL 

#include "../grape6/g6chip_simlib.h"
#include "grape6util-local.h"
#include "grape6util.h"
#include "grape6.h"

#define NMAX  200000


/*
 * Build the neighbour list of particle i by direct search over all n
 * particles.
 *
 *   n     number of particles (rows of x)
 *   i     index of the particle whose neighbours are wanted
 *   x     particle coordinates, three components per particle
 *   eps2  constant added to every squared separation before the test
 *   h2    threshold: j is a neighbour when eps2 + |x[i]-x[j]|^2 < h2
 *   nnb   out: number of neighbours found
 *   nbl   out: neighbour indices in ascending order; the caller must
 *         provide room for up to n-1 entries
 *
 * Particle i itself is never listed.
 */
void construct_nblist(int n, int i, double x[][3], double eps2, double h2,
		      int *nnb,  int nbl[])
{
    int cand, axis;
    int count = 0;

    for (cand = 0; cand < n; cand++) {
        double dist2;

        if (cand == i) {
            continue;
        }

        /* Start from eps2 and add the components in x, y, z order. */
        dist2 = eps2;
        for (axis = 0; axis < 3; axis++) {
            double sep = x[i][axis] - x[cand][axis];
            dist2 += sep * sep;
        }

        if (dist2 < h2) {
            nbl[count] = cand;
            count++;
        }
    }

    *nnb = count;
}


/*
 * Print a neighbour list to stderr.
 *
 *   cp   label printed in front of the list (left-justified, width 5)
 *   nnb  number of entries in nbl
 *   nbl  neighbour indices
 *
 * Entries are printed 15 per line; continuation lines are indented by
 * 14 blanks.
 */
void dump_nbl(char * cp, int nnb, int nbl[])
{
    int idx = 0;

    fprintf(stderr, "%-5s nnb=%4d ", cp, nnb);
    while (idx < nnb) {
        fprintf(stderr, " %5d", nbl[idx]);
        /* Break the line after every 15th entry. */
        if (idx % 15 == 14) {
            fputs("\n              ", stderr);
        }
        idx++;
    }
    fputs("\n", stderr);
}

/*
 * Compare two neighbour lists and report every discrepancy on stdout.
 *
 *   ip    particle index, used only in the "Different counts" message
 *   nnb   number of entries in nbl
 *   nbl   first list
 *   nnb2  number of entries in nbl2
 *   nbl2  second list
 *
 * Returns -1 when the two lists are identical.  Otherwise returns the
 * (non-negative) position of the last discrepancy found; the position
 * refers to whichever list holds the unmatched entry.
 *
 * The lists are walked in step like a merge, so both are assumed to be
 * in ascending order.  construct_nblist() produces that order;
 * NOTE(review): confirm the list read back from the hardware is sorted
 * as well.
 */
int compare_lists(int ip, int nnb, int nbl[],int nnb2, int nbl2[])
{
    int i0 = 0;     /* cursor into nbl  */
    int i1 = 0;     /* cursor into nbl2 */
    int iret = -1;

    if (nnb != nnb2){
	printf("compare_list: Different counts %d %d %d\n",
	      ip,nnb,nnb2);
    }

    /* Never read past the end of either list. */
    while (i0 < nnb && i1 < nnb2){
	if (nbl[i0] == nbl2[i1]){
	    i0++;
	    i1++;
	    continue;
	}
	printf("compare_list: Different nb %d %d %d %d\n",
	      i0, nbl[i0], i1, nbl2[i1]);
	if (nbl[i0] > nbl2[i1]){
	    /* nbl2 holds an entry that nbl lacks: skip it in nbl2 only. */
	    iret = i1;
	    i1++;
	}else{
	    /* nbl holds an entry that nbl2 lacks: skip it in nbl only. */
	    iret = i0;
	    i0++;
	}
    }

    /* Whatever is left over in one list has no partner in the other. */
    if (i0 < nnb){
	iret = i0;
    }else if (i1 < nnb2){
	iret = i1;
    }

    /* >= so that a discrepancy at position 0 is not overlooked. */
    if (iret >= 0)printf("\n");
    return iret;
}


/*
 * Test driver for the neighbour-list interface.
 *
 * Each pass fills n particles with random coordinates, runs the force
 * calculation through calculate_accel_by_grape6_noopen(), loads all
 * particles as j-particles and then, for chunks of up to g6_npipes()
 * i-particles, computes forces and reads back the neighbour lists.
 * Unless TIMETEST is defined every list is checked against the
 * brute-force construct_nblist() result.
 *
 * Without TIMETEST the passes repeat forever; with TIMETEST the program
 * returns after 50 passes.
 */
int main(void)
{
    static  double x[NMAX][3];
    static      double v[NMAX][3];
    static      double a2[NMAX][3];
    static      double j6[NMAX][3];
    static      double jd18[NMAX][3];
    static      double acc[NMAX][3];
    static      double jerk[NMAX][3];
    static      double pot[NMAX];
    static      int    flag[NMAX];
    static      double m[NMAX];
    static      double t[NMAX];
    static      double dt[NMAX];
    static      int nbl[NMAX];
    static      int nblh[NMAX];
    static      int nnbindex[NMAX];
    int i,k,n,ii;
    int clusterid = 0;
    double ti = 0;
    double eps2 = 0.01;
    double h2 = 0.1;
    /* NOTE(review): m[] is a zero-initialised static array that has not
       been filled yet, so fscale and phiscale both evaluate to 0 here --
       confirm this is intended.
       NOTE(review): sqrt() and pow() need <math.h>, which this file does
       not include directly -- confirm one of the project headers does. */
    double fscale = m[0]/eps2*100;
    double phiscale = m[0]/sqrt(eps2)*100;
    int ipmax = g6_npipes();
    int itest = 0;
    double start_time, current_time, nbtotal;
    /* nbtotal is cleared only here, so it accumulates over all passes. */
    nbtotal = 0;
restart:
    /* Re-entered after a communication error or too many retries.
       NOTE(review): g6_open_() is then called again without a visible
       close -- confirm the library tolerates that. */
#ifndef TIMETEST    
    n = 30000;
#else    
    n = 30000;
#endif    
    h2 = 15*pow((double)n,-0.666666666);
    srand48(12345);
    g6_open_(&clusterid);
    start_time = g6_cpu_second();
teststart:
    /* Fill the particle arrays for this pass. */
    for(i=0;i<n;i++){
	for(k=0;k<3;k++){
	    x[i][k] = drand48();
	    v[i][k] = drand48();
	    a2[i][k] = j6[i][k] = jd18[i][k] = 0;
	    acc[i][k] = jerk[i][k] = 1;
	}
	/* Particle 0 is pinned to the origin (redone on every iteration). */
	for(k=0;k<3;k++) x[0][k] = 0;
	m[i] = 1.0/n;
	t[i]  = 0;
	dt[i] = 0.125;
	pot[i] = 1;
    }
    
    g6errorcounter_clear(0);
    for(i=0;i<n;i++){
	for(k=0;k<3;k++)acc[i][k] = fscale;
	pot[i] = phiscale;
    }
#ifndef TIMETEST
    /* Exercise the force routine with slightly varying particle counts. */
    calculate_accel_by_grape6_noopen(clusterid,n,x,v,m,acc,jerk,pot,eps2);
    for(i=0;i<10;i++){ 
	fprintf(stderr,"test calculation of n-i = %d\n", n-i);
	calculate_accel_by_grape6_noopen(clusterid,n-i,x,v,m,acc,jerk,pot,eps2);
    }
    fprintf(stderr,"test calculation of n = %d\n", n);
#endif
    calculate_accel_by_grape6_noopen(clusterid,n,x,v,m,acc,jerk,pot,eps2);
#if 0
    for(i=0;i<n;i++){
	fprintf(stderr,"i:%5d acc: %e  %e  %e \n          jerk %e  %e  %e \n pot, flag: %e %x\n",
	       i, acc[i][0], acc[i][1], acc[i][2],
	       jerk[i][0], jerk[i][1], jerk[i][2],
	       pot[i], flag[i]);
    }
#endif
    /* start testing the neighbour list */
    g6_set_ti_(&clusterid, &ti);
    for(i=0;i<n;i++){
	g6_set_j_particle_(&clusterid,&i,&i,t+i,dt+i,m+i,
			   jd18[i],j6[i],a2[i],v[i],x[i]);
    }
#ifdef TIMETEST
    /* Alternate the sort mode between successive passes. */
    g6_set_neighbour_list_sort_mode(itest &1);
#endif    
    /* Process the i-particles in chunks of at most ipmax. */
    for(i=0;i<n;i+=ipmax){
	int nerror = 0;
	int error, iend;
	iend = ipmax; if  (iend+i > n) iend = n-i;
	do{
	    /* repeat if flags are not too good */
	    /* neglect: all errors in jerk (17 downto 9) and underflow */
	    /* i.e. 1 0011 1111 1111 0010 0100 = 0x13FF24 */
	    for (ii= 0; ii<iend; ii++){
		double one = 1.0;
		double zero = 0.0;
		int ip = ii+i;
		g6_set_i_particle_scales_from_real_value_(&clusterid, &ii, acc[ip],
							  jerk[ip], pot+ip,&one,&zero);
		g6_set_i_particle_(&clusterid, &ii, &ip, x[ip], v[ip], &eps2, &h2);
	    }
	    g6_set_nip_(&clusterid, &iend);
	    g6_set_njp_(&clusterid, &n);
	    error = g6_get_force_etc_(&clusterid, acc+i, jerk+i, pot+i, nnbindex, flag);
	    for (ii= 0; ii<iend; ii++){
		/* Reported only; this does not by itself force a retry. */
		if (nnbindex[ii] >= n){
		    fprintf(stderr,"(get_force) nng error %x >  %x  -- retry\n",
			    nnbindex[ii], n);
		}
		    
		if(flag[ii] & (~ FLAGMASK)){
		    error ++;
		    fprintf(stderr,"(calc_force) overflow for %d: %x  -- retry\n",
			    i+ii, flag[ii]);
		}
		
		if(flag[ii] & (CERRFLAGMASK)){
		    error ++;
		    fprintf(stderr,"(calc_force) communication error  %x -- abort\n",
			    flag[ii]);
		    dump_dmawdata(clusterid, iend*14);
		    goto restart;
		}
	    }
	    /* Count failed attempts so that the limit below can actually
	       trigger; without this the loop could retry forever. */
	    if (error) nerror++;
	    if (nerror > 10){
		fprintf(stderr,"(calc_force) too many errors: abort\n");
		goto restart;
	    }
	}while(error);
	//	test_nb_read_multi();
	g6_read_neighbour_list_(&clusterid);
#if 1	
	for (ii= 0; ii<iend; ii++){
	    int nnb,nnbh,ierr;
	    int ip = ii+i;
	    g6_get_neighbour_list_(&clusterid, &ii, &n, &nnb, nbl);
	    /*	    dump_nbl("G6 ", nnb, nbl);*/
	    nbtotal += nnb;
#ifndef TIMETEST
	    /* Cross-check against the brute-force list. */
	    construct_nblist(n, ip, x, eps2, h2,&nnbh,  nblh);
	    /*	    dump_nbl("H ", nnb, nbl);*/
	    ierr=compare_lists(ii+i,nnb,nbl,nnbh,nblh);
	    printf("%d %d\r",i+ii,nnb);
#endif
	}
#endif	    
    }
    current_time = g6_cpu_second();
    fprintf(stderr, "Time, nblength = %e %e\n", current_time-start_time, nbtotal);
    printf("\nTest %d end\n",itest ++);
#ifdef TIMETEST    
    if (itest == 50) return 0;
#endif	
    /* Without TIMETEST the test repeats until the process is killed. */
    goto teststart;
    return 0;
}

