1、这是从网上下载的程序,我自己编译生成exe后运行,没有报错,但输出结果全是0,是环境配置的问题吗?
环境配置错误会导致运行结果出错吗?
2、我是在 CUDA 4.0 + VS2008 + Win7 环境下编译运行的。
3、
#include <float.h> //for FLT_MAX
#include <stdio.h>
#include <cutil.h>
#include "CUDAMCML.h"
// Simulation parameters placed in GPU constant memory (__constant__).
// They are declared ahead of the #include'd .cu files that follow; presumably
// those files read/write them (e.g. InitDCMem, which DoOneSimulation calls) --
// TODO confirm in CUDAMCMLmem.cu / CUDAMCMLtransport.cu.
// NOTE(review): meanings below are inferred from the names only -- confirm.
// Single-element array; presumably the number of photons to simulate.
__device__ __constant__ unsigned int num_photons_dc[1];
// Single-element array; presumably the number of tissue layers in use.
__device__ __constant__ unsigned int n_layers_dc[1];
// Single-element array; presumably the initial photon weight.
__device__ __constant__ unsigned int start_weight_dc[1];
// Up to MAX_LAYERS layer descriptors (LayerStruct is declared elsewhere).
__device__ __constant__ LayerStruct layers_dc[MAX_LAYERS];
// Single detection descriptor (DetStruct is declared elsewhere).
__device__ __constant__ DetStruct det_dc[1];
#include "CUDAMCMLmem.cu"
#include "CUDAMCMLio.cu"
#include "CUDAMCMLrng.cu"
#include "CUDAMCMLtransport.cu"
// Reports a failed CUDA runtime call or kernel launch.
//   status: value returned by a CUDA runtime call or by cudaGetLastError()
//   what:   short description of the step, used only in the message
// Returns 1 if status is an error (after printing it), 0 on cudaSuccess.
static int DoOneSimulation_CudaFailed(cudaError_t status, const char* what)
{
    if(status == cudaSuccess)
        return 0;
    printf("Error code=%i, %s (%s).\n",(int)status,cudaGetErrorString(status),what);
    return 1;
}

// Host-side wrapper for the device code: runs one complete simulation.
//   simulation: description of the simulation to run
//   x, a:       RNG state arrays, already initialised by the caller; they are
//               only attached to HostMem here
// Steps: set up host/device memory, launch LaunchPhoton_Global once, then
// launch MCd repeatedly until no entry of thread_active is non-zero, and
// finally copy the results back and write them out.
// Every launch, synchronisation and copy is checked explicitly instead of
// through CUDA_SAFE_CALL, so the checks do not depend on how that macro is
// configured (NOTE(review): cutil is believed to compile it to a bare call
// outside debug builds -- confirm). On any CUDA error the simulation is
// aborted (memory is released, no result file is written); otherwise a failed
// kernel would leave thread_active unchanged and the loop could spin forever
// or produce all-zero results.
void DoOneSimulation(SimulationStruct* simulation, unsigned long long* x,unsigned int* a)
{
    MemStruct DeviceMem;
    MemStruct HostMem;
    unsigned int threads_active_total=1;
    unsigned int i,ii;
    int failed;
    clock_t time1,time2;

    // Start the clock
    time1=clock();

    // x and a are already initialised in memory
    HostMem.x=x;
    HostMem.a=a;

    InitMemStructs(&HostMem,&DeviceMem, simulation);
    InitDCMem(simulation);

    dim3 dimBlock(NUM_THREADS_PER_BLOCK);
    dim3 dimGrid(NUM_BLOCKS);

    LaunchPhoton_Global<<<dimGrid,dimBlock>>>(DeviceMem);
    // cudaGetLastError() catches launch-configuration errors; the synchronise
    // call waits for all threads and returns errors raised during execution.
    // cudaThreadSynchronize is kept (rather than cudaDeviceSynchronize) so the
    // file still builds with the cutil-era toolkits it targets.
    failed = DoOneSimulation_CudaFailed(cudaGetLastError(),"LaunchPhoton_Global launch")
          || DoOneSimulation_CudaFailed(cudaThreadSynchronize(),"LaunchPhoton_Global execution");

    printf("ignoreAdetection = %d\n\n",simulation->ignoreAdetection);

    i=0;
    while(!failed && threads_active_total>0)
    {
        i++;

        // Run the kernel; the template argument selects the ignoreAdetection variant
        if(simulation->ignoreAdetection == 1)
            MCd<1><<<dimGrid,dimBlock>>>(DeviceMem);
        else
            MCd<0><<<dimGrid,dimBlock>>>(DeviceMem);

        if(DoOneSimulation_CudaFailed(cudaGetLastError(),"MCd launch")
        || DoOneSimulation_CudaFailed(cudaThreadSynchronize(),"MCd execution"))
        {
            failed=1;
            break;
        }

        // Copy thread_active from device to host
        if(DoOneSimulation_CudaFailed(
               cudaMemcpy(HostMem.thread_active,DeviceMem.thread_active,NUM_THREADS*sizeof(unsigned int),cudaMemcpyDeviceToHost),
               "copy of thread_active"))
        {
            failed=1;
            break;
        }

        // Count the threads that still report themselves as active
        threads_active_total = 0;
        for(ii=0;ii<NUM_THREADS;ii++)
            threads_active_total+=HostMem.thread_active[ii];

        if(DoOneSimulation_CudaFailed(
               cudaMemcpy(HostMem.num_terminated_photons,DeviceMem.num_terminated_photons,sizeof(unsigned int),cudaMemcpyDeviceToHost),
               "copy of num_terminated_photons"))
        {
            failed=1;
            break;
        }

        printf("Run %u, Number of photons terminated %u, Threads active %u\n",i,*HostMem.num_terminated_photons,threads_active_total);
    }

    if(failed)
    {
        // Results would be meaningless after a CUDA failure: release memory and stop.
        printf("Simulation aborted because of a CUDA error; no results written.\n");
        FreeMemStructs(&HostMem,&DeviceMem);
        return;
    }

    printf("Simulation done!\n");

    CopyDeviceToHostMem(&HostMem, &DeviceMem, simulation);

    time2=clock();

    printf("Simulation time: %.2f sec\n",(double)(time2-time1)/CLOCKS_PER_SEC);

    Write_Simulation_Results(&HostMem, simulation, time2-time1);

    FreeMemStructs(&HostMem,&DeviceMem);
}
// Program entry point.
// Echoes the command line, obtains the input file name through GetFile, reads
// the simulation descriptions from it, initialises one RNG per thread from
// "safeprimes_base32.txt" and then runs every simulation in turn.
// Returns 0 on success, 1 if the input could not be read or the RNGs could not
// be initialised.
int main(int argc,char* argv[])
{
    int i;
    char input_filename[STR_LEN];
    SimulationStruct* simulations = NULL;
    int n_simulations;
    unsigned long long seed = (unsigned long long) time(NULL);// Default, use time(NULL) as seed
    int ignoreAdetection = 0;
    char* filename;

    // RNG state, one entry per thread. Static storage instead of automatic
    // variables: NUM_THREADS-sized arrays can exceed the default stack size
    // for large launch configurations.
    static unsigned long long x[NUM_THREADS];
    static unsigned int a[NUM_THREADS];

    // Echo the argument count and every command-line argument
    printf("%d \n",argc);
    for (i=0;i<argc;i++)
        printf(" %s \n",argv[i]);

    GetFile(input_filename);
    printf("%s \n",input_filename);

    // NOTE: command-line handling (argv[1] as file name, interpret_arg for the
    // seed and ignoreAdetection) is disabled in this version; the file name
    // always comes from GetFile and the defaults above are used.
    filename=input_filename;
    printf("%s \n",filename);

    n_simulations = read_simulation_data(filename, &simulations, ignoreAdetection);
    // Treat any non-positive count as failure (the original only checked 0)
    if(n_simulations <= 0)
    {
        printf("Something wrong with read_simulation_data!\n");
        return 1;
    }
    printf("Read %d simulations\n",n_simulations);

    // Init RNG's; release the simulation data on failure instead of leaking it
    if(init_RNG(x, a, NUM_THREADS, "safeprimes_base32.txt", seed))
    {
        FreeSimulationStruct(simulations, n_simulations);
        return 1;
    }

    // Perform all the simulations
    for(i=0;i<n_simulations;i++)
    {
        // Run a simulation
        DoOneSimulation(&simulations[i],x,a);
    }

    FreeSimulationStruct(simulations, n_simulations);
    return 0;
}