Work on GPU code for materials management

arifoley · Nov 14, 2012 · 9be1bc4 · 9be1bc4
1 parent 51fc291
commit 9be1bc4
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 57 deletions.
diff --git a/source/digits_hits/src/GateTrackingGPUActorIO.cc b/source/digits_hits/src/GateTrackingGPUActorIO.cc
@@ -77,7 +77,7 @@ void GateTrackingGPUActorInput_Init_Materials(GateTrackingGPUActorInput * input,
     //DD(m[i]->GetName());
     input->mat_nb_elements[i] = m[i]->GetNumberOfElements();
     if (i == 0) input->mat_index[i] = 0;
-    else  input->mat_index[i] = input->mat_nb_elements[i]+input->mat_index[i-1];
+    else  input->mat_index[i] = input->mat_nb_elements[i-1]+input->mat_index[i-1];
     //GateTrackingGPUActorInput_Print_mat(input, i);
     //DD(input->mat_nb_elements[i]);
     //DD(input->mat_index[i]);

diff --git a/source/gpu/src/actor_common.cu b/source/gpu/src/actor_common.cu
@@ -16,7 +16,7 @@ __constant__ const float gpu_twopi = 2*gpu_pi;
 /***********************************************************
  * Data material strucutre
  ***********************************************************/
-/*
+
 // Structure for materials
 struct Materials{
     unsigned int nb_materials;              // n
@@ -37,7 +37,7 @@ struct Materials{
     float *fC;
     float *fA;
     float *fM;
-}
+};
 
 // Materials device allocation
 void materials_device_malloc(Materials &mat, unsigned int nb_mat, unsigned int nb_elm) {
@@ -134,7 +134,7 @@ void materials_host_free(Materials &mat) {
     free(mat.fA);
     free(mat.fM);
 }
-*/
+
 /***********************************************************
  * Stack data particle strucutre
  ***********************************************************/
@@ -308,34 +308,32 @@ void volume_device_free(Volume<T> &vol) {
  * Copy structure functions
  ***********************************************************/
 
-/*
 // Copy materials from host to device
 void materials_copy_host2device(Materials &host, Materials &device) {
     unsigned int nb_mat = host.nb_materials;
-    unsigned int nb_elm = hot.nb_elements_total;
+    unsigned int nb_elm = host.nb_elements_total;
 
     unsigned int mem_mat_usi = nb_mat * sizeof(unsigned short int);
     unsigned int mem_mat_float = nb_mat * sizeof(float);
     unsigned int mem_elm_usi = nb_elm * sizeof(unsigned short int);
     unsigned int mem_elm_float = nb_elm * sizeof(float);
 
-    cudaMemcpy(host.nb_elements, device.nb_elements, mem_mat_usi, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.index, device.index, mem_mat_usi, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.mixture, device.mixture, mem_elm_usi, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.atom_num_dens, device.atom_num_dens, mem_elm_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.nb_atoms_per_vol, device.nb_atoms_per_vol, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.nb_electrons_per_vol, device.nb_electrons_per_vol, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.electron_cut_energy, device.electron_cut_energy, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.electron_max_energy, device.electron_max_energy, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.electron_mean_excitation_energy, device.electron_mean_excitation_energy, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fX0, device.fX0, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fX1, device.fX1, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fD0, device.fD0, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fC, device.fC, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fA, device.fA, mem_mat_float, cudaMemcpyDeviceToHost);
-    cudaMemcpy(host.fM, device.fM, mem_mat_float, cudaMemcpyDeviceToHost);
+    cudaMemcpy(device.nb_elements, host.nb_elements, mem_mat_usi, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.index, host.index, mem_mat_usi, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.mixture, host.mixture, mem_elm_usi, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.atom_num_dens, host.atom_num_dens, mem_elm_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.nb_atoms_per_vol, host.nb_atoms_per_vol, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.nb_electrons_per_vol, host.nb_electrons_per_vol, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.electron_cut_energy, host.electron_cut_energy, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.electron_max_energy, host.electron_max_energy, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.electron_mean_excitation_energy, host.electron_mean_excitation_energy, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fX0, host.fX0, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fX1, host.fX1, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fD0, host.fD0, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fC, host.fC, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fA, host.fA, mem_mat_float, cudaMemcpyHostToDevice);
+    cudaMemcpy(device.fM, host.fM, mem_mat_float, cudaMemcpyHostToDevice);
 }
-*/
 
 // Copy stack from device to host
 void stack_copy_device2host(StackParticle &stackpart, StackParticle &phasespace) {
@@ -708,18 +706,18 @@ __device__ float Compton_CSPA_Standard(float E, unsigned short int Z) {
 		d5 = __logf(__fdividef(E, T0)); // y
 		CrossSection *= __expf(-d5 * (d3 + d4*d5));
 	}
-
-	return CrossSection;
+	
+    return CrossSection;
 }
 
 // Compute the total Compton cross section for a given material
-__device__ float Compton_CS_Standard(int mat, float E) {
+__device__ float Compton_CS_Standard(Materials materials, unsigned int mat, float E) {
 	float CS = 0.0f;
 	int i;
-	int index = mat_index[mat];
+	int index = materials.index[mat];
 	// Model standard
-	for (i = 0; i < mat_nb_elements[mat]; ++i) {
-		CS += (mat_atom_num_dens[index+i] * Compton_CSPA_Standard(E, mat_mixture[index+i]));
+	for (i = 0; i < materials.nb_elements[mat]; ++i) {
+		CS += (materials.atom_num_dens[index+i] * Compton_CSPA_Standard(E, materials.mixture[index+i]));
 	}
 	return CS;
 }
@@ -747,13 +745,13 @@ __device__ float PhotoElec_CSPA_Standard(float E, unsigned short int Z) {
 }
 
 // Compute the total Compton cross section for a given material
-__device__ float PhotoElec_CS_Standard(unsigned int mat, float E) {
+__device__ float PhotoElec_CS_Standard(Materials materials, unsigned int mat, float E) {
 	float CS = 0.0f;
 	int i;
-	int index = mat_index[mat];
+	int index = materials.index[mat];
 	// Model standard
-	for (i = 0; i < mat_nb_elements[mat]; ++i) {
-		CS += (mat_atom_num_dens[index+i] * PhotoElec_CSPA_Standard(E, mat_mixture[index+i]));
+	for (i = 0; i < materials.nb_elements[mat]; ++i) {
+		CS += (materials.atom_num_dens[index+i] * PhotoElec_CSPA_Standard(E, materials.mixture[index+i]));
 	}
 	return CS;
 }
@@ -787,6 +785,7 @@ __device__ float PhotoElec_ElecCosThetaDistribution(StackParticle part,
  * Electrons Physics Effects
  ***********************************************************/
 
+/* TODO Materials
 // eIonisation Cross Section Per Atom (Möller model)
 __device__ float eIonisation_CSPA_Standard(float E, unsigned short int Z) {
     float cutE = electron_cut_energy[1]; 
@@ -883,6 +882,7 @@ __device__ float eIonisation_dedx_Standard(int mat, float E, float cutE) {
 
     return dedx;
 }
+*/
 
 /***********************************************************
  * Utils Host

diff --git a/source/gpu/src/actor_ct_fun.cu b/source/gpu/src/actor_ct_fun.cu
@@ -91,8 +91,9 @@ __device__ float PhotoElec_ct_SampleSecondaries_Standard(StackParticle photons,
 #define PHOTON_BOUNDARY_VOXEL 4
 template <typename T1>
 __global__ void kernel_ct_navigation_regular(StackParticle photons,
-                                           Volume<T1> phantom,
-                                           int* count_d) {
+                                             Volume<T1> phantom,
+                                             Materials materials,
+                                             int* count_d) {
     unsigned int id = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
 
     if (id >= photons.size) return;
@@ -127,12 +128,24 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
     float energy = photons.E[id];
 
     // Get material
-    T1 material = phantom.data[index_phantom.w];
-    // FIXME TO DEBUG
-    //material = 1;
-    if (material == 1) {material = 0;} // Air
-    if (material == 2) {material = 13;} // Brain
-    if (material == 3) {material = 7;}  // RibBone
+    T1 mat = phantom.data[index_phantom.w];
+
+    /*
+	int index = materials.index[mat];
+    printf("nb_mat %i mat %i index %i nb_elts %i\n", materials.nb_materials, mat, index, materials.nb_elements[mat]);
+
+    int toto=0;
+    while (toto<2) {
+        printf("mixture: %i num_dens %e\n", materials.mixture[index+toto], materials.atom_num_dens[index+toto]);
+        ++toto;
+    }
+
+    toto=0;
+    while(toto<materials.nb_elements_total) {
+        printf("elts %i\n", materials.mixture[toto]);
+        ++toto;
+    }
+    */
 
     //// Find next discrete interaction ///////////////////////////////////////
 
@@ -143,7 +156,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
     float cross_section;
 
     // Photoelectric
-    cross_section = PhotoElec_CS_Standard(material, energy);
+    cross_section = PhotoElec_CS_Standard(materials, mat, energy);
     interaction_distance = __fdividef(-__logf(Brent_real(id, photons.table_x_brent, 0)),
                                      cross_section);
     if (interaction_distance < next_interaction_distance) {
@@ -152,7 +165,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
     }
 
     // Compton
-    cross_section = Compton_CS_Standard(material, energy);
+    cross_section = Compton_CS_Standard(materials, mat, energy);
     interaction_distance = __fdividef(-__logf(Brent_real(id, photons.table_x_brent, 0)),
                                      cross_section);
     if (interaction_distance < next_interaction_distance) {
@@ -169,7 +182,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
 
     // Distance to the next voxel boundary (raycasting)
     interaction_distance = get_boundary_voxel_by_raycasting(index_phantom, position, 
-                                                          direction, phantom.voxel_size);
+                                                            direction, phantom.voxel_size);
     if (interaction_distance < next_interaction_distance) {
       next_interaction_distance = interaction_distance;
       next_discrete_process = PHOTON_BOUNDARY_VOXEL;

diff --git a/source/gpu/src/actor_ct_track.cu b/source/gpu/src/actor_ct_track.cu
@@ -18,8 +18,8 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
   double t_g = time();
 
   // Select a GPU
-  //cudaSetDevice(input->cudaDeviceID);
-  cudaSetDevice(1);
+  cudaSetDevice(input->cudaDeviceID);
+  //cudaSetDevice(1);
 
   long seed = input->seed;
   DD(seed);
@@ -35,11 +35,10 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
   StackParticle photons_h;
   stack_host_malloc(photons_h, nb_of_particles);
 
-  /*
   // Materials def, alloc & loading  
   Materials materials_h;
   materials_host_malloc(materials_h, input->nb_materials, input->nb_elements_total);
-  
+
   materials_h.nb_elements = input->mat_nb_elements;
   materials_h.index = input->mat_index;
   materials_h.mixture = input->mat_mixture;
@@ -52,20 +51,14 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
   materials_h.fX0 = input->fX0;
   materials_h.fX1 = input->fX1;
   materials_h.fD0 = input->fD0;
-  materials_h.fC = input->fD;
+  materials_h.fC = input->fC;
   materials_h.fA = input->fA;
   materials_h.fM = input->fM;
 
   Materials materials_d;
   materials_device_malloc(materials_d, input->nb_materials, input->nb_elements_total);
-  
   materials_copy_host2device(materials_h, materials_d);
-  
-  // To be continued... (JB)
-   
-  */
-
-
+
   // TIMING
   t_init = time() - t_init;
   double t_in = time();
@@ -149,11 +142,11 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
   while (count_h < nb_of_particles) {
     ++step;
 
-    kernel_ct_navigation_regular<unsigned short int><<<grid, threads>>>(photons_d, phantom_d, count_d);
+    kernel_ct_navigation_regular<unsigned short int><<<grid, threads>>>(photons_d, phantom_d, 
+                                                                        materials_d, count_d);
 
     // get back the number of simulated photons
     cudaMemcpy(&count_h, count_d, sizeof(int), cudaMemcpyDeviceToHost);
-
   }
 
   // TIMING