Skip to content

Commit

Permalink
Work on GPU code for materials management
Browse files Browse the repository at this point in the history
  • Loading branch information
JulienBert committed Nov 14, 2012
1 parent 51fc291 commit 9be1bc4
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 57 deletions.
2 changes: 1 addition & 1 deletion source/digits_hits/src/GateTrackingGPUActorIO.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void GateTrackingGPUActorInput_Init_Materials(GateTrackingGPUActorInput * input,
//DD(m[i]->GetName());
input->mat_nb_elements[i] = m[i]->GetNumberOfElements();
if (i == 0) input->mat_index[i] = 0;
else input->mat_index[i] = input->mat_nb_elements[i]+input->mat_index[i-1];
else input->mat_index[i] = input->mat_nb_elements[i-1]+input->mat_index[i-1];
//GateTrackingGPUActorInput_Print_mat(input, i);
//DD(input->mat_nb_elements[i]);
//DD(input->mat_index[i]);
Expand Down
62 changes: 31 additions & 31 deletions source/gpu/src/actor_common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ __constant__ const float gpu_twopi = 2*gpu_pi;
/***********************************************************
* Data material strucutre
***********************************************************/
/*

// Structure for materials
struct Materials{
unsigned int nb_materials; // n
Expand All @@ -37,7 +37,7 @@ struct Materials{
float *fC;
float *fA;
float *fM;
}
};

// Materials device allocation
void materials_device_malloc(Materials &mat, unsigned int nb_mat, unsigned int nb_elm) {
Expand Down Expand Up @@ -134,7 +134,7 @@ void materials_host_free(Materials &mat) {
free(mat.fA);
free(mat.fM);
}
*/

/***********************************************************
* Stack data particle strucutre
***********************************************************/
Expand Down Expand Up @@ -308,34 +308,32 @@ void volume_device_free(Volume<T> &vol) {
* Copy structure functions
***********************************************************/

/*
// Copy materials from host to device
void materials_copy_host2device(Materials &host, Materials &device) {
unsigned int nb_mat = host.nb_materials;
unsigned int nb_elm = hot.nb_elements_total;
unsigned int nb_elm = host.nb_elements_total;

unsigned int mem_mat_usi = nb_mat * sizeof(unsigned short int);
unsigned int mem_mat_float = nb_mat * sizeof(float);
unsigned int mem_elm_usi = nb_elm * sizeof(unsigned short int);
unsigned int mem_elm_float = nb_elm * sizeof(float);

cudaMemcpy(host.nb_elements, device.nb_elements, mem_mat_usi, cudaMemcpyDeviceToHost);
cudaMemcpy(host.index, device.index, mem_mat_usi, cudaMemcpyDeviceToHost);
cudaMemcpy(host.mixture, device.mixture, mem_elm_usi, cudaMemcpyDeviceToHost);
cudaMemcpy(host.atom_num_dens, device.atom_num_dens, mem_elm_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.nb_atoms_per_vol, device.nb_atoms_per_vol, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.nb_electrons_per_vol, device.nb_electrons_per_vol, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.electron_cut_energy, device.electron_cut_energy, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.electron_max_energy, device.electron_max_energy, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.electron_mean_excitation_energy, device.electron_mean_excitation_energy, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fX0, device.fX0, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fX1, device.fX1, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fD0, device.fD0, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fC, device.fC, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fA, device.fA, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(host.fM, device.fM, mem_mat_float, cudaMemcpyDeviceToHost);
cudaMemcpy(device.nb_elements, host.nb_elements, mem_mat_usi, cudaMemcpyHostToDevice);
cudaMemcpy(device.index, host.index, mem_mat_usi, cudaMemcpyHostToDevice);
cudaMemcpy(device.mixture, host.mixture, mem_elm_usi, cudaMemcpyHostToDevice);
cudaMemcpy(device.atom_num_dens, host.atom_num_dens, mem_elm_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.nb_atoms_per_vol, host.nb_atoms_per_vol, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.nb_electrons_per_vol, host.nb_electrons_per_vol, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.electron_cut_energy, host.electron_cut_energy, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.electron_max_energy, host.electron_max_energy, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.electron_mean_excitation_energy, host.electron_mean_excitation_energy, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fX0, host.fX0, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fX1, host.fX1, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fD0, host.fD0, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fC, host.fC, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fA, host.fA, mem_mat_float, cudaMemcpyHostToDevice);
cudaMemcpy(device.fM, host.fM, mem_mat_float, cudaMemcpyHostToDevice);
}
*/

// Copy stack from device to host
void stack_copy_device2host(StackParticle &stackpart, StackParticle &phasespace) {
Expand Down Expand Up @@ -708,18 +706,18 @@ __device__ float Compton_CSPA_Standard(float E, unsigned short int Z) {
d5 = __logf(__fdividef(E, T0)); // y
CrossSection *= __expf(-d5 * (d3 + d4*d5));
}

return CrossSection;
return CrossSection;
}

// Compute the total Compton cross section for a given material
__device__ float Compton_CS_Standard(int mat, float E) {
__device__ float Compton_CS_Standard(Materials materials, unsigned int mat, float E) {
float CS = 0.0f;
int i;
int index = mat_index[mat];
int index = materials.index[mat];
// Model standard
for (i = 0; i < mat_nb_elements[mat]; ++i) {
CS += (mat_atom_num_dens[index+i] * Compton_CSPA_Standard(E, mat_mixture[index+i]));
for (i = 0; i < materials.nb_elements[mat]; ++i) {
CS += (materials.atom_num_dens[index+i] * Compton_CSPA_Standard(E, materials.mixture[index+i]));
}
return CS;
}
Expand Down Expand Up @@ -747,13 +745,13 @@ __device__ float PhotoElec_CSPA_Standard(float E, unsigned short int Z) {
}

// Compute the total Compton cross section for a given material
__device__ float PhotoElec_CS_Standard(unsigned int mat, float E) {
__device__ float PhotoElec_CS_Standard(Materials materials, unsigned int mat, float E) {
float CS = 0.0f;
int i;
int index = mat_index[mat];
int index = materials.index[mat];
// Model standard
for (i = 0; i < mat_nb_elements[mat]; ++i) {
CS += (mat_atom_num_dens[index+i] * PhotoElec_CSPA_Standard(E, mat_mixture[index+i]));
for (i = 0; i < materials.nb_elements[mat]; ++i) {
CS += (materials.atom_num_dens[index+i] * PhotoElec_CSPA_Standard(E, materials.mixture[index+i]));
}
return CS;
}
Expand Down Expand Up @@ -787,6 +785,7 @@ __device__ float PhotoElec_ElecCosThetaDistribution(StackParticle part,
* Electrons Physics Effects
***********************************************************/

/* TODO Materials
// eIonisation Cross Section Per Atom (Möller model)
__device__ float eIonisation_CSPA_Standard(float E, unsigned short int Z) {
float cutE = electron_cut_energy[1];
Expand Down Expand Up @@ -883,6 +882,7 @@ __device__ float eIonisation_dedx_Standard(int mat, float E, float cutE) {
return dedx;
}
*/

/***********************************************************
* Utils Host
Expand Down
35 changes: 24 additions & 11 deletions source/gpu/src/actor_ct_fun.cu
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ __device__ float PhotoElec_ct_SampleSecondaries_Standard(StackParticle photons,
#define PHOTON_BOUNDARY_VOXEL 4
template <typename T1>
__global__ void kernel_ct_navigation_regular(StackParticle photons,
Volume<T1> phantom,
int* count_d) {
Volume<T1> phantom,
Materials materials,
int* count_d) {
unsigned int id = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;

if (id >= photons.size) return;
Expand Down Expand Up @@ -127,12 +128,24 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
float energy = photons.E[id];

// Get material
T1 material = phantom.data[index_phantom.w];
// FIXME TO DEBUG
//material = 1;
if (material == 1) {material = 0;} // Air
if (material == 2) {material = 13;} // Brain
if (material == 3) {material = 7;} // RibBone
T1 mat = phantom.data[index_phantom.w];

/*
int index = materials.index[mat];
printf("nb_mat %i mat %i index %i nb_elts %i\n", materials.nb_materials, mat, index, materials.nb_elements[mat]);
int toto=0;
while (toto<2) {
printf("mixture: %i num_dens %e\n", materials.mixture[index+toto], materials.atom_num_dens[index+toto]);
++toto;
}
toto=0;
while(toto<materials.nb_elements_total) {
printf("elts %i\n", materials.mixture[toto]);
++toto;
}
*/

//// Find next discrete interaction ///////////////////////////////////////

Expand All @@ -143,7 +156,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
float cross_section;

// Photoelectric
cross_section = PhotoElec_CS_Standard(material, energy);
cross_section = PhotoElec_CS_Standard(materials, mat, energy);
interaction_distance = __fdividef(-__logf(Brent_real(id, photons.table_x_brent, 0)),
cross_section);
if (interaction_distance < next_interaction_distance) {
Expand All @@ -152,7 +165,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,
}

// Compton
cross_section = Compton_CS_Standard(material, energy);
cross_section = Compton_CS_Standard(materials, mat, energy);
interaction_distance = __fdividef(-__logf(Brent_real(id, photons.table_x_brent, 0)),
cross_section);
if (interaction_distance < next_interaction_distance) {
Expand All @@ -169,7 +182,7 @@ __global__ void kernel_ct_navigation_regular(StackParticle photons,

// Distance to the next voxel boundary (raycasting)
interaction_distance = get_boundary_voxel_by_raycasting(index_phantom, position,
direction, phantom.voxel_size);
direction, phantom.voxel_size);
if (interaction_distance < next_interaction_distance) {
next_interaction_distance = interaction_distance;
next_discrete_process = PHOTON_BOUNDARY_VOXEL;
Expand Down
21 changes: 7 additions & 14 deletions source/gpu/src/actor_ct_track.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
double t_g = time();

// Select a GPU
//cudaSetDevice(input->cudaDeviceID);
cudaSetDevice(1);
cudaSetDevice(input->cudaDeviceID);
//cudaSetDevice(1);

long seed = input->seed;
DD(seed);
Expand All @@ -35,11 +35,10 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
StackParticle photons_h;
stack_host_malloc(photons_h, nb_of_particles);

/*
// Materials def, alloc & loading
Materials materials_h;
materials_host_malloc(materials_h, input->nb_materials, input->nb_elements_total);

materials_h.nb_elements = input->mat_nb_elements;
materials_h.index = input->mat_index;
materials_h.mixture = input->mat_mixture;
Expand All @@ -52,20 +51,14 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
materials_h.fX0 = input->fX0;
materials_h.fX1 = input->fX1;
materials_h.fD0 = input->fD0;
materials_h.fC = input->fD;
materials_h.fC = input->fC;
materials_h.fA = input->fA;
materials_h.fM = input->fM;

Materials materials_d;
materials_device_malloc(materials_d, input->nb_materials, input->nb_elements_total);
materials_copy_host2device(materials_h, materials_d);
// To be continued... (JB)
*/



// TIMING
t_init = time() - t_init;
double t_in = time();
Expand Down Expand Up @@ -149,11 +142,11 @@ void GateTrackingGPUActorTrack(const GateTrackingGPUActorInput * input,
while (count_h < nb_of_particles) {
++step;

kernel_ct_navigation_regular<unsigned short int><<<grid, threads>>>(photons_d, phantom_d, count_d);
kernel_ct_navigation_regular<unsigned short int><<<grid, threads>>>(photons_d, phantom_d,
materials_d, count_d);

// get back the number of simulated photons
cudaMemcpy(&count_h, count_d, sizeof(int), cudaMemcpyDeviceToHost);

}

// TIMING
Expand Down

0 comments on commit 9be1bc4

Please sign in to comment.