diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml index be9fed1..0978da1 100644 --- a/.github/workflows/ci-test.yml +++ b/.github/workflows/ci-test.yml @@ -19,7 +19,7 @@ jobs: fail-fast: false matrix: - version: ['1.1', '1.2', '1.3', '1.4', '1.5', 'nightly'] + version: ['1.5', '1.6', '1.7', 'nightly'] os: [ubuntu-latest] include: diff --git a/Project.toml b/Project.toml index 2f25435..5f8968a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SetIntersectionProjection" uuid = "335f7d24-6316-57dd-9c3a-df470f2b739e" authors = ["Bas Peters <1.bas.peters@gmail.com>"] -version = "0.2.2" +version = "0.2.3" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" @@ -21,7 +21,7 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] DistributedArrays = "0.6" Interpolations = "0.13" -JOLI = "0.7" +JOLI = "0.7,0.8" Parameters = "0.12" SortingAlgorithms = "1" TimerOutputs = "0.5.15" diff --git a/examples/ConstraintSetupExamples.jl b/examples/ConstraintSetupExamples.jl index 4d32496..72b04ed 100644 --- a/examples/ConstraintSetupExamples.jl +++ b/examples/ConstraintSetupExamples.jl @@ -27,16 +27,16 @@ else #error("download escalator video from http://cvxr.com/tfocs/demos/rpca/escalator_data.mat") end -mtrue = read(file, "X") -n1 = convert(Integer,read(file, "m")) -n2 = convert(Integer,read(file, "n")) -m_mat = convert(Array{TF,2},mtrue) -m_tensor = convert(Array{TF,3},reshape(mtrue,n1,n2,Integer(200))) +mtrue = read(file, "X"); +n1 = convert(Integer,read(file, "m")); +n2 = convert(Integer,read(file, "n")); +m_mat = convert(Array{TF,2},mtrue); +m_tensor = convert(Array{TF,3},reshape(mtrue,n1,n2,Integer(200))); #computational grid for the video -comp_grid = compgrid((1f0,1f0,1f0),(size(m_tensor,1),size(m_tensor,2), size(m_tensor,3))) +comp_grid = compgrid((1f0,1f0,1f0),(size(m_tensor,1),size(m_tensor,2), size(m_tensor,3))); -comp_grid_time_slice = compgrid((1f0,1f0),(size(m_tensor,1),size(m_tensor,2))) +comp_grid_time_slice = compgrid((1f0,1f0),(size(m_tensor,1),size(m_tensor,2))); ###################################################################### @@ -52,7 +52,7 @@ options = PARSDMM_options() #l1 (total variation) constraints (in one direction) #find a reasonable value for the l1-ball -(TD_OP, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid,"D_z",options.FL) +(TD_OP, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid,"D_z",options.FL); TV_z = norm(TD_OP*vec(m_tensor),1) m_min = 0.0 @@ -61,15 +61,15 @@ set_type = "l1" TD_OP = "D_z" app_mode = ("tensor","") custom_TD_OP = ([],false) -push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) +push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); -(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) -(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) +(P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); +(TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); @time (x,log_PARSDMM) = PARSDMM(vec(m_tensor),AtA,TD_OP,set_Prop,P_sub,comp_grid,options); @time (x,log_PARSDMM) = PARSDMM(vec(m_tensor),AtA,TD_OP,set_Prop,P_sub,comp_grid,options); -m_proj = reshape(x,comp_grid.n) +m_proj = reshape(x,comp_grid.n); figure(); for i=1:comp_grid.n[3] @@ -83,23 +83,23 @@ end ######## (anisotropic) total-variation on the time-derivative ######## ######## using JOLI operators ######## -options = PARSDMM_options() +options = PARSDMM_options(); #TV 
operator per time-slice -(TV, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid_time_slice,"TV",options.FL) +(TV, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid_time_slice,"TV",options.FL); #time derivative over the time-slices -(D, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid,"D_z",options.FL) +(D, AtA_diag, dense, TD_n) = get_TD_operator(comp_grid,"D_z",options.FL); -CustomOP_explicit_sparse = kron(TV, SparseMatrixCSC{TF}(LinearAlgebra.I, comp_grid.n[3]-1,comp_grid.n[3]-1))* D +CustomOP_explicit_sparse = kron(TV, SparseMatrixCSC{TF}(LinearAlgebra.I, comp_grid.n[3]-1,comp_grid.n[3]-1))* D; -D = joMatrix(D) -TV = joMatrix(TV) -CustomOP_JOLI = joKron(TV, joEye(comp_grid.n[3]-1,DDT=Float32,RDT=Float32))* D +D = joMatrix(D); +TV = joMatrix(TV); +CustomOP_JOLI = joKron(TV, joEye(comp_grid.n[3]-1,DDT=Float32,RDT=Float32))* D; ## Solve using JOLI ## #initialize constraints - constraint = Vector{SetIntersectionProjection.set_definitions}() + constraint = Vector{SetIntersectionProjection.set_definitions}(); m_min = 0.0 m_max = 0.1*norm(CustomOP_JOLI*vec(m_tensor),1) @@ -122,7 +122,7 @@ CustomOP_JOLI = joKron(TV, joEye(comp_grid.n[3]-1,DDT=Float32,RDT=Float32))* D @time (x_joli,log_PARSDMM) = PARSDMM(vec(m_tensor),AtA,TD_OP,set_Prop,P_sub,comp_grid,options); ## solve using explicit sparse array ## - constraint = Vector{SetIntersectionProjection.set_definitions}() + constraint = Vector{SetIntersectionProjection.set_definitions}(); m_min = 0.0 m_max = 0.025*norm(CustomOP_explicit_sparse*vec(m_tensor),1) @@ -130,17 +130,17 @@ CustomOP_JOLI = joKron(TV, joEye(comp_grid.n[3]-1,DDT=Float32,RDT=Float32))* D TD_OP = "identity" app_mode = ("matrix","") custom_TD_OP = (CustomOP_explicit_sparse,false) - push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)) + push!(constraint, set_definitions(set_type,TD_OP,m_min,m_max,app_mode,custom_TD_OP)); options=PARSDMM_options() - (P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL) + (P_sub,TD_OP,set_Prop) = setup_constraints(constraint,comp_grid,options.FL); #set properties of custom operator set_Prop.AtA_diag[1] = false set_Prop.dense[1] = false set_Prop.banded[1] = true - (TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options) + (TD_OP,AtA,l,y) = PARSDMM_precompute_distribute(TD_OP,set_Prop,comp_grid,options); @time (x_sp,log_PARSDMM) = PARSDMM(vec(m_tensor),AtA,TD_OP,set_Prop,P_sub,comp_grid,options); @time (x_sp,log_PARSDMM) = PARSDMM(vec(m_tensor),AtA,TD_OP,set_Prop,P_sub,comp_grid,options); diff --git a/examples/projection_intersection_2D.jl b/examples/projection_intersection_2D.jl index 36119c4..8a58bed 100755 --- a/examples/projection_intersection_2D.jl +++ b/examples/projection_intersection_2D.jl @@ -114,7 +114,7 @@ options.parallel = true @time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); @time (x2,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); -#@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); +@time (x,log_PARSDMM) = PARSDMM(m,AtA,TD_OP,set_Prop,P_sub,comp_grid,options); #plot figure(); @@ -125,27 +125,30 @@ savefig("projected_model_ParallelPARSDMM.png",bbox_inches="tight") #print timings in terminal log_PARSDMM.timing -# #use multilevel-serial (2-levels) -# options.parallel = false +#use multilevel-serial (2-levels) +options.parallel = false -# #2 levels, the gird point spacing at level 2 is 3X that of the original (level 1) grid -# n_levels = 2 -# coarsening_factor = 3 +#2 levels, the 
grid point spacing at level 2 is 3X that of the original (level 1) grid +n_levels = 2 +coarsening_factor = 3 -# #set up all required quantities for each level -# #(m_levels,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels)=setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) -# (TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels)=setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) +#set up all required quantities for each level +#(m_levels,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels)=setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) +(TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels)=setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) -# println("") -# println("PARSDMM multilevel-serial (bounds and bounds on D_z):") -# @time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); -# @time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); -# @time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); +println("") +println("PARSDMM multilevel-serial (bounds and bounds on D_z):") +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); -# figure(); -# subplot(2,1,1);imshow(permutedims(reshape(m,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("model to project") -# subplot(2,1,2);imshow(permutedims(reshape(x,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("Projection (bounds and bounds on D_z)") -# savefig("projected_model_MultilevelSerialPARSDMM.png",bbox_inches="tight") +figure(); +subplot(2,1,1);imshow(permutedims(reshape(m,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("model to project") +subplot(2,1,2);imshow(permutedims(reshape(x,(comp_grid.n[1],comp_grid.n[2])),[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("Projection (bounds and bounds on D_z)") +savefig("projected_model_MultilevelSerialPARSDMM.png",bbox_inches="tight") + +#print timings in the terminal; note that for the multi-level version, the timing covers only the final level on the original grid +log_PARSDMM.timing # #now use multi-level with parallel PARSDMM # options.parallel=true diff --git a/examples/projection_intersection_3D.jl b/examples/projection_intersection_3D.jl index 8e10726..073ef6a 100644 --- a/examples/projection_intersection_3D.jl +++ b/examples/projection_intersection_3D.jl @@ -102,13 +102,13 @@ subplot(3, 3, 8);plot(log_PARSDMM.gamma) ;title("gamma") subplot(3, 3, 9);semilogy(log_PARSDMM.evol_x) ;title("x evolution") #plot -m_plot = reshape(m,comp_grid.n) +m_plot = reshape(m,comp_grid.n); figure(); subplot(3,1,1);imshow(m_plot[:,:,Int64(round(comp_grid.n[3]/2))],cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, ymax, 0]); title("model to project x-y slice")
subplot(3,1,2);imshow(permutedims(m_plot[:,Int64(round(comp_grid.n[2]/2)),:],[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("model to project x-z slice") subplot(3,1,3);imshow(permutedims(m_plot[Int64(round(comp_grid.n[1]/2)),:,:],[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, ymax, zmax, 0]); title("model to project y-z slice") -x_plot = reshape(x,comp_grid.n) +x_plot = reshape(x,comp_grid.n); figure(); subplot(3,1,1);imshow(x_plot[:,:,Int64(round(comp_grid.n[3]/2))],cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, ymax, 0]); title("projected model x-y slice") subplot(3,1,2);imshow(permutedims(x_plot[:,Int64(round(comp_grid.n[2]/2)),:],[2,1]),cmap="jet",vmin=vmi,vmax=vma,extent=[0, xmax, zmax, 0]); title("projected model x-z slice") @@ -121,13 +121,13 @@ n_levels = 2 coarsening_factor = 3 #set up all required quantities for each level -(TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels) = setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) +(TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels) = setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options); println("") println("PARSDMM multilevel-serial (bounds and bounds on D_z):") -@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options) -@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options) -@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options) +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); +@time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); #parallel single level println("") @@ -147,7 +147,7 @@ n_levels = 2 coarsening_factor = 3 #set up all required quantities for each level -(TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels) = setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options) +(TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels) = setup_multi_level_PARSDMM(m,n_levels,coarsening_factor,comp_grid,constraint,options); println("PARSDMM multilevel-parallel (bounds and bounds on D_z):") @time (x,log_PARSDMM) = PARSDMM_multi_level(m,TD_OP_levels,AtA_levels,P_sub_levels,set_Prop_levels,comp_grid_levels,options); diff --git a/src/CDS_MVp.jl b/src/CDS_MVp.jl index 4f5c190..e06b7ac 100755 --- a/src/CDS_MVp.jl +++ b/src/CDS_MVp.jl @@ -1,4 +1,4 @@ -export CDS_MVp, CDS_MVp2, CDS_MVp3, CDS_MVp4 +export CDS_MVp """ compute single-thread matrix vector product with vector x, output is vector y: y=A*x @@ -19,9 +19,9 @@ function CDS_MVp( r0 = max(1, 1-d) r1 = min(N, N-d) c0 = max(1, 1+d) - for r = r0 : r1 - c = r - r0 + c0 #original - @inbounds y[r] = y[r] + R[r,i] * x[c]#original + for r = r0 : r1 + c = r - r0 + c0 #original + @inbounds y[r] = y[r] + R[r,i] * x[c]#original end end return y @@ -51,7 +51,7 @@ end # return y # end -# function CDS_MVp( +# function CDS_MVp2( # N ::Integer, # ndiags ::Integer, # R ::Array{TF,2}, diff --git a/src/PARSDMM.jl b/src/PARSDMM.jl index 12d1a2e..b0d0f09 100755 --- a/src/PARSDMM.jl +++ b/src/PARSDMM.jl @@ -104,7 +104,7 @@ for i=1:maxit #main loop # 
x-minimization @timeit to "argmin x" begin copy!(x_old,x); - (x,iter,relres,x_solve_tol_ref) = argmin_x(Q,rhs,x,x_solve_tol_ref,i,log_PARSDMM,Q_offsets,Ax_out) + (x,iter,relres,x_solve_tol_ref) = argmin_x(Q,rhs,x,x_solve_tol_ref,i,log_PARSDMM,Q_offsets,Ax_out,comp_grid) log_PARSDMM.cg_it[i] = iter log_PARSDMM.cg_relres[i] = relres end #end timer for argmin x diff --git a/src/PARSDMM_initialize.jl b/src/PARSDMM_initialize.jl index 28efcc4..9829cdd 100755 --- a/src/PARSDMM_initialize.jl +++ b/src/PARSDMM_initialize.jl @@ -86,10 +86,13 @@ function PARSDMM_initialize( m = [m ; zeros(TF,length(m)) ] end if parallel - #f_compute_rel_feas = (feasibility_initial,TD_OP,P_sub) -> compute_relative_feasibility(m,feasibility_initial,TD_OP,P_sub) - #[@spawnat pid f_compute_rel_feas for pid in 2:nworkers()] - #feasibility_initial = pmap(f_compute_rel_feas, feasibility_initial,TD_OP,P_sub; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) - feasibility_initial = pmap((feasibility_initial,TD_OP,P_sub) -> compute_relative_feasibility(m,feasibility_initial,TD_OP,P_sub) , feasibility_initial,TD_OP,P_sub; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) + feasibility_initial = distribute(feasibility_initial) + [@sync @spawnat pid m for pid in P_sub.pids] + [@sync @spawnat pid compute_relative_feasibility(m,feasibility_initial[:L],TD_OP[:L],P_sub[:L]) for pid in P_sub.pids] + feasibility_initial = convert(Vector{TF},feasibility_initial) + + #using pmap + #feasibility_initial = pmap((feasibility_initial,TD_OP,P_sub) -> compute_relative_feasibility(m,feasibility_initial,TD_OP,P_sub) , feasibility_initial,TD_OP,P_sub; distributed=true, batch_size=1, on_error=nothing, retry_delays=[], retry_check=nothing) else for ii = 1:length(P_sub) feasibility_initial[ii] = norm(P_sub[ii](TD_OP[ii]*m) .- TD_OP[ii]*m) ./ (norm(TD_OP[ii]*m)+(100*eps(TF))); diff --git a/src/SetIntersectionProjection.jl b/src/SetIntersectionProjection.jl index 9048202..79751d3 100644 --- a/src/SetIntersectionProjection.jl +++ b/src/SetIntersectionProjection.jl @@ -15,6 +15,9 @@ using JOLI using JOLI.FFTW, JOLI.Wavelets using SortingAlgorithms using TimerOutputs +# using Flux +# using NNlib +# using CUDA export log_type_PARSDMM, set_properties, PARSDMM_options, set_definitions @@ -39,6 +42,7 @@ include("CDS_MVp.jl") include("CDS_MVp_MT.jl") include("CDS_MVp_MT_subfunc.jl") include("CDS_scaled_add!.jl") +#include("CDS2stencil.jl") #scripts for parallelism include("update_y_l_parallel.jl") @@ -53,7 +57,7 @@ include("setup_multi_level_PARSDMM.jl") include("constraint2coarse.jl") include("interpolate_y_l.jl") -#scripts for setting up constraints, projetors, linear operators +#scripts for setting up constraints, projectors, linear operators include("default_PARSDMM_options.jl") include("convert_options!.jl") include("get_discrete_Grad.jl"); @@ -97,13 +101,6 @@ mutable struct log_type_PARSDMM cg_it :: Vector{Integer} cg_relres :: Vector{Real} timing :: TimerOutput - # T_cg :: Real - # T_stop :: Real - # T_ini :: Real - # T_rhs :: Real - # T_adjust_rho_gamma:: Real - # T_y_l_upd :: Real - # T_Q_upd :: Real end @with_kw mutable struct PARSDMM_options diff --git a/src/argmin_x.jl b/src/argmin_x.jl index dd63247..663ce76 100755 --- a/src/argmin_x.jl +++ b/src/argmin_x.jl @@ -11,7 +11,8 @@ function argmin_x( i ::Integer, log_PARSDMM, Q_offsets=[], - Ax_out=zeros(TF,length(x)) ::Vector{TF} + Ax_out=zeros(TF,length(x)) ::Vector{TF}, + comp_grid=[] ) where {TF<:Real} #Initialize @@ -20,8 
+21,14 @@ function argmin_x( iter = 0 if typeof(Q)==Array{TF,2} #set up multi-threaded matrix-vector product in compressed diagonal storage format - Af1(in) = (fill!(Ax_out,TF(0)); CDS_MVp_MT(size(Q,1),size(Q,2),Q,Q_offsets,in,Ax_out); return Ax_out) + + # w = CDS2stencil(Q, Q_offsets, comp_grid.n) + # w = w |> gpu + # cdims = DenseConvDims(reshape(x, comp_grid.n ...,1,1), w, padding=Int(size(w,1)-1)/2) + Af1(in) = Ax_CDS_MT(in, Ax_out, Q, Q_offsets) + #Af1(in) = Ax_stencil_cuda(in, Ax_out, comp_grid, w, cdims) + #determine what relative residual CG needs to reach if i<3 #i is the PARSDMM iteration counter x_solve_tol_ref = TF(max(0.1*norm(Af1(x)-rhs)/norm(rhs),10*eps(TF))) #10% of current relative residual @@ -30,6 +37,7 @@ function argmin_x( end (x,flag,relres,iter) = cg(Af1,rhs,tol=x_solve_tol_ref,maxIter=1000,x=x,out=0); + #(x,flag,relres,iter) = cg(Q,Q_offsets,rhs, x_solve_tol_ref, 1000, x, 0) elseif typeof(Q)==SparseMatrixCSC{TF,Int64} #CG with native Julia sparse CSC format MPVs @@ -60,3 +68,29 @@ function argmin_x( return x,iter,relres,x_solve_tol_ref end + +function Ax_CDS_MT(in::Vector{TF}, Ax_out::Vector{TF}, Q::Array{TF,2}, Q_offsets::Vector{Int}) where TF <: Real + + fill!(Ax_out,TF(0)) + CDS_MVp_MT(size(Q,1),size(Q,2),Q,Q_offsets,in,Ax_out); + + return Ax_out +end + +# function Ax_stencil_cuda(in::Vector{TF}, Ax_out::Vector{TF}, comp_grid, w::CUDA.CuArray{TF, 5}, cdims) where TF <: Real +# in = in |> gpu +# Ax_out = Ax_out |>gpu +# fill!(Ax_out,TF(0)) +# Ax_out = reshape(Ax_out,comp_grid.n ...,1,1) +# in = reshape(in,comp_grid.n ...,1,1) +# conv!(Ax_out, in, w, cdims) + +# # #for now, boundary conditions are an issue, manually correct it +# # kernel_size = size(w,1) + + +# Ax_out = Ax_out |> cpu + +# return vec(Ax_out) + +# end diff --git a/src/cg.jl b/src/cg.jl index eb92023..43db4d7 100644 --- a/src/cg.jl +++ b/src/cg.jl @@ -41,11 +41,10 @@ Output: """ # function cg(A::Function,b::Vector; tol::Real=1e-2,maxIter::Integer=100,M::Function=identity,x::Vector=[],out::Integer=0, # storeInterm::Bool=false) -function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Function=identity,x::Vector=[],out::Integer=0, - storeInterm::Bool=false) where {TF <: Real} +function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Function=identity,x::Vector=[],out::Integer=0) where {TF <: Real} n = length(b) - if norm(b)==0; return zeros(eltype(b),n),-9,0.0,0,[0.0]; end + if norm(b)==0; return zeros(eltype(b),n), -9, 0f0, 0, [0f0]; end if isempty(x) x = zeros(TF,n)#x = zeros(eltype(b),n) r = copy(b) @@ -55,9 +54,9 @@ function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Fu z = M(r)::Vector{TF} p = copy(z)::Vector{TF} - if storeInterm - X = zeros(n,maxIter) # allocate space for intermediates - end + # if storeInterm + # X = zeros(n,maxIter) # allocate space for intermediates + # end nr0 = norm(b) @@ -125,9 +124,85 @@ function cg(A::Function,b::Vector{TF}; tol::Real=1e-2,maxIter::Integer=100,M::Fu println("cg achieved desired tolerance at iteration %d. 
Residual norm is %1.2e.",lastIter,resvec[lastIter]) end end - if storeInterm - return X[:,1:lastIter],flag,resvec[lastIter],lastIter,resvec[1:lastIter] - else - return x,flag,resvec[lastIter],lastIter,resvec[1:lastIter] - end + return x,flag,resvec[lastIter],lastIter,resvec[1:lastIter] end + +#specifically for CDS/DIA storage matrices +# function cg(R::Matrix{TF},offset::Vector{TI},b::Vector{TF},tol::Real,maxIter::Integer,x::Vector{TF},out::Integer) where {TF <: Real, TI <: Integer} +# n = length(b) +# ndiags = size(R,2) +# r = zeros(TF,n) + +# if norm(b)==0; return zeros(eltype(b),n),-9,0.0,0,[0.0]; end + +# CDS_MVp_MT(n,ndiags,R,offset,x,r) +# r .= -r .+ b +# #r = b .- A(x)::Vector{TF} + +# z = copy(r) + +# p = copy(r)::Vector{TF} + +# nr0 = norm(b) + +# resvec = zeros(TF,maxIter) +# iter = 1 # makes iter available outside the loop +# flag = -1 + +# if norm(r)/nr0<=tol +# flag = 0; +# return x,flag,resvec[iter],iter,resvec[1:iter] +# end + +# alpha = TF(0) +# beta = TF(0) +# lastIter = 0 +# Ap = zeros(TF,n) +# for iter=1:maxIter +# lastIter = iter + +# #Ap = A(p)::Vector{TF} +# fill!(Ap,TF(0)) +# CDS_MVp_MT(n,ndiags,R,offset,p,Ap) +# gamma = dot(r,z) +# #gamma = BLAS.dot(n, r,1,z,1)# +# alpha = gamma / dot(p,Ap) +# #alpha = gamma / BLAS.dot(n, p,1,Ap,1)#dot(p,Ap) + +# if alpha==Inf || alpha<0 +# flag = -2; break +# end + +# BLAS.axpy!(n,alpha,p,1,x,1) # x = alpha*p+x +# BLAS.axpy!(n,-alpha,Ap,1,r,1) # r -= alpha*Ap + +# #resvec[iter] = BLAS.nrm2(n, r, 1) / nr0# +# resvec[iter] = norm(r)/nr0 +# if resvec[iter] <= tol +# flag = 0; break +# end + +# #z = M(r)::Vector{TF} +# z = copy(r) +# #beta = BLAS.dot(n, z,1,r,1) / gamma +# beta = dot(z,r) / gamma +# # the following two lines are equivalent to p = z + beta*p +# #p = BLAS.scal!(n,beta,p,1)::Vector{TF} +# #p = BLAS.axpy!(n,TF(1.0),z,1,p,1)::Vector{TF} +# p = BLAS.axpby!(TF(1.0), z, beta, p)::Vector{TF} # Overwrite Y with X*a + Y*b, where a and b are scalars. Return Y +# end +# +# if out>=0 +# if flag==-1 +# println("cg iterated maxIter (=%d) times but reached only residual norm %1.2e instead of tol=%1.2e.", +# maxIter,resvec[lastIter],tol) +# elseif flag==-2 +# println("Matrix A in cg has to be positive definite.") +# elseif flag==0 && out>=1 +# println("cg achieved desired tolerance at iteration %d. 
Residual norm is %1.2e.",lastIter,resvec[lastIter]) +# end +# end + +# return x,flag,resvec[lastIter],lastIter,resvec[1:lastIter] + +# end diff --git a/src/compute_relative_feasibility.jl b/src/compute_relative_feasibility.jl index 45b8282..bb2c2d4 100644 --- a/src/compute_relative_feasibility.jl +++ b/src/compute_relative_feasibility.jl @@ -6,19 +6,19 @@ r_feas = ||P(A*x)-A*x||/||A*x|| Helper function that is intended for the parallel version only, where just a single operator is sent to a worker where this script runs """ -# function compute_relative_feasibility(m::Vector{TF}, feasibility_initial::Vector{TF}, -# TD_OP::Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}}, -# P_sub) where {TF<:Real,TI<:Integer} -function compute_relative_feasibility(m::Vector{TF},feasibility_initial::TF, - TD_OP::Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}, - P_sub) where {TF<:Real,TI<:Integer} +function compute_relative_feasibility(m::Vector{TF}, feasibility_initial::Vector{TF}, + TD_OP::Vector{Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}}, + P_sub) where {TF<:Real,TI<:Integer} +# function compute_relative_feasibility(m::Vector{TF},feasibility_initial::TF, +# TD_OP::Union{SparseMatrixCSC{TF,TI},JOLI.joAbstractLinearOperator{TF,TF}}, +# P_sub) where {TF<:Real,TI<:Integer} #note that P_sub mutates the input in-place - s_temp = TD_OP*m - norm(P_sub(deepcopy(s_temp)) .- s_temp) + s_temp = TD_OP[1]*m + norm(P_sub[1](deepcopy(s_temp)) .- s_temp) # feasibility_initial[1]=norm(P_sub[1](pm) .- pm) ./ (norm(pm)+(100*eps(TF))) - feasibility_initial = norm(P_sub(deepcopy(s_temp)) .- s_temp) ./ (norm(s_temp)+(100*eps(TF))) + feasibility_initial[1] = norm(P_sub[1](deepcopy(s_temp)) .- s_temp) ./ (norm(s_temp)+(100*eps(TF))) s_temp = [] - #gc()# check if we need this in julia >0.7 - return feasibility_initial + + #return feasibility_initial end
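Note on the compressed-diagonal-storage kernel touched in src/CDS_MVp.jl: the snippet below is an editorial sketch only, not part of the patch. It re-implements the diagonal loop from CDS_MVp on a small hand-built banded matrix and checks it against a sparse reference product; the names N, offsets, R, x and y are local to this example.

# sketch: CDS/DIA matrix-vector product on a toy tridiagonal matrix (assumption: not package code)
using LinearAlgebra, SparseArrays

N       = 6
offsets = [-1, 0, 1]                       # sub-, main- and super-diagonal
A       = spdiagm(-1 => fill(2f0, N-1), 0 => fill(4f0, N), 1 => fill(1f0, N-1))

# pack each diagonal d = offsets[i] into column i of R (CDS/DIA layout, row-indexed as in CDS_MVp)
R = zeros(Float32, N, length(offsets))
for (i, d) in enumerate(offsets), r in max(1, 1-d):min(N, N-d)
    R[r, i] = A[r, r+d]
end

# same loop structure as CDS_MVp: for each stored diagonal, y[r] += R[r,i] * x[r+d]
x = randn(Float32, N)
y = zeros(Float32, N)
for (i, d) in enumerate(offsets)
    r0, r1, c0 = max(1, 1-d), min(N, N-d), max(1, 1+d)
    for r = r0:r1
        y[r] += R[r, i] * x[r - r0 + c0]
    end
end

@assert y ≈ A * x                          # CDS product matches the sparse reference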
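Similarly, for the pmap-to-@spawnat change in src/PARSDMM_initialize.jl: a minimal standalone sketch (again not part of the patch) of the DistributedArrays local-part ([:L]) pattern the new code relies on. The toy array v and the doubling operation are illustrative assumptions.

# sketch: each worker updates only its own chunk of a distributed array (assumption: toy example)
using Distributed
addprocs(2)
@everywhere using DistributedArrays

v = distribute(collect(1.0:8.0))           # one chunk per worker
@sync for pid in procs(v)
    @spawnat pid (v[:L] .= 2 .* v[:L])     # [:L] is the local part on that worker
end
@assert convert(Vector{Float64}, v) == 2 .* collect(1.0:8.0)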