5 Jan 2023 |
neoabs | hello again | 16:13:04 |
neoabs | using BenchmarkTools
"""
    test1(x, pad; threshold=0.5)

Count the entries of `x` that are strictly greater than `threshold`, skipping the
first and last `pad` entries.

Benchmark variant: copies the interior slice, then sums a broadcast comparison mask.

Returns `0` when the padding leaves no interior entry.
"""
function test1(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # `cld` rounds the half-length up, so the middle entry of an odd-length `x` is
    # still counted; the floored `length(x) ÷ 2` discarded it (e.g. length 1, pad 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return sum(interior .> threshold)
end
"""
    test2(x, pad; threshold=0.5)

Count the entries of `x` that are strictly greater than `threshold`, skipping the
first and last `pad` entries.

Benchmark variant: copies the interior slice, then applies `count` with a predicate.

Returns `0` when the padding leaves no interior entry.
"""
function test2(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # `cld` rounds the half-length up, so the middle entry of an odd-length `x` is
    # still counted; the floored `length(x) ÷ 2` discarded it (e.g. length 1, pad 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return count(>(threshold), interior)
end
"""
    test3(x, pad; threshold=0.5)

Count the entries of `x` that are strictly greater than `threshold`, skipping the
first and last `pad` entries.

Benchmark variant: takes a `view` of the interior (no copy of `x`), then sums a
broadcast comparison mask.

Returns `0` when the padding leaves no interior entry.
"""
function test3(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # `cld` rounds the half-length up, so the middle entry of an odd-length `x` is
    # still counted; the floored `length(x) ÷ 2` discarded it (e.g. length 1, pad 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return sum(interior .> threshold)
end
"""
    test4(x, pad; threshold=0.5)

Count the entries of `x` that are strictly greater than `threshold`, skipping the
first and last `pad` entries.

Benchmark variant: takes a `view` of the interior (no copy of `x`), then applies
`count` with a predicate.

Returns `0` when the padding leaves no interior entry.
"""
function test4(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # `cld` rounds the half-length up, so the middle entry of an odd-length `x` is
    # still counted; the floored `length(x) ÷ 2` discarded it (e.g. length 1, pad 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return count(>(threshold), interior)
end
# Benchmark input: a vector of 100000 values produced by `rand`.
x = rand(100000)
# Time every variant on the same input with pad = 10 and the default threshold.
# `$` interpolates the arguments into the `@btime` expression.
@btime test1($x, $10)
@btime test2($x, $10)
@btime test3($x, $10)
@btime test4($x, $10)
| 16:13:11 |
neoabs | julia> @btime test1($x, $10)
84.499 μs (5 allocations: 797.77 KiB)
50163
julia> @btime test2($x, $10)
70.372 μs (2 allocations: 781.17 KiB)
50163
julia> @btime test3($x, $10)
26.880 μs (3 allocations: 16.59 KiB)
50163
julia> @btime test4($x, $10)
54.483 μs (0 allocations: 0 bytes)
50163
| 16:13:34 |
neoabs | * using BenchmarkTools
"""
    test1(x, pad; threshold=0.5)

Return how many entries of `x`, excluding `pad` entries at each end, exceed
`threshold` (strict comparison).

Benchmark variant: slice copy followed by a sum over a broadcast comparison.

The result is `0` whenever the padding consumes the whole vector.
"""
function test1(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Round the half-length up: with the floored `length(x) ÷ 2` an odd-length `x`
    # lost its middle entry (a one-element vector with pad 0 always gave 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return sum(interior .> threshold)
end
"""
    test2(x, pad; threshold=0.5)

Return how many entries of `x`, excluding `pad` entries at each end, exceed
`threshold` (strict comparison).

Benchmark variant: slice copy followed by `count` with a predicate.

The result is `0` whenever the padding consumes the whole vector.
"""
function test2(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Round the half-length up: with the floored `length(x) ÷ 2` an odd-length `x`
    # lost its middle entry (a one-element vector with pad 0 always gave 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return count(>(threshold), interior)
end
"""
    test3(x, pad; threshold=0.5)

Return how many entries of `x`, excluding `pad` entries at each end, exceed
`threshold` (strict comparison).

Benchmark variant: `view` of the interior (no copy of `x`) followed by a sum over a
broadcast comparison.

The result is `0` whenever the padding consumes the whole vector.
"""
function test3(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Round the half-length up: with the floored `length(x) ÷ 2` an odd-length `x`
    # lost its middle entry (a one-element vector with pad 0 always gave 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return sum(interior .> threshold)
end
"""
    test4(x, pad; threshold=0.5)

Return how many entries of `x`, excluding `pad` entries at each end, exceed
`threshold` (strict comparison).

Benchmark variant: `view` of the interior (no copy of `x`) followed by `count` with
a predicate.

The result is `0` whenever the padding consumes the whole vector.
"""
function test4(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Round the half-length up: with the floored `length(x) ÷ 2` an odd-length `x`
    # lost its middle entry (a one-element vector with pad 0 always gave 0).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return count(>(threshold), interior)
end
# Benchmark input: a vector of 100000 values produced by `rand`.
x = rand(100000)
# Time every variant on the same input with pad = 10 and the default threshold.
# `$` interpolates the arguments into the `@btime` expression.
@btime test1($x, $10)
@btime test2($x, $10)
@btime test3($x, $10)
@btime test4($x, $10)
| 16:13:43 |
neoabs | * julia> @btime test1($x, $10)
84.499 μs (5 allocations: 797.77 KiB)
50163
julia> @btime test2($x, $10)
70.372 μs (2 allocations: 781.17 KiB)
50163
julia> @btime test3($x, $10)
26.880 μs (3 allocations: 16.59 KiB)
50163
julia> @btime test4($x, $10)
54.483 μs (0 allocations: 0 bytes)
50163
| 16:13:52 |
neoabs | I modified the code to be more realistic. I hope you like it | 16:15:34 |
neoabs | also
julia> test1(x, 10)
50163
julia> test2(x, 10)
50163
julia> test3(x, 10)
50163
julia> test4(x, 10)
50163
| 16:16:22 |
neoabs | it works with vectors only | 16:16:37 |
neoabs | try also inbounds in my example. I do not have time anymore | 16:23:17 |
neoabs | Andreas Weh: can you run the code on your machine and show the timings? | 16:42:50 |
neoabs | * using BenchmarkTools
"""
    test1(x, pad; threshold=0.5)

Number of entries of `x` strictly above `threshold`, with `pad` entries trimmed from
both ends before counting.

Benchmark variant: the trimmed range is copied by slicing and a broadcast comparison
is summed.

Yields `0` if trimming removes every entry.
"""
function test1(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Ceiling division keeps the centre entry of an odd-length `x`; the previous
    # floored `length(x) ÷ 2` guard returned 0 for it (e.g. length 3 with pad 1).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return sum(interior .> threshold)
end
"""
    test2(x, pad; threshold=0.5)

Number of entries of `x` strictly above `threshold`, with `pad` entries trimmed from
both ends before counting.

Benchmark variant: the trimmed range is copied by slicing and passed to `count` with
a predicate.

Yields `0` if trimming removes every entry.
"""
function test2(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Ceiling division keeps the centre entry of an odd-length `x`; the previous
    # floored `length(x) ÷ 2` guard returned 0 for it (e.g. length 3 with pad 1).
    pad ≥ cld(length(x), 2) && return 0
    interior = x[(firstindex(x) + pad):(lastindex(x) - pad)]  # slicing copies
    return count(>(threshold), interior)
end
"""
    test3(x, pad; threshold=0.5)

Number of entries of `x` strictly above `threshold`, with `pad` entries trimmed from
both ends before counting.

Benchmark variant: the trimmed range is a `view` (no copy of `x`) and a broadcast
comparison is summed.

Yields `0` if trimming removes every entry.
"""
function test3(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Ceiling division keeps the centre entry of an odd-length `x`; the previous
    # floored `length(x) ÷ 2` guard returned 0 for it (e.g. length 3 with pad 1).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return sum(interior .> threshold)
end
"""
    test4(x, pad; threshold=0.5)

Number of entries of `x` strictly above `threshold`, with `pad` entries trimmed from
both ends before counting.

Benchmark variant: the trimmed range is a `view` (no copy of `x`) passed to `count`
with a predicate.

Yields `0` if trimming removes every entry.
"""
function test4(x::AbstractVector{<:Real}, pad::Integer; threshold::Real=0.5)
    # Ceiling division keeps the centre entry of an odd-length `x`; the previous
    # floored `length(x) ÷ 2` guard returned 0 for it (e.g. length 3 with pad 1).
    pad ≥ cld(length(x), 2) && return 0
    interior = view(x, (firstindex(x) + pad):(lastindex(x) - pad))
    return count(>(threshold), interior)
end
# Benchmark input: a vector of 100000 values produced by `rand`.
x = rand(100000)
# Time every variant on the same input with pad = 10 and the default threshold.
# `$` interpolates the arguments into the `@btime` expression.
@btime test1($x, $10)
@btime test2($x, $10)
@btime test3($x, $10)
@btime test4($x, $10)
| 16:43:43 |
Andreas Weh | With more 'realistic' you meant the short-circuit in case the padding is too large?
I omitted that as my code guarantees that this will always be the case. But in general, this is surely good practice. | 19:11:48 |
Andreas Weh | Is there a particular reason for you to constrain types so much? Julia's documentation suggests that this is bad practice and rather encourages writing generic (duck typed) code. Or do I misunderstand this? | 19:14:30 |
Andreas Weh | Here are the timing using your functions for comparison:
julia> @btime test1($x, $1, $0.5);
119.914 μs (5 allocations: 797.89 KiB)
julia> @btime test2($x, $1, $0.5);
84.901 μs (2 allocations: 781.30 KiB)
julia> @btime test3($x, $1, $0.5);
60.537 μs (3 allocations: 16.59 KiB)
julia> @btime test4($x, $1, $0.5);
91.239 μs (0 allocations: 0 bytes)
(I dropped the keyword argument, to avoid the tiny overhead for exact comparison).
| 19:19:42 |
Andreas Weh | And again the comparison to my versions:
julia> @btime test1($x, $0.5, $1);
91.228 μs (0 allocations: 0 bytes)
julia> @btime test2($x, $0.5, $1);
22.927 μs (0 allocations: 0 bytes)
julia> @btime test3($x, $0.5, $1);
23.822 μs (9 allocations: 288 bytes)
| 19:23:29 |
neoabs | In reply to @wehandre:matrix.physik.uni-augsburg.de
And again the comparison to my versions:
julia> @btime test1($x, $0.5, $1);
91.228 μs (0 allocations: 0 bytes)
julia> @btime test2($x, $0.5, $1);
22.927 μs (0 allocations: 0 bytes)
julia> @btime test3($x, $0.5, $1);
23.822 μs (9 allocations: 288 bytes)
i think my x is 10x larger. | 19:35:44 |
Andreas Weh | That's why I added the new timings on the same x | 19:36:51 |
neoabs | ah ok | 19:37:12 |
neoabs | test2 seems optimal | 19:37:43 |
neoabs | your | 19:37:47 |
neoabs | I think it deserves a thread | 19:39:34 |
neoabs | on discourse | 19:39:37 |
neoabs | next level: pmap | 19:40:03 |
Andreas Weh | Exactly, this is my issue. It is by far the fastes but doesn't allow for views (SubArrays ). I would prefer, pulling the padding out of the function and just pass the view, better encapsulating the logic (at least for my use case). | 19:40:06 |
Andreas Weh | * Exactly, this is my issue. It is by far the fastest but doesn't allow for views (SubArrays). I would prefer pulling the padding out of the function and just passing the view, which better encapsulates the logic (at least for my use case). | 19:40:25 |
Andreas Weh | At least for large arrays, test3 seems like an acceptable way to allow views. Like I said, the code is borrowed from a GitHub issue. | 19:41:37 |
neoabs | views are not meant to be used as a primary object. They are just there to speed things up / free up memory. In your case they are simply obsolete. also
| 19:42:03 |
Andreas Weh | This is exactly the point. I want to use views to avoid copies. This, however, slows down my code, as count is slow on views.
So I had to refactor my whole code base, moving the padding inside the function. This obfuscates my code, so I am looking for a way to have fast counting on views. I use views instead of slices, which copy, exactly because this code is performance critical | 19:44:21 |
Andreas Weh | But thanks for the input | 19:44:49 |
neoabs | If I understand correctly, sum is implemented in such a way that, when it is applied to a generator or similar lazy collection, it does not materialize the collection first and then calculate the sum, but calculates the sum while generating the successive elements. That means using for and other iterative generators is optimal | 19:50:15 |