Erlang binary optimization known integer

Erlang binary optimization known integer - optimization

Can this be further optimized:
Binary = <<"2345", 1, "restofmessageexistshere">>
%% Read the ASCII-decimal digits at the front of Binary, up to the separator
%% byte 1, and return [Number, BinaryAfterSeparator].
get_integer_value(Binary) ->
    [Value, _Scale, Remainder] = integer_value(Binary),
    [Value, Remainder].

%% Body-recursive helper returning [Value, Scale, Remainder]. Scale is the
%% power of ten by which the next more significant digit has to be multiplied.
%% The separator clause must come first: every other byte is treated as a
%% digit without validation.
integer_value(<<1, Remainder/binary>>) ->
    [0, 1, Remainder];
integer_value(<<Digit:8, Tail/binary>>) ->
    [Lower, Scale, Remainder] = integer_value(Tail),
    [(Digit - $0) * Scale + Lower, Scale * 10, Remainder].
Expected Result -> [2345, "restofmessageexistshere"]

You could use a function like the following one:
%% Parse a run of ASCII decimal digits that is terminated by the byte 1.
%% Returns [Integer, Rest], where Rest is the binary following the terminator.
%% Raises error:badarg if a byte that is neither a digit nor 1 is met, or if
%% the binary ends before the terminator.
integer_value(Bin) ->
    integer_value(Bin, 0).

%% Tail-recursive worker: Acc holds the value of the digits consumed so far.
integer_value(<<Char, Tail/binary>>, Acc) when Char >= $0, Char =< $9 ->
    integer_value(Tail, Acc * 10 + (Char - $0));
integer_value(<<1, Tail/binary>>, Acc) ->
    [Acc, Tail];
integer_value(Bin, Acc) ->
    %% Throw an exception if the argument is not in the correct format.
    %% The argument list mirrors this function's arity so the stack trace
    %% shows the real failing call.
    erlang:error(badarg, [Bin, Acc]).
If you call integer_value(<<"2345", 1, "restofmessageexistshere">>) you'll get [2345, <<"restofmessageexistshere">>] (the remainder is returned as a binary).
This function solves your problem, but as the previous poster said, you might want to explain what you want to do to make sure that this is the best solution for your problem.

Related

Disregard component of a Triple in a comparison

I am attempting to compare Triples while disregarding certain values of the Triple. The value I wish to disregard below is signified by _. Note the below code is for example purposes and does not compile because _ is an Unresolved reference.
val coordinates = Triple(3, 2, 5)
when (coordinates) {
Triple(0, 0, 0) -> println("Origin")
Triple(_, 0, 0)-> println("On the x-axis.")
Triple(0, _, 0)-> println("On the y-axis.")
Triple(0, 0, _)-> println("On the z-axis.")
else-> println("Somewhere in space")
}
I know you can use _ when destructuring if you would like to ignore a value but that doesn't seem to help me with the above issue:
val (x4, y4, _) = coordinates
println(x4)
println(y4)
Any ideas how I can achieve this?
Thank you!

Underscore for unused variables was introduced in Kotlin 1.1 and it is designed to be used when some variables are not needed in the destructuring declaration.
In the branch conditions of your when expression, Triple(0, 0, 0) is creating a new instance, not destructuring. So, using an underscore is not permitted here.
Currently, destructuring in the branch conditions of a when expression is not possible in Kotlin. One solution for your case is to compare each of the components explicitly in each branch condition:
// Destructure once so that every branch can test just the components it needs.
val (x, y, z) = Triple(3, 2, 5)
// Branches are tried top to bottom, so the all-zero case has to come first;
// otherwise the origin would be reported as lying on the x-axis.
val location = when {
    x == 0 && y == 0 && z == 0 -> "Origin"
    y == 0 && z == 0 -> "On the x-axis."
    x == 0 && z == 0 -> "On the y-axis."
    x == 0 && y == 0 -> "On the z-axis."
    else -> "Somewhere in space"
}
println(location)
Here is a discussion on destructuring in when expression.

Prime numbers in Idris

In idris 0.9.17.1,
with inspiration from https://wiki.haskell.org/Prime_numbers,
I've written the following code for generating prime numbers
module Main
-- Prepend the elements of a finite list, in order, to the front of a stream.
concat: List a -> Stream a -> Stream a
-- Nothing left to prepend: the result is the stream itself.
concat [] ys = ys
concat (x :: xs) ys = x :: (concat xs ys)
-- Build the list start, start + step, start + 2 * step, ... containing only
-- values strictly less than max (the upper bound is exclusive).
generate: (Num a, Ord a) => (start:a) -> (step:a) -> (max:a) -> List a
generate start step max = if (start < max) then start :: generate (start + step) step max else []
-- sieve and primes refer to each other, hence the mutual block.
mutual
  -- sieve k (p::ps) x emits the candidates x + 2, x + 4, ... below p * p - 2
  -- that are not divisible by any of the first k elements of (tail primes),
  -- then continues with k + 1, the remaining stream ps and p * p as the new x.
  -- NOTE: generate excludes its upper bound, so p * p - 2 itself is never
  -- tested as a candidate.
  sieve: Nat -> Stream Int -> Int -> Stream Int
  sieve k (p::ps) x = concat (start) (sieve (k + 1) ps (p * p)) where
    -- Trial divisors: the first k primes after 2.
    fs: List Int
    fs = take k (tail primes)
    -- Candidates of this segment that no trial divisor divides.
    start: List Int
    start = [n | n <- (generate (x + 2) 2 (p * p - 2)), (all (\i => (n `mod` i) /= 0) fs)]

  -- 2 and 3 are given explicitly; the rest comes from sieve, which is fed
  -- (tail primes) and starts from x = 3.
  primes: Stream Int
  primes = 2 :: 3 :: sieve 0 (tail primes) 3
-- Entry point: print the first ten elements of primes.
main:IO()
main = do
  printLn $ take 10 primes
In the REPL, if I write take 10 primes, the REPL correctly shows [2, 3, 5, 11, 13, 17, 19, 29, 31, 37] : List Int
But if I try :exec, nothing happens, and if I try to compile and execute the program I get Segmentation fault: 11
Can someone help me to debug this problem ?

Your concat function can be made lazy to fix this. Just change its type to
concat : List a -> Lazy (Stream a) -> Stream a
This will do it.
Note:
To get all primes, change the < inside the generate function into <=
(Currently some are missing, e.g. 7 and 23).

To memoize or not to memoize

... that is the question. I have been working on an algorithm which takes an array of vectors as input, and part of the algorithm repeatedly picks pairs of vectors and evaluates a function of these two vectors, which doesn't change over time. Looking at ways to optimize the algorithm, I thought this would be a good case for memoization: instead of recomputing the same function value over and over again, cache it lazily and hit the cache.
Before jumping to code, here is the gist of my question: the benefits I get from memoization depend on the number of vectors, which I think is inversely related to number of repeated calls, and in some circumstances memoization completely degrades performance. So is my situation inadequate for memoization? Am I doing something wrong, and are there smarter ways to optimize for my situation?
Here is a simplified test script, which is fairly close to the real thing:
open System
open System.Diagnostics
open System.Collections.Generic
// Benchmark parameters.
let size = 10 // observations
let dim = 10 // features per observation
let runs = 10000000 // number of function calls
let rng = new Random()
let clock = new Stopwatch()
// `size` random vectors, each a list of `dim` values drawn from rng.NextDouble().
let data =
    [| for i in 1 .. size ->
        [ for j in 1 .. dim -> rng.NextDouble() ] |]
// `runs` random (i, j) pairs of indexes into data.
let testPairs = [| for i in 1 .. runs -> rng.Next(size), rng.Next(size) |]
// Sum of the squared component-wise differences of two vectors.
let f v1 v2 = List.fold2 (fun acc x y -> acc + (x-y) * (x-y)) 0.0 v1 v2
// Time the uncached version: f is evaluated for every pair; the average is
// printed as a check value, followed by the elapsed milliseconds.
printfn "Raw"
clock.Restart()
testPairs |> Array.averageBy (fun (i, j) -> f data.[i] data.[j]) |> printfn "Check: %f"
printfn "Raw: %i" clock.ElapsedMilliseconds
I create a list of random vectors (data), a random collection of indexes (testPairs), and run f on each of the pairs.
Here is the memoized version:
// Memoized wrapper around f, keyed by the (i, j) index pair. The dictionary is
// created once and captured by the returned closure.
let memoized =
    let cache = new Dictionary<(int*int),float>(HashIdentity.Structural)
    fun key ->
        match cache.TryGetValue(key) with
        | true, v -> v
        | false, _ ->
            // Cache miss: compute the value, store it, then return it.
            let v = f data.[fst key] data.[snd key]
            cache.Add(key, v)
            v
// Time the memoized version over the same pairs as the raw run.
printfn "Memoized"
clock.Restart()
testPairs |> Array.averageBy (fun (i, j) -> memoized (i, j)) |> printfn "Check: %f"
printfn "Memoized: %i" clock.ElapsedMilliseconds
Here is what I am observing:
* when size is small (10), memoization goes about twice as fast as the raw version,
* when size is large (1000), memoization takes 15x more time than the raw version,
* when f is costly, memoization improves things
My interpretation is that when the size is small, we have more repeat computations, and the cache pays off.
What surprised me was the huge performance hit for larger sizes, and I am not certain what is causing it. I know I could improve the dictionary access a bit, with a struct key for instance - but I didn't expect the "naive" version to behave so poorly.
So - is there something obviously wrong with what I am doing? Is memoization the wrong approach for my situation, and if yes, is there a better approach?

I think memoization is a useful technique, but it is not a silver bullet. It is very useful in dynamic programming where it reduces the (theoretical) complexity of the algorithm. As an optimization, it can (as you would probably expect) have varying results.
In your case, the cache is certainly more useful when the number of observations is smaller (and f is more expensive computation). You can add simple statistics to your memoization:
// Running totals of cache (misses, hits), updated on every call to memoized.
let stats = ref (0, 0)

// Memoized wrapper around f that also records whether each lookup was a cache
// hit or a miss.
let memoized =
    let cache = new Dictionary<(int*int),float>(HashIdentity.Structural)
    fun key ->
        let misses, hits = stats.Value
        let found, cached = cache.TryGetValue(key)
        if found then
            // Hit: bump the hit counter and reuse the stored value.
            stats.Value <- (misses, hits + 1)
            cached
        else
            // Miss: bump the miss counter, then compute and store the value.
            stats.Value <- (misses + 1, hits)
            let v = f data.[fst key] data.[snd key]
            cache.Add(key, v)
            v
For small size, the numbers I get are something like (100, 999900) so there is a huge benefit from memoization - the function f is computed 100x and then each result is reused 9999x.
For big size, I get something like (632331, 1367669) so f is called many times and each result is reused just twice. In that case, the overhead with allocation and lookup in the (big) hash table is much bigger.
As a minor optimization, you can pre-allocate the Dictionary and write new Dictionary<_, _>(10000,HashIdentity.Structural), but that does not seem to help much in this case.
To make this optimization efficient, I think you would need to know some more information about the memoized function. In your example, the inputs are quite regular, so there is probably no point in memoization, but if you know that the function is more often called with some values of arguments, you can perhaps memoize only for these common arguments.

Tomas's answer is great for when you should use memoization. Here's why memoization is going so slow in your case.
It sounds like you're testing in Debug mode. Run your test again in Release and you should get a faster result for memoization. Tuples can cause a large performance hit while in Debug mode. I added a hashed version for comparison along with some micro optimizations.
Release
Raw
Check: 1.441687
Raw: 894
Memoized
Check: 1.441687
Memoized: 733
memoizedHash
Check: 1.441687
memoizedHash: 552
memoizedHashInline
Check: 1.441687
memoizedHashInline: 493
memoizedHashInline2
Check: 1.441687
memoizedHashInline2: 385
Debug
Raw
Check: 1.409310
Raw: 797
Memoized
Check: 1.409310
Memoized: 5190
memoizedHash
Check: 1.409310
memoizedHash: 593
memoizedHashInline
Check: 1.409310
memoizedHashInline: 497
memoizedHashInline2
Check: 1.409310
memoizedHashInline2: 373
Source
open System
open System.Diagnostics
open System.Collections.Generic
// Benchmark parameters.
let size = 10 // observations
let dim = 10 // features per observation
let runs = 10000000 // number of function calls
let rng = new Random()
let clock = new Stopwatch()
// `size` random vectors, each a list of `dim` values drawn from rng.NextDouble().
let data =
    [| for i in 1 .. size ->
        [ for j in 1 .. dim -> rng.NextDouble() ] |]
// `runs` random (i, j) pairs of indexes into data.
let testPairs = [| for i in 1 .. runs -> rng.Next(size), rng.Next(size) |]
// Sum of the squared component-wise differences of two vectors.
let f v1 v2 = List.fold2 (fun acc x y -> acc + (x-y) * (x-y)) 0.0 v1 v2
// Baseline: evaluate f for every pair without any caching.
printfn "Raw"
clock.Restart()
testPairs |> Array.averageBy (fun (i, j) -> f data.[i] data.[j]) |> printfn "Check: %f"
printfn "Raw: %i\n" clock.ElapsedMilliseconds
// Variant 1: memoize f in a dictionary keyed by the (i, j) tuple.
let memoized =
    let cache = new Dictionary<(int*int),float>(HashIdentity.Structural)
    fun key ->
        match cache.TryGetValue(key) with
        | true, v -> v
        | false, _ ->
            // Cache miss: compute the value, store it, then return it.
            let v = f data.[fst key] data.[snd key]
            cache.Add(key, v)
            v
printfn "Memoized"
clock.Restart()
testPairs |> Array.averageBy (fun (i, j) -> memoized (i, j)) |> printfn "Check: %f"
printfn "Memoized: %i\n" clock.ElapsedMilliseconds
// Variant 2: replace the tuple key with a single int. The caller encodes a
// pair as i * size + j; on a miss the pair is recovered with / and %.
let memoizedHash =
    let cache = new Dictionary<int,float>(HashIdentity.Structural)
    fun key ->
        match cache.TryGetValue(key) with
        | true, v -> v
        | false, _ ->
            // Decode the int key back into the two indexes.
            let i = key / size
            let j = key % size
            let v = f data.[i] data.[j]
            cache.Add(key, v)
            v
printfn "memoizedHash"
clock.Restart()
testPairs |> Array.averageBy (fun (i, j) -> memoizedHash (i * size + j)) |> printfn "Check: %f"
printfn "memoizedHash: %i\n" clock.ElapsedMilliseconds
// Variant 3: the cache function has the same body as memoizedHash; what
// changes is the driver below, which accumulates the sum in an explicit loop
// instead of going through Array.averageBy.
let memoizedHashInline =
    let cache = new Dictionary<int,float>(HashIdentity.Structural)
    fun key ->
        match cache.TryGetValue(key) with
        | true, v -> v
        | false, _ ->
            // Decode the int key back into the two indexes.
            let i = key / size
            let j = key % size
            let v = f data.[i] data.[j]
            cache.Add(key, v)
            v
printfn "memoizedHashInline"
clock.Restart()
let mutable total = 0.0
for i, j in testPairs do
    total <- total + memoizedHashInline (i * size + j)
// Average computed by hand so it can be compared with the other check values.
printfn "Check: %f" (total / float testPairs.Length)
printfn "memoizedHashInline: %i\n" clock.ElapsedMilliseconds
// Variant 4: the cache lookup is written directly inside the loop body rather
// than being called through a closure.
printfn "memoizedHashInline2"
clock.Restart()
let mutable total2 = 0.0
let cache = new Dictionary<int,float>(HashIdentity.Structural)
for i, j in testPairs do
    // Encode the pair as a single int key.
    let key = (i * size + j)
    match cache.TryGetValue(key) with
    | true, v -> total2 <- total2 + v
    | false, _ ->
        // These bindings shadow the loop variables with the decoded indexes.
        let i = key / size
        let j = key % size
        let v = f data.[i] data.[j]
        cache.Add(key, v)
        total2 <- total2 + v
printfn "Check: %f" (total2 / float testPairs.Length)
printfn "memoizedHashInline2: %i\n" clock.ElapsedMilliseconds
// Wait for a line of input before the program ends.
Console.ReadLine() |> ignore

Simpler way to format bytesize in a human readable way?

I came up with the following solution to format an integer (bytesize of a file). Is there any better/shorter solution? I especially don't like the float_as_string() part.
%% Format a byte count as a human-readable string, e.g. 1536 -> "1.50 KB".
%% Sizes below 1024 are rendered as "<N> Byte".
human_filesize(Size) ->
    %% Largest unit first, so the first block that fits wins.
    Units = [{1 bsl 50, "PB"},
             {1 bsl 40, "TB"},
             {1 bsl 30, "GB"},
             {1 bsl 20, "MB"},
             {1 bsl 10, "KB"}],
    human_filesize(Size, Units).

%% Walk the {BlockSize, Postfix} list until a block not larger than Size is
%% found; fall back to plain bytes when the list is exhausted.
human_filesize(Size, []) ->
    integer_to_list(Size) ++ " Byte";
human_filesize(Size, [{Block, Postfix} | _]) when Size >= Block ->
    float_as_string(Size / Block) ++ " " ++ Postfix;
human_filesize(Size, [{_, _} | Smaller]) ->
    human_filesize(Size, Smaller).

%% Render a float as "<integer part>.<first two fractional digits>"; the
%% digits are cut off, not rounded.
float_as_string(Float) ->
    Whole = trunc(Float),
    %% 1 + fraction prints as "1.ddd...", so characters 2 to 4 are ".dd".
    Shifted = float_to_list(1 + Float - Whole),
    integer_to_list(Whole) ++ lists:sublist(Shifted, 2, 3).
Edit: Fixed bug round() -> trunc()

%% Format a byte count with two decimals and the largest fitting unit.
%% Returns an iolist as produced by io_lib:format/2.
human_filesize(Size) ->
    human_filesize(Size, ["B", "KB", "MB", "GB", "TB", "PB"]).

%% Divide by 1024 and move to the next unit while the size is still >= 1024
%% and a larger unit remains; otherwise format with the current unit.
human_filesize(Size, [_Unit, Next | Larger]) when Size >= 1024 ->
    human_filesize(Size / 1024, [Next | Larger]);
human_filesize(Size, [Unit | _]) ->
    io_lib:format("~.2f ~s", [float(Size), Unit]).
Note that this returns an iolist. If you need a flat string, convert it with lists:flatten/1, or go via iolist_to_binary/1 followed by binary_to_list/1.

How to format a number with padding in Erlang

I need to pad the output of an integer to a given length.
For example, with a length of 4 digits, the output of the integer 4 is "0004" instead of "4". How can I do this in Erlang?

adding a bit of explanation to Zed's answer:
Erlang Format specification is: ~F.P.PadModC.
"~4..0B~n" translates to:
~F. = ~4. (Field width of 4)
P. = . (no Precision specified)
Pad = 0 (Pad with zeroes)
Mod = (no control sequence Modifier specified)
C = B (Control sequence B = integer in default base 10)
and ~n is new line.

io:format("~4..0B~n", [Num]).

string:right(integer_to_list(4), 4, $0).

The problem with io:format is that if your integer doesn't fit, you get asterisks:
> io:format("~4..0B~n", [1234]).
1234
> io:format("~4..0B~n", [12345]).
****
The problem with string:right is that it throws away the characters that don't fit:
> string:right(integer_to_list(1234), 4, $0).
"1234"
> string:right(integer_to_list(12345), 4, $0).
"2345"
I haven't found a library module that behaves as I would expect (i.e. print my number even if it doesn't fit into the padding), so I wrote my own formatting function:
%%------------------------------------------------------------------------------
%% @doc Format an integer left-padded with zeroes to at least Padding
%% characters. A leading minus sign counts towards the width. Numbers that are
%% wider than Padding are emitted in full, never truncated.
%% @end
%%------------------------------------------------------------------------------
-spec format_with_padding(Number :: integer(),
                          Padding :: integer()) -> iodata().
format_with_padding(Number, Padding) when Number < 0 ->
    %% Emit the sign, then pad the magnitude into the remaining width.
    [$- | format_with_padding(-Number, Padding - 1)];
format_with_padding(Number, Padding) ->
    Digits = integer_to_list(Number),
    case Padding - length(Digits) of
        Missing when Missing > 0 ->
            [lists:duplicate(Missing, $0), Digits];
        _ ->
            %% Already wide enough: no zeroes are added.
            [[], Digits]
    end.
(You can use iolist_to_binary/1 to convert the result to binary, or you can use lists:flatten(io_lib:format("~s", [Result])) to convert it to a list.)

Eshell V12.0.3 (abort with ^G)
1> F = fun(Max, I)-> case Max - length(integer_to_list(I)) of X when X > 0 -> string:chars($0, X) ++ integer_to_list(I); _ -> I end end.
#Fun<erl_eval.43.40011524>
2> F(10, 22).
"0000000022"
3> F(3, 22345).
22345

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Erlang binary optimization known integer - optimization

Related

Disregard component of a Triple in a comparison

Prime numbers in Idris

To memoize or not to memoize

Simpler way to format bytesize in a human readable way?

How to format a number with padding in Erlang

Categories

Resources