Skip to content

Instantly share code, notes, and snippets.

@killerswan
Last active December 21, 2015 07:29
Show Gist options
  • Save killerswan/6271720 to your computer and use it in GitHub Desktop.
Save killerswan/6271720 to your computer and use it in GitHub Desktop.
Exploring the laziness of F# sequences: In the first revision, I complained about non-local exceptions if I didn't force evaluation of a `Seq.take`. The latest versions are better about that, and other things...
module Seq2 =
// split into head and tail subsequences
let split nn (xs: seq<'UU>) : seq<'UU> * seq<'UU> =
let head =
Seq.truncate nn xs |> Seq.cache
let tail =
if (Seq.length head < nn) then
Seq.empty
else
Seq.skip nn xs |> Seq.cache
(head, tail)
let split' nn (xs: seq<'UU>) : seq<'UU> * seq<'UU> =
let en = xs.GetEnumerator()
let next, nn = ref true, ref nn
seq {
// start enumerating
next:= en.MoveNext()
while !next && !nn > 0 do
yield! try [en.Current] with _ -> []
next := en.MoveNext()
decr nn
},
seq {
while !next do
yield en.Current
next:= en.MoveNext()
}
// return chunks of a sequence, each with nn items
// (the last chunk may be partially full)
let chunk_ split nn (xs: seq<'UU>) : seq<seq<'UU>> =
let empt zs =
//printf "«"
let e = Seq.isEmpty zs
//printf "» "
e
let gen ys =
//if Seq.isEmpty ys then
if empt ys then
None
else
Some(split nn ys)
Seq.unfold gen xs
// export...
let chunk nn (xs: seq<'UU>) : seq<seq<'UU>> =
chunk_ split' nn xs
let run () =
printfn "%A" <| split 2 [1]
printfn "%A" <| split 4 [1..4]
printfn "%A" <| split 4 [1..7]
printfn "%A" <| chunk_ split 2 [1]
printfn "%A" <| chunk_ split 2 [1..4]
printfn "%A" <| chunk_ split 4 [1..7]
printfn ""
(*
(seq [1], seq [])
(seq [1; 2; 3; 4], seq [])
(seq [1; 2; 3; 4], seq [5; 6; 7])
seq [seq [1]]
seq [seq [1; 2]; seq [3; 4]]
seq [seq [1; 2; 3; 4]; seq [5; 6; 7]]
*)
printfn "%A" <| split' 2 [1]
printfn "%A" <| split' 4 [1..4]
printfn "%A" <| split' 4 [1..7]
printfn "%A" <| chunk_ split' 2 [1]
printfn "%A" <| chunk_ split' 2 [1..4]
printfn "%A" <| chunk_ split' 4 [1..7]
printfn ""
(*
(seq [1], seq [])
(seq [1; 2; 3; 4], seq [])
(seq [1; 2; 3; 4], seq [5; 6; 7])
seq [seq [1]]
seq [seq [1; 2]; seq [3; 4]]
seq [seq [1; 2; 3; 4]; seq [5; 6; 7]]
*)
let range a b = seq { for i in a .. b -> printf "!%d " i; i }
let _print xs = printf "seq ["; Seq.iter (printf "%d ") xs; printf "] "
let print (head,tail) = printf "( "; _print head; printf ", "; _print tail; printfn ")"
let print2 xss = Seq.iter _print xss; printfn ""
print <| split 2 (range 1 1)
print <| split 4 (range 1 4)
print <| split 4 (range 1 7)
print2 <| chunk_ split 2 (range 1 1)
print2 <| chunk_ split 2 (range 1 4)
print2 <| chunk_ split 4 (range 1 7)
printfn ""
(*
!1 ( seq [1 ] , seq [] )
!1 !2 !3 !4 ( seq [1 2 3 4 ] , seq [!1 !2 !3 !4 ] )
!1 !2 !3 !4 ( seq [1 2 3 4 ] , seq [!1 !2 !3 !4 !5 5 !6 6 !7 7 ] )
!1 !1 seq [1 ]
!1 !1 !2 seq [1 2 ] !1 !2 !3 !4 seq [3 4 ]
!1 !1 !2 !3 !4 seq [1 2 3 4 ] !1 !2 !3 !4 !5 !6 !7 seq [5 6 7 ]
*)
print <| split' 2 (range 1 1)
print <| split' 4 (range 1 4)
print <| split' 4 (range 1 7)
print2 <| chunk_ split' 2 (range 1 1)
print2 <| chunk_ split' 2 (range 1 4)
print2 <| chunk_ split' 4 (range 1 7)
printfn ""
(*
( seq [!1 1 ] , seq [] )
( seq [!1 1 !2 2 !3 3 !4 4 ] , seq [] )
( seq [!1 1 !2 2 !3 3 !4 4 !5 ] , seq [5 !6 6 !7 7 ] )
!1 seq [!1 1 ]
!1 seq [!1 1 !2 2 !3 ] seq [3 !4 4 ]
!1 seq [!1 1 !2 2 !3 3 !4 4 !5 ] seq [5 !6 6 !7 7 ]
*)
run ()
@jackfoxy
Copy link

even this throws same error:

let seq_split nn (xs: seq<'UU>) : seq<'UU> * seq<'UU> =

   let head = Seq.take nn xs // |> List.ofSeq |> Seq.ofList // force local exception

   let tail = 
   if (Seq.length xs) < (nn + 1)
    then Seq.empty
    else Seq.skip nn xs

  (head, tail)

@killerswan
Copy link
Author

If we replace Seq.take with Seq.truncate, then that should work. Since Seq.skip works when it returns Seq.empty, too, this is the update I came here to post. It also avoids finding the full sequence length (only the nn which are in the head) leaving this somewhat more lazy:

// split into head and tail subsequences
let seq_split nn (xs: seq<'UU>) : seq<'UU> * seq<'UU> =

   let head =
      Seq.truncate nn xs

   let tail =
      if (Seq.length head < nn)
      then Seq.empty
      else Seq.skip nn xs

   (head, tail)

// return chunks of a sequence, each with nn items
// (the last chunk may be partially full)
let seq_chunk nn (xs: seq<'UU>) : seq<seq<'UU>> =

   let gen ys = 
      if Seq.isEmpty ys
      then
         None
      else
         let (head, tail) = seq_split nn ys
         Some(head, tail)

   Seq.unfold gen xs

printfn "%A" <| seq_split 2 [1]
printfn "%A" <| seq_split 4 [1;2;3;4;5;6;7]
printfn "%A" <| seq_chunk 2 [1]
printfn "%A" <| seq_chunk 2 [1;2;3;4;5]

@nagat01
Copy link

nagat01 commented Aug 20, 2013

I wrote the same thing in other way.

https://gist.github.com/nagat01/6280003

In the above gist, Seq.split' has no strict evaluation over the sequence because Seq.length isn't used in the function.
And Seq.take in Seq.split doesn't throws the exception because the length is checked before calling it.

I admit that my Seq.split' is too long..

But I spent so long time to eliminate strict evaluation in the split function, then I post it here..

@killerswan
Copy link
Author

@nagat01 Where do the zeroes come from? Your split is very lazy, but also isn't finished yet...
I love the way your range function makes it clear when a seq is being expanded, though!

@killerswan
Copy link
Author

@nagat01 I've fixed the zeroes: those were getting printed when you didn't first call a .MoveNext() before reading. And even playing around with Seq.cache, I can't make split as efficient as split' yet. :D

(See above!)

@nagat01
Copy link

nagat01 commented Aug 22, 2013

I couldn't find the zeroes.. You would have better eyeballs than mine :)
And thank you for correcting and investigating it.

BTW, Seq.cache would significantly speed up the nested call of Seq.skip and "Seq.length head" is so efficient than calculating length of whole the sequence (I didn't notice this).
Then, it would have practically a good performance.
And the code is concise.
Then I'll use your split when I need it :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment