10 | > 1 + 1
⧩
2
13 | > List.map (x -> x + 10) [0,1,2,3,4]
⧩
[10, 11, 12, 13, 14]
16 | > Storage.mem 'let
⧩
Right 7
Last active
August 6, 2020 03:20
-
-
Save pchiusano/9f932bc9d6cb5e192c96dcc817e7e290 to your computer and use it in GitHub Desktop.
Word count example in Unison
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Imports: bring the constructors and ability operations used below into
-- scope so they can be written unqualified.
use DSeq Empty One Two
use Storage save restore
use Mem Mem
use Remote at await
-- Watch expressions: any line starting with `>` gets evaluated (and its
-- result cached) when you save the file, so a scratch file can be used a
-- bit like a spreadsheet.
> 1 + 1
-- A watch expression showing list literal syntax, a lambda, and function
-- application: adds 10 to every element of the list.
> List.map (x -> x + 10) [0, 1, 2, 3, 4]
-- An in-memory word-count example, running on the local machine. The whole
-- delayed `let` block is handed to `Storage.mem`, so every `save`/`restore`
-- performed inside it is served by the in-memory storage handler.
> Storage.mem 'let
  -- Create the dataset from an in-memory list.
  -- Could use DSeq.fromS3, create from the file system, etc.
  corpus = DSeq.fromList ["alice bob", "carol dave", "eve frank gerald"]
  -- Run the map-reduce job on this dataset: each element is split on spaces
  -- and mapped to its number of words, and the counts are combined with `+`
  -- starting from 0.
  -- `locally` runs the provided computation on the current machine, but
  -- we could take the same computation and run it unchanged on our cloud platform.
  locally '(mapReduce usEast (List.size . Text.split Char.space) 0 (+) corpus)
-- Read on for more detail
-- Simple distributed sequence type. Guy Steele calls these Conc lists[1]
-- (short for "concatenatable"), but here the child pointers are made lazy by
-- wrapping them in an abstract type constructor, `d`. Values of type `d a`
-- will be references to an external storage layer.
--
-- You can give the same treatment to any purely functional data structure to
-- "make it distributed". These are immutable structures, useful for batch compute.
--
-- Constructors:
--   Empty    - the sequence with no elements
--   One a    - a single element
--   Two l r  - two subsequences, each held behind a `d` reference
--
-- [1]: Organizing Functional Code for Parallel Execution; or, foldl and foldr Considered Slightly Harmful
-- https://vimeo.com/6624203
type DSeq d a
  = Empty
  | One a
  | Two (d (DSeq d a)) (d (DSeq d a))
-- A (likely too simple, but ok for a demo) interface to a storage layer:
--
--   save    - store a value and get back a reference (`d a`) to it
--   restore - turn a reference back into a value in memory
--
-- Where this is too simple: there's no notion of _where_ you're storing, so it's
-- entirely up to the implementation of this interface to decide. We have a richer
-- API for storage that lets the programmer make more fine-grained decisions on data
-- locality.
ability Storage d where
  save : a -> d a
  restore : d a -> a
-- Create a distributed sequence from an in-memory list.
--
-- An empty list becomes `Empty` and a one-element list becomes `One`. Any
-- longer list is split with `halve`; each half is converted recursively and
-- `save`d, and the two resulting references are joined with `Two`.
DSeq.fromList : [a] ->{Storage d} DSeq d a
DSeq.fromList = cases
  [] -> Empty
  [a] -> One a
  as -> match halve as with
    (left, right) -> Two (save (DSeq.fromList left)) (save (DSeq.fromList right))
-- Just a stub: not implemented, the body is a `todo` placeholder.
-- Intended to load the contents of the directory `dirname`, decoding the
-- files as UTF-8, into a distributed sequence of `Text`.
DSeq.utf8DirectoryContents : Text ->{IO, Storage d} DSeq d Text
DSeq.utf8DirectoryContents dirname =
  todo "load contents of the directory, decode files via utf-8"
-- Just a stub: not implemented, the body is a `todo` placeholder.
-- Intended to load a distributed sequence from the S3 bucket `bucket`.
DSeq.fromS3 : Text ->{IO, Storage d} DSeq d a
DSeq.fromS3 bucket = todo "load data from S3, can be done in parallel"
-- An interpreter of the `Storage` interface that uses in-memory storage:
-- a "reference" is just the value itself wrapped in `Mem`.
--
--   { a }             - the computation finished; return its result
--   Storage.save a    - resume with `Mem a` as the reference
--   Storage.restore   - unwrap the `Mem` and resume with the value inside
--
-- Each continuation is run under this same handler, so later `Storage`
-- requests made by the resumed computation are handled as well.
Storage.mem.handler : Request {Storage Mem} a -> a
Storage.mem.handler = cases
  { a } -> a
  { Storage.save a -> resume } ->
    handle resume (Mem a) with Storage.mem.handler
  { Storage.restore (Mem a) -> resume } ->
    handle resume a with Storage.mem.handler
-- For this demo, we're just using in-memory storage: a `Mem a` "reference"
-- simply holds the stored value directly.
type Mem a = Mem a
-- Distributed map reduce implementation.
--
-- Arguments, in order:
--   loc  - the location at which the sub-computations are run
--   f    - the mapping function, applied to each element
--   z    - the default value, returned for an `Empty` sequence
--   op   - the reducing function, used to combine the results of two halves
--   (the final argument, matched by `cases`, is the distributed sequence)
--
-- The type signature is a mouthful - Unison can also infer type signatures like this.
--
-- Requires the `Remote` ability.
mapReduce : loc -> (a -> b) -> b -> (b -> b -> b) -> DSeq d a ->{Remote loc task result {Storage d, g}} b
mapReduce loc f z op = cases
  Empty -> z
  One a -> f a
  Two left right ->
    -- Both halves are handed to `at loc` before either result is awaited.
    -- `restore` is called inside the delayed computation, so each half is
    -- only loaded from storage by the task that processes it.
    fl = at loc '(mapReduce loc f z op (restore left))
    fr = at loc '(mapReduce loc f z op (restore right))
    op (await fl) (await fr)
-- The space character, used as the word separator in the word-count example.
Char.space : Char
Char.space = ?\s
-- For this demo, we're not doing anything with location info, since everything
-- is running locally, so locations are just values of type Unit.
Cloud.stub.usEast : ()
Cloud.stub.usEast = ()
-- Locally execute a remote computation: the delayed computation `g` is run
-- through `Remote.local!`, and the whole thing is wrapped in
-- `Exception.toEither`, so the caller receives an `Either`
-- (presumably `Left` for a raised exception, `Right` for a normal result).
locally g = Exception.toEither '(Remote.local! g)
-- Have storage layer calls in `g` use in-memory storage: forces the delayed
-- computation `g` and interprets its `Storage` requests with
-- `Storage.mem.handler`.
Storage.mem g = handle !g with Storage.mem.handler
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment