gdritter repos documents / 0e4f5a8
Miscellaneous, generally scattered personal documents Getty Ritter 8 years ago
37 changed file(s) with 4670 addition(s) and 0 deletion(s).
1 all: duality.pdf
2
3 duality.pdf: duality.tex
4 pdflatex duality.tex
5
6 clean:
7 rm -rf duality.aux duality.log duality.pdf
1 \documentclass{scrartcl}
2 \usepackage[letterpaper]{geometry}
3 \usepackage{fullpage}
4 \usepackage{cmbright}
5 \usepackage[usenames,dvipsnames]{color}
6 \usepackage{listings}
7 % \newcommand{\both}[2]{\begin{tabular}{ p{4in} p{4in} } #1 && #2 \\ \end{tabular}}
8 \newcommand{\pos}[1]{\textcolor{BrickRed}{#1}}
9 \newcommand{\postt}[1]{\textcolor{BrickRed}{\texttt{#1}}}
10 \newcommand{\dual}[1]{\textcolor{NavyBlue}{#1}}
11 \newcommand{\dualtt}[1]{\textcolor{NavyBlue}{\texttt{#1}}}
12 \newenvironment{both}
13 { \begin{tabular}{ p{3in} p{3in} } }
14 { \\ \end{tabular} }
15
16 \renewcommand{\ttdefault}{pcr}
17 \lstset{basicstyle=\ttfamily,columns=flexible,language=[LaTeX]TeX,%
18 texcl,%set comments in an extra line
19 mathescape=false,escapechar=|,
20 literate={data}{\bfseries\textcolor{BrickRed}{data }}1
21 {codata}{\bfseries\textcolor{NavyBlue}{codata }}1
22 {record}{\bfseries\textcolor{NavyBlue}{record }}1
23 {case}{\bfseries\textcolor{BrickRed}{case }}1
24 {of}{\bfseries\textcolor{BrickRed}{of }}1
25 {merge}{\bfseries\textcolor{NavyBlue}{merge }}1
26 {from}{\bfseries\textcolor{NavyBlue}{from }}1
27 {if}{\bfseries{if }}1
28 {then}{\bfseries{then }}1
29 {else}{\bfseries{else }}1
30 {assert}{\bfseries{assert }}1
31 {<+>}{{+ }}1
32 {+}{\textcolor{BrickRed}{+ }}1
33 {*}{\textcolor{NavyBlue}{* }}1
34 {<pos>}{\color{BrickRed}}1
35 {</pos>}{\normalcolor}1
36 {<dual>}{\color{NavyBlue}}1
37 {</dual>}{\normalcolor}1
38 ,%
39 texcsstyle=*\color{blue!70}\bfseries,
40 texcs={end,begin,documentclass}
41 }
42 \begin{document}
43 \section{\pos{Data} and \dual{Codata}}
44
45 \begin{both}
46 \pos{Data} is defined by its \pos{introduction} rules.
47 & \dual{Codata} is defined by its \dual{elimination} rules.
48 \\
49 We can define a \pos{data} type with
50 & We can define a \dual{codata} type with
51 \\
52 \begin{lstlisting}
53 data <pos>Either</pos> a b
54 = Left a
55 + Right b
56 \end{lstlisting}
57 &
58 \begin{lstlisting}
59 codata <dual>Both</dual> a b
60 = first a
61 * second b
62 \end{lstlisting}
63 \\
64 This definition will effectively introduce two functions:
65 & This definition will effectively introduce two functions:
66 \\
67 \begin{lstlisting}
68 Left : a -> <pos>Either</pos> a b
69 Right : b -> <pos>Either</pos> a b
70 \end{lstlisting}
71 &
72 \begin{lstlisting}
73 first : <dual>Both</dual> a b -> a
74 second : <dual>Both</dual> a b -> b
75 \end{lstlisting}
76 \\
77 In order to \pos{destruct data} we have to use
78 \pos{patterns} which let you match on \pos{constructors}.
79 & In order to \dual{construct codata} we have to use
80 \dual{copatterns} which let you match on \dual{destructors}.
81 \\
82 \begin{lstlisting}
83 case e of
84 Left x -> e1
85 Right y -> e2
86 \end{lstlisting}
87 &
88 \begin{lstlisting}
89 merge x from
90 first x <- e1
91 second x <- e2
92 \end{lstlisting}
93 \\
94 Here, \postt{e} represents the \pos{value being
95 destructed}, and each branch represents a \pos{constructor
96 with which it might have been constructed}. We are effectively
97 \pos{dispatching on possible pasts} of
98 \postt{e}.
99 & Here, \dual{\texttt{x}} represents the \dual{value being
100 constructed}, and each branch represents a \dual{destructor
101 with which it might eventually be destructed}. We are effectively
102 \dual{dispatching on possible futures} of
103 \dualtt{x}.
104 \\
105 \end{both}
106
107 \section{Codata, Records, and Copatterns}
108 \raggedright
109
110 In the same way that named sums are a natural way to represent data,
111 records are a natural way to represent codata. In lieu of the above
112 syntax, one often sees codata represented as something more like
113
114 \begin{lstlisting}
115 record <dual>Both</dual> a b = <dual>{</dual> .first : a * .second : b <dual>}</dual>
116
117 x : <dual>Both</dual> Int Bool
118 x = <dual>{</dual> .first = 2 * .second = true <dual>}</dual>
119
120 assert x.first == 2
121 assert x.second == true
122 \end{lstlisting}
123
124 The \dualtt{merge} syntax is used here for conceptual
125 symmetry with \postt{case}. Additionally, the use of
126 copatterns is nicely dual with the extra expressivity that
127 patterns offer. For example, we can use nested patterns with
128 constructors of various types, as in this function which
129 processes a list of \postt{Either Int Bool} values
130 by summing the integers in the list until it reaches a
131 \postt{false} value or the end of the list:
132
133 \begin{lstlisting}
134 f : List (Either Int Bool) -> Int
135 f lst = case lst of
136 Cons (Left i) xs -> i <+> f xs
137 Cons (Right b) xs -> if b then f xs else 0
138 Nil -> 0
139 \end{lstlisting}
140
141 Similarly, we can define an infinite stream of pairs using
142 nested copatterns as so:
143
144 \begin{lstlisting}
145 s : Int -> Stream (Both Int Bool)
146 s n = merge x from
147 first (head x) <- n
148 second (head x) <- n > 0
149 tail x <- x
150 \end{lstlisting}
151
152 Copatterns are also practically expressive, as in this concise
153 and readable definition of the fibonacci numbers in terms of
154 the \dualtt{merge} expression:
155
156 \begin{lstlisting}
157 codata Stream a
158 = head a
159 * tail (Stream a)
160
161 zipWith : (a -> b -> c) -> Stream a -> Stream b -> Stream c
162 zipWith f s1 s2 = merge x from
163 head x <- f (head s1) (head s2)
164 tail x <- zipWith f (tail s1) (tail s2)
165
166 fibs : Stream Int
167 fibs = merge x from
168 head x <- 0
169 head (tail x) <- 1
170 tail (tail x) <- zipWith (<+>) x (tail x)
171 \end{lstlisting}
172
173 \section{Row-Typed \dual{Co}data}
174
175 \begin{both}
176 It is possible to build an \pos{open sum} by a small
177 modification of the datatype mechanism. Instead of
178 naming types and listing their associated
179 \pos{constructors}, we represent a type as a
180 \pos{list of constructors and types}.
181 &
182 It is possible to build an \dual{open product} by a small
183 modification of the datatype mechanism. Instead of
184 naming types and listing their associated
185 \dual{destructors}, we represent a type as a
186 \dual{list of destructors and types}.
187 \\
188 {\color{BrickRed}
189 \begin{verbatim}
190 {- .. -} : [ Left: a + Right: b ]
191 \end{verbatim}
192 }
193 &
194 {\color{NavyBlue}
195 \begin{verbatim}
196 {- .. -} : { first: a * second: b }
197 \end{verbatim}
198 }
199 \\
200 Use of a \pos{constructor} no longer specifies a
201 specific type, but rather \pos{any type that can be
202 constructed with that constructor}.
203 &
204 Use of a \dual{destructor} no longer specifies a
205 specific type, but rather \dual{any type that can be
206 destructed with that destructor}.
207 \\
208 \begin{lstlisting}
209 Left : a -> <pos>[</pos> Left: a + ... <pos>]</pos>
210 Right : b -> <pos>[</pos> Right: b + ... <pos>]</pos>
211 \end{lstlisting}
212 &
213 \begin{lstlisting}
214 first : <dual>{</dual> first: a * ... <dual>}</dual> -> a
215 second : <dual>{</dual> second: b * ... <dual>}</dual> -> b
216 \end{lstlisting}
217 \\
218 If we want to \pos{destruct} an open value like this, we
219 can use a \postt{case} construct, as before.
220 &
221 If we want to \dual{construct} an open value like this, we
222 can use a \dualtt{merge} construct, as before.
223 \\
224 {\color{BrickRed}
225 \begin{verbatim}
226 f : [ Left Int + Right Bool ] -> Int
227 f e = case e of
228 Left i -> i
229 Right b -> if b then 1 else 0
230 \end{verbatim}
231 }
232 &
233 {\color{NavyBlue}
234 \begin{verbatim}
235 f : Int -> { first Int * second Bool }
236 f i = merge x from
237 first x <- i
238 second x <- i == 0
239 \end{verbatim}
240 }
241 \end{both}
242 \end{document}
1 # Subject-Oriented Programming
2
3 The idea of subject-oriented programming ostensibly originated as a critique of
4 object-oriented programming. In most statically-typed class-based
5 OOP systems[^2] beginners are encouraged to lay out their inheritance
6 hierarchy by basing it
7 on an ontology of concepts, i.e. the classic `Cat`-inherits-from-`Animal`
8 model. In practice, as people become more and more fluent with writing code
9 in an OOP language, they tend towards using interfaces more and more and
10 inheritance less and less, so as to couple the components less tightly.
11
12 [^2]: Are there statically-typed prototype-based OOP systems?
13
14 As Borges notes in "The Analytical Language of John Wilkins", "...it is clear
15 that there is no classification of the Universe not being arbitrary and full
16 of conjectures. The reason for this is very simple: we do not know what thing
17 the universe is." Necessarily, then, every ontology a programmer creates is
18 going to be arbitrary. A simple example might be edible plants: one programmer
19 will choose to structure their inheritance hierarchy such that `Tomato`
20 inherits from `Fruit`, as it derives from the flowering tissues of plants,
21 whereas another will declare that `Tomato extends Vegetable` because they are
22 not particularly sweet when eaten—and anyway, the codebase already has
23 `Salad makeSalad(ArrayList<Vegetable> ingredients)`.
24
25 Consequently, one problem with the inheritance-hierarchy-as-ontology system is (as Ossher
26 and Harrison describe in their paper) that it only works if your program is
27 guaranteed to always need/use that _particular_ ontology. To use a
28 somewhat more practical video game
29 example, let's say that you have game units in some manner of strategy game,
30 and you decide to structure their class hierarchy as follows:
31
32 - Unit
33 - Mobile
34 - Ally
35 - Enemy
36 - Stationary
37
38 This is fine... until you'd like to have a stationary enemy (e.g. some kind
39 of gun emplacement.) You could rearrange to the following:
40
41 - Unit
42 - Ally
43 - MobileAlly
44 - StationaryAlly
45 - Enemy
46 - MobileEnemy
47 - StationaryEnemy
48
49 ...but aside from the work of refactoring, this now requires a third subclass
50 of `Unit` to account for the neutral entities (like trees), and you can no
51 longer write functions that take _any_ `Mobile` unit. As indicated before,
52 seasoned OOP programmers might instead reach for interfaces (`implements
53 Mobile, Ally`), or you might avoid encoding this information in the type at
54 all (e.g. give each `Unit` instance variables corresponding to motility,
55 alignment, &c.)
56
57 Nevertheless, there are some advantages to static type hierarchies. Perhaps we would
58 like a hierarchy of mobility:
59
60 - Unit
61 - Immobile
62 - Mobile
63 - Walking
64 - Flying
65 - Swimming
66
67 At times, we would like to write methods in terms of anything that can move,
68 whereas at other times, we'd like to write methods solely in terms of swimming
69 units, and at other times, we'd like to write methods in terms of any unit at
70 all. This is one thing that Harrison and Ossher were attempting to preserve
71 when they described Subject-Oriented Programming, and is the salient way in which
72 it differs from Object-Oriented Programming: in an OOP system, an object `o`
73 _is_ an instantiation of a class `C`, whereas in a SOP system, an entity[^1] `e`
74 _can be seen as_ an activation of a subject `S`.
75
76 [^1]: Harrison and Ossher actually use the word _oid_, as short for _object
77 identifier_, but I prefer the term _entity_.
78
79 In a Subject-Oriented model, we could have several distinct class
80 hierarchies, e.g.
81
- Unit
  - Immobile
  - Mobile
    - Walking
    - Flying
    - Swimming

- Unit
  - Allied
    - PlayerOwned
    - FriendlyNPC
  - Enemy
    - EnemyNPC
  - Wild

- Unit
  - NoAttack
  - Attacking
    - MeleeAttacking
    - AreaAttacking
    - RangeAttacking
90 So if the player has an archer, it can be seen as a `Walking` unit, a
91 `PlayerOwned` unit, and a `RangeAttacking` unit. If we then write a method
92 that works with a `Mobile` unit, or an `Allied` unit, or an `Attacking`
93 unit, we can pass it the player's archer, despite the fact that these are
94 all distinct ontologies. I'm handwaving wildly over the implementation
95 details for the moment, and to some extent I'm diverging from the original
96 conception, but I'm trying to stay roughly faithful to the ideas as
97 presented.
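This "can be seen as" relation is easy to prototype. The following Python sketch is my own illustration (the hierarchy tables and function names are invented, not from Harrison and Ossher): an entity is simply a set of subjects, and it can be seen as any subject that sits at or above one of its own subjects in some hierarchy.

```python
# Hypothetical sketch: each hierarchy maps a subject to its parent.
HIERARCHIES = {
    "mobility": {"Walking": "Mobile", "Flying": "Mobile", "Swimming": "Mobile",
                 "Mobile": "Unit", "Immobile": "Unit"},
    "alignment": {"PlayerOwned": "Allied", "FriendlyNPC": "Allied",
                  "EnemyNPC": "Enemy", "Allied": "Unit", "Enemy": "Unit",
                  "Wild": "Unit"},
    "attack": {"MeleeAttacking": "Attacking", "AreaAttacking": "Attacking",
               "RangeAttacking": "Attacking", "Attacking": "Unit",
               "NoAttack": "Unit"},
}

def ancestors(subject):
    """The subject itself plus everything above it in its hierarchy."""
    for parents in HIERARCHIES.values():
        if subject in parents:
            chain = {subject}
            while subject in parents:
                subject = parents[subject]
                chain.add(subject)
            return chain
    return {subject}

def can_be_seen_as(entity, subject):
    """An entity can be seen as S if any of its subjects is S or below S."""
    return any(subject in ancestors(s) for s in entity)

archer = {"Walking", "PlayerOwned", "RangeAttacking"}
```

Here `can_be_seen_as(archer, "Mobile")`, `can_be_seen_as(archer, "Allied")`, and `can_be_seen_as(archer, "Attacking")` all hold, even though the three requirements come from three distinct ontologies.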
98
99 Curiously, Subject-Oriented Programming has a large conceptual overlap with
100 an idea quite in vogue among game developers, that of Component-Entity
101 Systems. What Harrison and Ossher would call _subjects_ are there called
102 _components_, and many of the details as implemented are different, but the
103 critical idea—that of an `entity` which takes on various roles as needed—is
104 present in both systems.
105
106 # A Subject-Oriented λ-Calculus
107
108 What would a subject-oriented λ-calculus look like? I'm gonna come up with
109 something that _looks_ like subject-oriented programming if you squint, but
110 it's not going to be particularly faithful to the original formulation.
111
112 What operations are valid on a particular entity? Well, it depends on the
113 subjects that entity has associated. We have a set of them, and each one
114 has fields and methods associated. That suggests that perhaps an entity
115 should look sort of like a _record_ of sorts—in particular, a record in a
116 language with _row polymorphism_. I'm gonna come up with a code sample, and
117 then elaborate on which primitive operations we need:
118
119 subject S of Int
120 subject T of Bool
121
122 f : {S,T,...} -> Int
123 f e = get S i from e
124 T b from e
125 in if b then i else 0
126
127 y : Int
128 y = let x = ε
129 x' = add S 3 to x
130 x'' = add T True to x'
131 in f x''
132
133 What's going on here? Well, we're declaring our _subjects_ up front, but
134 those are effectively labels for a record. We use those with the
135 `add ... to ...` operation, which acts like record update, and extract
136 them with `get ... from ... in ...`. I'm here using epsilon as the
137 _"empty" entity_ but that's more or less arbitrary.
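Since an entity here is essentially an extensible record, the three primitives can be modeled directly with maps. A rough operational sketch (my encoding, not a formal semantics):

```python
EMPTY = {}  # ε, the entity with no subjects

def add(subject, value, entity):
    """add S v to e: extend e with subject S carrying value v."""
    return {**entity, subject: value}

def get(subject, entity):
    """get S x from e: project out the value S carries; a KeyError here
    corresponds to an ill-typed program in the calculus."""
    return entity[subject]

def f(e):
    # f : {S,T,...} -> Int
    i = get("S", e)
    b = get("T", e)
    return i if b else 0

x = EMPTY
x1 = add("S", 3, x)
x2 = add("T", True, x1)
y = f(x2)  # y is 3, as in the example
```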
138
139
140
142
143 So let's start with the expression language. We'll start with a rough
144 simply-typed λ-calculus with products, sums, and a unit type, and then
145 add things one-by-one.
146
147 e := (e, e) | inl e | inr e | unit | λx:t. e
148
149 So far, so good. The next thing we can do is create an _empty entity_,
150 which is an entity with no subjects associated with it.
151
152 e := ... | ε
153
154 we can also _add a subject_ to an entity
155
156 e := ... | add S e to e
157
158 and _access a subject's field_
159
160 e := get S x from e in e
161
162 These are exactly the operations used in the code sample above, so let's
163 walk back through it.
164
165 To begin with, we declare two different subjects, `S` (which requires an
166 argument of type `Int`) and `T` (which requires an argument of type
167 `Bool`). We then define a function `f`, which takes an entity, and
168 extracts from it the subject `S` with its associated value and the subject
169 `T` with its associated value, and then uses those values in rather
170 traditional functional code.
171
172 The definition of the value `y` is done by creating an intermediate
173 entity called `x`, then creating a new entity `x'` that is the same as
174 `x` but nevertheless able to be viewed as an `S` as well, and then
175 an entity `x''` which can be viewed as either an `S` or a `T`, and then
176 passes that to `f` (which requires an entity that can be seen as
177 both `S` and `T` simultaneously.) The final value of `y` is going to
178 be `3`.
179
180 This gives us a clue about how types will work in our calculus, as
181 well—an entity must have a type corresponding to the set of subjects it
182 implements. Consequently, we have the following types in the program:
183
184 x : {}
185 x' : {S}
186 x'' : {S,T}
187 y : Int
188 f : {S,T} -> Int
189
190 This is of course a ridiculous toy program, but we can begin to see some
191 of the previous features of Subject-Oriented Programming peeking out at
192 us:
193
194 subject HasPosition of Int * Int
195 subject HasVelocity of Int * Int
196
197 updateEntity : Ref {HasPosition,HasVelocity} -> ()
198 updateEntity someRef =
199 let e = !someRef in
200 get HasPosition (x,y) from e
201 HasVelocity (dx,dy) from e in
202 someRef := add HasPosition (x + dx, y + dy) to e
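The same update can be mirrored operationally in Python, with the `Ref` modeled as a one-element list (an assumed encoding, for illustration only):

```python
def add(subject, value, entity):
    return {**entity, subject: value}  # record-update-style extension

def update_entity(some_ref):
    e = some_ref[0]                           # !someRef
    x, y = e["HasPosition"]                   # get HasPosition (x,y) from e
    dx, dy = e["HasVelocity"]                 # get HasVelocity (dx,dy) from e
    some_ref[0] = add("HasPosition", (x + dx, y + dy), e)  # someRef := ...

ref = [{"HasPosition": (1, 2), "HasVelocity": (3, 4)}]
update_entity(ref)  # position becomes (4, 6); velocity is unchanged
```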
203
204 And so forth.
205
206
207 # The Mezzo Language
208
209 There's a functional language—an ML variant—being developed by Jonathan
210 Protzenko and François Pottier at INRIA. It's an interesting effort. You
211 can read about its motivations and capabilities elsewhere, but the most
212 interesting aspect is the way it handles what it calls _permissions_. A _permission_
213 corresponds somewhat to what other languages call a _type_, but the semantics
214 are... different.
215
216 For example, in an ML-like language, I can write a function which takes a pair
217 of values of the same type and swaps them.
218
219 fun swap r =
220 let val (x, y) = !r in
221 r := (y, x)
222 end
223
224 This SML function has its type inferred as `('a * 'a) ref -> unit`, and after
225 it executes, whatever pair you've passed it will have had its elements
226 swapped. However, in SML, there is no way of doing this with pairs of type
227 `('a * 'b) ref`, because you can't _change_ the type of something that exists.
228 You would have to construct a new pair with a different type.
229
230 Mezzo is different. In Mezzo, you don't have types, you have _permissions_,
231 which you can understand as the permission to access a particular part of the
232 heap as a particular kind of data. As such, you are allowed to write a function
233
234 val mswap: [a, b] (consumes x: mpair a b) -> (| x @ mpair b a)
235
236 This type is interpreted as _consuming_ the permission to access `x` as a mutable
237 pair of types `a` and `b`, and _producing_ the permission to access `x` as a mutable
238 pair of types `b` and `a`. To be concise, it swaps the elements of the mutable pair
239 _without allocating a new pair_ and _in a type-safe way_[^3]. There's a fair amount
240 of extra complexity I'm omitting, and I encourage the reader to read the relevant
241 blog posts, because there are a lot of interesting things going on there.
242
243 [^3]: Or a permission-safe way, anyway.
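Python has no permission system, so the interesting static story can't be reproduced, but the operational content of `mswap`, swapping the components of a mutable pair in place without allocating a new one, is simply:

```python
def mswap(cell):
    """Swap the two components of a mutable pair in place."""
    cell[0], cell[1] = cell[1], cell[0]  # no new pair is allocated

p = [1, "one"]   # usable as a pair of (int, str) ...
mswap(p)         # ... and afterwards as a pair of (str, int)
```

What Mezzo adds is the static bookkeeping: after the call, the permission `x @ mpair a b` is consumed and only `x @ mpair b a` remains, so stale uses of the old view are rejected at compile time.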
244
1 Haskell records have lots of problems. Here's one that came up for me today.
2
3 You are allowed to export record members without exporting the constructor,
4 for example, if you want to ensure some property is true of the constructed
5 values. In the following example, the field `isNeg` is effectively a function
6 of the field `num`:
7
8 module Foo(mkRec, num, isNeg) where
9
10 data Rec = Rec
11 { num :: Int
12 , isNeg :: Bool
13 }
14
15 mkRec :: Int -> Rec
16 mkRec n = Rec n (n < 0)
17
18 Another module can't use the `Rec` constructor, but can observe the values
19 using the exported accessors:
20
21 module Bar where
22
23 addRecs :: Rec -> Rec -> Rec
24 addRecs r1 r2 = mkRec (num r1 + num r2)
25
26 Unfortunately, there's a hole here, which is that exporting the accessors
27 allows us to use record update syntax, which means that we can now construct
28 arbitrary values:
29
30 constructAnyRec :: Int -> Bool -> Rec
31 constructAnyRec n b = (mkRec 0) { num = n, isNeg = b }
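This failure mode is not unique to Haskell. As a loose analogue (my example, not one from the original discussion), Python's `dataclasses.replace` plays the role of record update and can likewise bypass a smart constructor's invariant:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class Rec:
    num: int
    is_neg: bool

def mk_rec(n: int) -> Rec:
    """Smart constructor maintaining the invariant is_neg == (num < 0)."""
    return Rec(n, n < 0)

ok = mk_rec(-3)                               # Rec(num=-3, is_neg=True)
bad = replace(mk_rec(0), num=5, is_neg=True)  # invariant silently broken
```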
32
33 There is a way around this, namely, by rewriting the original module with
34 manual accessors for `num` and `isNeg`:
35
36 module Foo2(mkRec, num, isNeg) where
37
38 data Rec = Rec
39 { _num :: Int
40 , _isNeg :: Bool
41 }
42
43 num :: Rec -> Int
44 num = _num
45
46 isNeg :: Rec -> Bool
47 isNeg = _isNeg
48
49 mkRec :: Int -> Rec
50 mkRec n = Rec n (n < 0)
51
52 However, I'd assert that, morally, the _correct_ thing to do would be to
53 disallow record update entirely when the constructor is not in scope. The
54 purpose of hiding a constructor is to ensure that a programmer
55 must perform certain computations in order to construct a valid value,
56 e.g. to enforce invariants on constructed data (as I'm doing here), or
57 to prevent pattern-matching on the data. If a programmer
58 hides a constructor but exports its accessors, it's generally for
59 the former reason, so it would be sensible to prevent
60 record update; you could always write your own update functions if you
61 needed them.
62
63 Of course, pointing out this flaw in light of the other problems with the
64 Haskell record system is like complaining about the in-flight movie on
65 a crashing plane, but still.
1 # Cherenkov: Basics
2
3 A Cherenkov program evaluates to a JSON value, and in fact should include
4 a standalone program which can be used to evaluate a Cherenkov program to
5 its JSON equivalent. Cherenkov also includes a little bit of YAML-like
6 sugar but does not include the full generality of YAML. (For example,
7 references are not valid Cherenkov: it has something better!)
8
9 A simple configuration file might look like
10
11 ~~~~
12 # the first host
13 - hostname: "foo"
14 forward-agent: true
15 user: "u1"
16 # the second host
17 - hostname: "bar"
18 local-command: "/bin/setup-shell -q"
19 user: "u2"
20 ~~~~
21
22 This evaluates to the JSON document
23
24 ~~~~
25 [ { "hostname": "foo"
26 , "forward-agent": true
27 , "user": "u1"
28 }
29 , { "hostname": "bar"
30 , "local-command": "/bin/setup-shell -q"
31 , "user": "u2"
32 }
33 ]
34 ~~~~
35
36 Note that the comments are removed in the final output, as comments are
37 not valid JSON.
38
39 # Cherenkov: Expressions
40
41 Expressions can be introduced using the single-quote character. These
42 will be evaluated and their result spliced into the final output. If
43 the expression is a string, a number, or a bool, it will result in
44 a value of the corresponding JSON type:
45
46 ~~~~
47 - 1
48 - '(0.5 + 1.5)
49 - '("foo" ++ "bar")
50 ~~~~
51
52 This evaluates to
53
54 ~~~~
55 [1, 2.0, "foobar"]
56 ~~~~
57
58 If the result is a constructor, then it will be represented as an
59 object with a single key that corresponds to the name of the
60 constructor:
61
62 ~~~~
63 - '(Left 5)
64 ~~~~
65
66 evaluates to
67
68 ~~~~
69 [{"Left": 5}]
70 ~~~~
71
72 If the result is a record, then it will be represented as a JSON
73 object:
74
75 ~~~~
76 - '{x: 2, y: 3}
77 ~~~~
78
79 evaluates to
80
81 ~~~~
82 [{"x": 2, "y": 3}]
83 ~~~~
84
85 The expression language includes several functional features, such as
86 let-bindings, anonymous functions with a Haskell-like syntax, and
87 function calls:
88
89 ~~~~
90 foo: '(let lst = [1,2,3] in map (\x.x+1) lst)
91 ~~~~
92
93 evaluates to
94
95 ~~~~
96 {"foo": [2,3,4]}
97 ~~~~
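The mapping from evaluated expressions to JSON described above is mechanical. Here is an illustrative encoding in Python (hypothetical, not the actual Cherenkov implementation):

```python
import json

class Ctor:
    """An in-memory constructor application, e.g. Left 5."""
    def __init__(self, name, arg):
        self.name, self.arg = name, arg

def to_json(v):
    if isinstance(v, Ctor):
        return {v.name: to_json(v.arg)}                 # constructor -> single-key object
    if isinstance(v, dict):
        return {k: to_json(x) for k, x in v.items()}    # record -> object
    if isinstance(v, list):
        return [to_json(x) for x in v]
    return v                                            # strings, numbers, bools pass through

doc = [Ctor("Left", 5), {"x": 2, "y": 3}]
print(json.dumps(to_json(doc)))  # [{"Left": 5}, {"x": 2, "y": 3}]
```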
98
99 # Cherenkov: Declarations
100
101 Global declarations can be written by prefacing a declaration with
102 the keyword `with`. These declarations do not appear in the output:
103
104 ~~~~
105 with incrAll lst = map (\x.x+1) lst
106
107 '(incrAll [1,2,3])
108 ~~~~
109
110 evaluates to
111
112 ~~~~
113 [2,3,4]
114 ~~~~
115
116 Functions can be defined by cases, and can have type signatures as
117 well. The cases are given in an indented block:
118
119 ~~~~
120 with intersperse : List String -> String -> String
    (x::[]) i = x
    (x::xs) i = x ++ i ++ intersperse xs i
    [] i = ""
123 ~~~~
124
125 Values and types can also be defined in `with`-blocks:
126
127 ~~~~
128 with type Server =
129 { hostname String
130 , username String
131 , portNum Int
132 }
133
134 with defaultServer : Server =
135 { hostname: "foo.bar"
136 , username: "guest"
137 , portNum: 9999
138 }
139
140 servers:
141 - 'defaultServer
142 - '(defaultServer { hostname: "blah" })
143 - '(defaultServer { portNum: 9876 })
144 ~~~~
145
146 evaluates to
147
148 ~~~~
149 {"servers":
150 [ { "hostname": "foo.bar"
151 , "username": "guest"
152 , "portNum": 9999
153 }
154 , { "hostname": "blah"
155 , "username": "guest"
156 , "portNum": 9999
157 }
158 , { "hostname": "foo.bar"
159 , "username": "guest"
160 , "portNum": 9876
161 }
162 ]
163 ~~~~
164
165 # Cherenkov: Type System
166
167 If you've used Haskell, OCaml, or SML, the type system in Cherenkov
168 should look vaguely similar. We have records and sums, the former
169 denoted by curly braces and the latter by square brackets:
170
171 ~~~~
172 # defining a sum type
173 with type MyVal =
174 [ MyInt Int
175 , MyBool Bool
176 ]
177
178 # using a sum type
179 with f : MyVal -> Int
180 MyInt n = n
181 MyBool b = if | b -> 1
182 | else -> 0
183
184 # defining a product type
185 with type MyRec =
186 { myString String
187 , myFloat Float
188 }
189
190 # using a product type
191 with g : MyRec -> String
192 r = r.myString ++ " and " ++ (if | r.myFloat > 0 -> "positive"
193 | else -> "negative")
194 ~~~~
195
196 You can also define polymorphic types:
197
198 ~~~~
199 # defining a polymorphic sum type
200 with type Either a b = [ Left a, Right b ]
201
202 # defining a polymorphic product type
203 with type Both a b = { first a, second b }
204 ~~~~
205
206 Cherenkov has a much looser type system than some other strongly-typed
207 languages. It technically has no type declarations---it has only type
208 aliases. You can use as many or as few type annotations as you'd like,
209 since types are inferred if you omit them. We can write the above functions
210
211 ~~~~
212 # using a sum type without type annotations
213 with f
214 MyInt n = n
215 MyBool b = if | b -> 1
216 | else -> 0
217
218 # using a product type
219 with g r =
220 r.myString ++ " and " ++ (if | r.myFloat > 0 -> "positive"
221 | else -> "negative")
222 ~~~~
223
224 And they will have the inferred types
225
226 ~~~~
227 f : [ MyInt Int, MyBool Bool ] -> Int
228 g : { myString String, myFloat Float, rest } -> String
229 ~~~~
230
231 That second type deserves a little bit more attention. Cherenkov has
232 _row polymorphism_, which means that the second version of `g` will
233 accept as argument not just a record with `myString` and `myFloat`
234 fields, but a record with any number of additional fields, which is
235 what the `rest` in the type represents. This can also appear in
236 the return position: for example, a function like
237
238 ~~~~
239 with f r = r { z: r.x + r.y }
240 ~~~~
241
242 has an inferred type
243
244 ~~~~
245 f : { x: Int, y: Int, rest } -> { x: Int, y: Int, z: Int, rest }
246 ~~~~
247
248 as the rest of the fields are passed through unchanged. The `rest`
249 variable also occurs when we underspecify a constructor: consider
250 the function `g`
251
252 ~~~~
253 with g
254 Left x = Left (x == 0)
255 rs = rs
256 ~~~~
257
258 which has the inferred type
259
260 ~~~~
261 g : [ Left: Int, rest ] -> [ Left: Bool, rest ]
262 ~~~~
263
264 i.e. this can be called on any constructor and it will pass it through
265 unchanged. If this is too lax for us—if we'd prefer to limit the
266 constructors to a particular set—then we can list them explicitly:
267
268 ~~~~
269 with g'
270 Left x = Left (x == 0)
271 Right y = Right (y : Bool)
272 ~~~~
273
274 which has the inferred type
275
276 ~~~~
277 g' : [ Left: Int, Right: Bool ] -> [ Left: Bool, Right: Bool ]
278 ~~~~
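Row polymorphism is easy to see dynamically. In this Python sketch (my encoding: dicts stand in for row-typed records and `(tag, payload)` pairs for constructors), extra fields and unhandled constructors pass through exactly as the inferred `rest` types describe:

```python
def f(r):
    # { x: Int, y: Int, rest } -> { x: Int, y: Int, z: Int, rest }
    return {**r, "z": r["x"] + r["y"]}

def g(v):
    # [ Left: Int, rest ] -> [ Left: Bool, rest ]
    tag, payload = v
    if tag == "Left":
        return ("Left", payload == 0)
    return v  # any other constructor passes through unchanged

f({"x": 1, "y": 2, "w": "kept"})  # the extra field "w" survives
g(("Right", True))                # an unhandled constructor survives
```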
279
280 Because all types are aliases, it means that recursive types and
281 polymorphic types are also aliases for explicit type fixpoints and
282 quantification, so the type alias
283
284 ~~~~
285 type MyList a =
286 [ MyCons (a, MyList a)
287 , MyNil
288 ]
289 ~~~~
290
291 will desugar to the type `\a. b is [ MyCons (a, b), MyNil ]`, where
292 `\ var . typ` represents quantification as `var is typ` represents
293 a fixpoint. It is possible but ill-advised to use these in actual
294 config files—in practice, these exist because they provide a nice
295 underlying theory to a terse and implicit system that preserves
296 type-safety while requiring a minimum of explicit direction from
297 the programmer.
298
299 # Cherenkov: Totality
300
301 Cherenkov is strongly-typed and total. This means that certain invalid
302 operations are not allowed in Cherenkov programs, and that Cherenkov
303 programs will never loop forever. Certain restrictions are therefore
304 placed on Cherenkov programs:
305
306 - Cherenkov values may not be recursive, and therefore must be
307 finite in size
308 - Cherenkov functions may only be recursive _if they recurse on some
309 subpart of their input_, or (as a special case) if they recurse
310 with some numeric parameter that trivially moves towards a terminal
311 condition.
312 - Cherenkov sum types may be recursive, but Cherenkov product
313 types _must not_ be. (They can recurse underneath a sum, so e.g.
314 a type like `rec is { a Int, b rec }` is disallowed, as it would
315 involve an infinite value, whereas
316 `rec is { a Int, b [ Nothing, Just rec ] }` is fine. This only
317 works if the sum type in question has a non-recursive constructor:
318 we cannot use the type
319 `rec is { a Int, b [ Left rec, Right rec ] }`.)
320
321 For example, in the following code, `foo` is well-typed and will
322 also pass the totality checker, and `bar` is well-typed but would
323 be rejected by the totality checker as it is not trivially
324 recursing towards a terminal condition:
325
326 ~~~~
327 with foo : List Int -> Int
328 (x::xs) = x + foo xs
329 [] = 0
330
331 with bar : List Int -> Int
332 (x::xs) = bar (x+1::xs)
333 [] = bar []
334 ~~~~
335
336 This means that some functions which may be well-founded are not
337 expressible, because it is not trivial to see that they terminate:
338
339 ~~~~
340 with collatz : Int -> Bool
341 n = if | n == 1 -> true
342 | n < 1 -> false
343 | mod n 2 == 0 -> collatz (div n 2)
344 | else -> collatz (n * 3 + 1)
345 ~~~~
346
347 All sum and product types are row-polymorphic, and therefore do not
348 need to be declared, as those types can be inferred from use. You
349 can, however, declare aliases for those, if you'd like (especially
350 as recursive types can get quite complicated without aliases!)
1 # Data
2
3 In most statically-typed functional languages like ML or Haskell, the primary
4 mechanism you have for data types is _named sums_. There are two important
5 aspects of data types in general: the way you construct them, and the way
6 you destruct them. Let's start with constructing them. (I'm going to use a
7 pseudocode in this post that most closely resembles Idris, but is not
8 supposed to be any particular language.)
9
10 To define a type `T`, one will list the possible constructors and their
11 arguments:
12
13 data T = I Int | B Bool
14
15 This example gives you two possible constructors, `I` and `B`,
16 which you can think of as defining a type and two functions:
17
18 T : Type
19 I : Int -> T
20 B : Bool -> T
21
22 In addition to those, you also get the ability to destruct values created
23 with those constructors, which is done using the `case` construct.
24 `case` allows you to determine which constructor was used
25 to create a particular datum of type `T`:
26
27 f : T -> Int
28 f e =
29 case e of
30 I i -> i
31 B b -> if b then 1 else 0
32
33 # Codata
34
35 So: data is defined by its constructors. What's the opposite of data?
36 Why, codata, of course! And if data is defined using constructors, then
37 codata is defined using destructors.
38
39 What does this mean? Consider the following codata declaration in a hypothetical
40 version of Haskell:
41
42 codata T' = I' Int & B' Bool
43
44 This gives you two possible _destructors_, `I'` and `B'`, which means
45 you can think of them as two functions
46
47 I' : T' -> Int
48 B' : T' -> Bool
49
50 They're not constructors, they're _accessor_ functions. How do we create a
51 value, then? Using the dual of `case`, which I will call `merge`:
52
53 f' : Int -> T'
54 f' e =
55 merge
56 I' <- e
57 B' <- if e == 1 then True else False
58
59 How do we interpret this snippet of code? This creates a function `f'` which
60 takes an integer and returns a value of type `T'` (the opposite of what
61 `f` does using `case`.) The `merge` construct lists each possible destructor,
62 and then the value that will be returned when that destructor is called on it.
63
64 Now, you might have noticed that this isn't exactly dual. When we use a
65 `case` construct, we are _dispatching on the history of a value_, which
66 means we can _pattern-match over the constructor_. When we use `merge` as above,
67 we're just listing results. A truly dual construct, `merge'`, instead pairs each destructor with a function:
68
69     f' : Int -> MyType'
70     f' e =
71       merge' e with
72         MyInt' <- (λi. i)
73         MyBool' <- (λi. if i == 1 then True else False)
74
75 Let's look at a few trivial examples to see how these two ways of defining
76 data are dual. If we have a data definition with a single constructor
77
78 data X = X Int
79
80 then it follows that
81
82 i == case' (X i) of X -> (λj. j)
83
84 i.e. if we construct and then destruct a value, then it's the same as having
85 done nothing. We can assert the corresponding inside-out equivalence for
86 codata:
87
88 codata X' = X' Int
89 i == X' (merge' i with X' <- (λj. j))
90
91 Where data naturally maps to sums, codata naturally maps to products. In
92 an ML-like language, we can easily define the basic sum type `a + b` in terms
93 of the named-sum mechanism
94
95 data a :+: b = InL a | InR b
96
97 but cannot do the same with products, as all constructors of a type `T` must
98 be of the form `a -> T` for some specific `a`—consequently, we cannot define
99 a product type using this named-sums mechanism unless we already have a
100 product type. (In Haskell, we can, because Haskell allows its constructors
101 to be curried, so a constructor of the form `a * b -> T` can be curried to
102 `a -> b -> T`, but let us restrict ourselves to single-argument constructors
103 for this discussion.)
104
105 In a language with this `codata` mechanism, we could easily define that
106 product type in terms of its two destructors:
107
108 codata a :*: b = Fst a & Snd b
109
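To make this concrete, here is a sketch in TypeScript (purely illustrative: the tagged union stands in for the named-sum mechanism, and the product is written as the record of its two destructors):

```typescript
// The named-sum mechanism gives us binary sums directly...
type Sum<A, B> = { tag: "InL"; value: A } | { tag: "InR"; value: B };

// ...while the binary product falls out of codata: a value of the
// product type is exactly its two destructors, Fst and Snd.
type Prod<A, B> = { fst: A; snd: B };

// A product is only ever used through its destructors:
function swap<A, B>(p: Prod<A, B>): Prod<B, A> {
  return { fst: p.snd, snd: p.fst };
}
```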
110 In practice, it's often convenient to express codata as records and its
111 destructors as projection from those records. That means the `MyType'` example
112 above would look like
113
114 codata MyType' = { MyInt' :: Int, MyBool' :: Bool }
115
116 f' :: Int -> MyType'
117 f' e = { MyInt' = e, MyBool' = if e == 1 then True else False }
118
119 codata X' = { X' :: Int }
120     i == { X' = i }.X'
121
122 but while the syntax here is different, the semantics remain identical.
123
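The whole duality can be sketched in TypeScript, an illustration only: the tagged union stands in for `data`, a plain record stands in for the `codata` record form, and the field names `myInt`/`myBool` are adaptations of `MyInt'`/`MyBool'`:

```typescript
// The data side: a named sum, encoded as a tagged union.
type MyType =
  | { tag: "MyInt"; value: number }
  | { tag: "MyBool"; value: boolean };

// `case`-style destruction: dispatch on which constructor was used.
function f(e: MyType): number {
  switch (e.tag) {
    case "MyInt": return e.value;
    case "MyBool": return e.value ? 1 : 0;
  }
}

// The codata side: a record whose fields are the two destructors.
type MyTypePrime = { myInt: number; myBool: boolean };

// `merge`-style construction: list the value each destructor returns.
function fPrime(e: number): MyTypePrime {
  return { myInt: e, myBool: e === 1 };
}
```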
124 # Row Polymorphism
125
126 Note that each destructor here unambiguously identifies the type of the
127 codata it projects from—if we use `MyInt'`, then we are necessarily
128 looking at projecting from a value of type `MyType'`. This is very much
129 how Haskell records work, but people generally would prefer something
130 more parametric for records. One system for doing this is called _row
131 polymorphism_.
132
133 In a row-polymorphic record system, your accessors don't name a specific
134 type, but rather, _some record type for which that accessor holds_. That
135 means an accessor like `MyInt'` would have a type like
136
137 MyInt' :: (MyInt' Int <~ r) => r -> Int
138
139 This means that we can write functions which are agnostic as to the rest
140 of the fields in the records, so long as they contain the fields we want:
141
142 h :: (MyInt' Int <~ r, MyBool' Bool <~ r) => r -> Int
143 h e = if e.MyBool' then e.MyInt' else 0
144
145 In the above function, we could supply a record like
146 `{ A = "foo", B = 7.7, MyBool' = True, MyInt' = 2 }` to `h` and everything
147 would be fine. Notice that we've dispensed entirely with the type `MyType'`.
148 This could, hypothetically, cause some usability problems with respect to
149 type inference: before, if we had omitted the type declaration and misspelled
150 `MyInt'` as `MyItn'`, then our program would have immediately failed
151 to compile because no such accessor existed, but now, it's possible
152 for `h` to compile successfully and infer the incorrect type
153
154 h :: (MyItn' Int <~ r, MyBool' Bool <~ r) => r -> Int
155
156 and either fail to compile because of a failure of inference elsewhere or
157 succeed with unintended semantics (e.g. if `h` is an exposed library
158 function that is not used elsewhere in the module.)
159
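TypeScript's structural subtyping gives much of the same flexibility in practice (it is subtyping rather than true row polymorphism, and the field names are adapted), so the shape of `h` can be sketched as:

```typescript
// `h` only demands the two fields it projects; the parameter's
// structural type plays the role of the constraints
// (MyInt' Int <~ r, MyBool' Bool <~ r).
function h(e: { myInt: number; myBool: boolean }): number {
  return e.myBool ? e.myInt : 0;
}

// A record with extra fields is still accepted:
const r = { a: "foo", b: 7.7, myBool: true, myInt: 2 };
const result = h(r);
```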
160 Additionally, row-polymorphic records will incur a higher runtime cost
161 than our previous treatment. If a record is unambiguously identified by
162 a set of destructors, then we can compile those records as a fixed-size
163 chunk of memory with known offsets for each destructor. This is not the
164 case with row-polymorphic records.
165
166 Now we've developed codata as the dual of data and identified the record-like
167 representation of codata. What's the dual of row-polymorphic records?
168
169 # Dualizing Row Polymorphism
170
171 For convenience's sake, let's look at what row polymorphism looks like
172 with our previous syntax. Each accessor `A`, when used to extract a value of
173 type `t`, has the type
174
175 A :: (A t <~ r) => r -> t
176
177 and we construct row-polymorphic codata with a set of destructors and our
178 `merge` construct, which infers the type produced by the set of destructors
179 it is given:
180
181 g :: Int -> { A :: Int & B :: Bool }
182 g e =
183 merge
184 A <- e
185 B <- if e == 1 then True else False
186
187 That means that the dual of row polymorphism would have polymorphic
188 constructors, so that a constructor `C`, when used with a value of
189 type `t`, has the type
190
191     C :: (C t ~> s) => t -> s
192
193 and our `case` construct infers the type _destructed_ by the constructors
194 it matches over:
195
196 g' :: [ A :: Int | B :: Bool ] -> Int
197 g' e =
198 case e of
199 A i -> i
200 B b -> if b then 1 else 0
201
202 As it turns out, this already exists in OCaml, where data types of this
203 sort are called _polymorphic variants_. The problems I mentioned above
204 with row-polymorphic records apply here, as well: if I match over `MyItn`
205 instead of `MyInt`, then that function might compile and its type will
206 fail to unify elsewhere, or it might be fine. This is particularly
207 insidious with catch-all patterns: a programmer who wrote this and
208 missed the typo might believe that `x` is `True` when it in fact is
209 `False`:
210
211 isInt e =
212 case e of
213 MyItn _ -> True
214 _ -> False
215
216 x = isInt (MyInt 5)
217
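The same pitfall can be reproduced in TypeScript with open string tags (an analogy only: TypeScript has no polymorphic variants, but nothing below ties the tags to any declared type, which is exactly the hazard):

```typescript
// Nothing ties the string tags to a declared type, so the misspelled
// "MyItn" is not a compile error; the catch-all silently absorbs
// every real MyInt value.
function isInt(e: { tag: string }): boolean {
  switch (e.tag) {
    case "MyItn": // misspelling of "MyInt"
      return true;
    default:
      return false;
  }
}

const v = { tag: "MyInt", value: 5 };
const x = isInt(v); // false, even though v really is a MyInt
```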
218 This could be mitigated by forbidding catch-all patterns in a case. The
219 following code would not even type-check:
220
221 isInt e =
222 case e of
223 MyItn _ -> True
224 MyBool _ -> False
225
226 x = isInt (MyInt 5)
227
228 Like row-polymorphic records, these sums are also harder to represent at
229 runtime than traditional sums.
230
231 # Are They Useful?
232
233 Yes. In their own way.
1 # So, What's All This, Then?
2
3 So, you know what a wiki is, right? It's one of those, with a few
4 little twists. I'll walk you through it by example:
5
6 # Just A Wiki
7
8 The data model of a wiki is basically that you have some kind of
9 rich text structure—so you have some formatting primitives, like
10 bolding or lists or whatnot—as well as the notion of _links_,
11 which are references to other pages which you can follow. Each
12 link references a page—or, sometimes, a point within a page—and
13 each page is basically just that: rich text plus links.
14
15 Commonplace has a bit more structure. It refers to pages as
16 _scraps_, for reasons we'll see in just a little bit. You
17 create a new scrap and use Commonplace's mildly idiosyncratic,
18 LaTeX-inspired markup language to write a page:
19
20 ~~~~
21 \strong{Welcome} to this new page! We can link to an outside
22 page like so: \link{http://gdritter.com|my home page}.
23 We can also link to \there{foo|a wiki page that doesn't exist}.
24 ~~~~
25
26 That link will render a different color to indicate that it's a
27 _broken_ link, as the page it references doesn't yet exist. If
28 we follow it, we'll have the opportunity to create that page.
29
30 ~~~~
31 This is now our \em{Foo} page.
32 ~~~~
33
34 Okay, now that we've done that, we can go back and notice that
35 the link color has changed: we can click on it and it'll take
36 us to our newly created _foo_ page. There's a little twist on what
37 Commonplace allows us to do, though. Let's change the initial
38 page a little bit:
39
40 ~~~~
41 \strong{Welcome} to this new page! We can also link to
42 \link{foo|another page}. However, we can also do this:
43
44 \here{foo}
45 ~~~~
46
47 If we look at the page now: look! The entirety of the _foo_ scrap
48 is now _embedded_ within the larger scrap. We don't just link, we
49 also _compose_ scraps into larger pages. We can do this to a greater
50 depth, too:
51
52 [image here]
53
54 although there is a cut-off depth. Commonplace is also aware of
55 recursive pages, and will be intelligent about representing them:
56
57 [image here]
58
59 # Transparent Editing
60
61 Now let's try something a little different:
62
63 ~~~~
64 This is an outer page.
65
66 \page {
67 This is an inner page.
68 }
69 ~~~~
70
71 Now when we render this, we get this:
72
73 [image here]
74
75 Notice how we wrote a single page, but when we view it, it appears
76 to be a scrap embedded within another! What has happened is Commonplace
77 understands the `\page { ... }` construct to mean, "Create a new scrap
78 and embed it directly here." This means that we can still have a finer,
79 more granular structure without having to edit all the scraps
80 individually. The editing view gives you the ability to select a deeper
81 view (which lets you edit embedded scraps inline) or a shallower view
82 (which lets you see the `\here{...}` references.)
83
84 This could be useful in paper-editing: you can write your paper with each
85 subsection in its own `\page { ... }` block, but later can rearrange
86 those subsections by switching to a higher-level editing view and then
87 rearranging the `\here { ... }` references.
88
89 # Deeper Structure
90
91 All the above pages have been unstructured text, but Commonplace also
92 understands _structured_ scraps. We've actually already seen one, but
93 haven't realized it: the `\link { ... }` element. That appeared to be
94 a markup element, but behind-the-scenes, links work like embedded
95 `\page { ... }` elements, storing their contents in a separate
96 scrap. Notice that links have two parts: the actual URL and the
97 description text.
98
99 ~~~~
100 An external link to \link{http://www.example.com|an example site}.
101 ~~~~
102
103 This implies that links, like scraps, can be edited _individually_,
104 outside of the context of the page in which they appear. And if we do:
105
106 [image here]
107
108 It has a separate _named_ field for each part of the link. If we
109 modify it here, we can see the changes appear back in the page where
110 we used that link:
111
112 [image here]
113
114 This can give us tremendous flexibility: if we use a link throughout
115 our wiki, but the site to which we're linking goes down, we can
116 transparently replace the link everywhere it appears with an
117 [Internet Archive version] of the same thing.
118
119 Links are only one kind of type, and one with relatively little
120 structure. A good example of a more deeply-structured type is a
121
1 I basically always use some program in the `daemontools` family
2 on my computers. My home laptop and desktop are booted with an
3 init system (`runit`) based on `daemontools`, while many of the
4 systems I set up elsewhere boot a vanilla distribution
5 but immediately set up a `daemontools` service directory as a
6 secondary service management tool. Quite frankly,
7 it's one of the best examples of good Unix design
8 and at this point I wouldn't want to go without it.
9
10 This is a high-level introduction to the _idea_ of `daemontools`
11 rather than a full tutorial: to learn how to set it up in practice,
12 [djb's own site](http://cr.yp.to/daemontools.html)
13 as well as
14 [a handful](http://rubyists.github.io/2011/05/02/runit-for-ruby-and-everything-else.html)[^3]
15 [of others](http://www.troubleshooters.com/linux/djbdns/daemontools_intro.htm)
16 are better references.
17
18 [^3]: This one is about `runit`, not `daemontools`, but they are
19 similar enough in principle.
20
21 # What is Daemontools?
22
23 The _core_ of `daemontools` is just two programs: `svscan` and
24 `supervise`. They're very straightforward: `svscan` takes a single
25 optional argument, and `supervise` takes a single mandatory one.
26
27 [`svscan`](http://cr.yp.to/daemontools/svscan.html)
28 watches a directory (if none is specified, then it will
29 watch the current working directory) and checks to see if new
30 directories have been added. Any time a new directory is added,
31 it starts an instance of `supervise` pointing at that new
32 directory[^1].
33
34 [^1]: It does this not using `inotify` or some other
35 mechanism, but rather just by waking up every five seconds and
36 doing a quick traversal of everything in the directory. This
37 is less efficient, but also makes fewer assumptions about the
38 platform it's running on, which means `daemontools` can run just
39 about anywhere.
40
41 And that's all that `svscan` does.
42
43 [`supervise`](http://cr.yp.to/daemontools/supervise.html)
44 switches to the supplied directory and runs a
45 script there called `./run`. If `run` stops running for _any reason_, it
46 will be started again (after a short pause, to avoid hammering the
47 system.) It will also not
48 start the `./run` script if a file called `./down` exists in the same
49 directory. Extra data about the running process gets stored
50 in a subdirectory called `supervise`, and a few other
51 tools can be used to prod and modify that data—for example, to send
52 certain signals to kill the running program, to temporarily stop it,
53 or to see how long it has been running.
54
55 And that's almost all that `supervise` does.
56
57 One extra minor wrinkle is that if `supervise` is pointed at a
58 directory that also contains a subdirectory called `./log`,
59 and `./log/run` also exists, then it will monitor that
60 executable _as well_ and point the stdout of `./run` to the stdin of
61 `./log/run`. This allows you to build a custom logging solution
62 for your services if you'd like. The `./log` directory is
63 optional.
64
65 So, how does this run a system? Well, you point `svscan` at a
66 directory that contains a subdirectory for each service you
67 want to run. Those services are generally small shell scripts
68 that call the appropriate daemon in such a way that it will
69 stay in the foreground. For example, a script to run `sshd`
70 might look like:
71
72 ~~~~
73 #!/bin/sh
74
75 # redirecting stderr to stdout
76 exec 2>&1
77
78 # the -D option keeps sshd in the foreground
79 # and the -e option writes log information to stderr
80 exec /usr/sbin/sshd -D -e
81 ~~~~
82
83 And your directory structure might look like
84
85 ~~~~
86 -+-service/
87 |-+-ngetty/
88 | |---run
89 | |-+-log/
90 | |---run
91 |-+-sshd/
92 | |---run
93 | |-+-log/
94 | |---run
95 |-+-crond/
96 | |---run
97 | |-+-log/
98 | |---run
99 ~~~~
100
101 Once you point `svscan` at this, you end up having a process
102 tree where `svscan` is managing multiple `service` instances
103 which in turn manage their respective services and logging
104 services:
105
106 ~~~~
107 -svscan-+-service-+-ngetty
108 | `-log-service
109 +-service-+-sshd
110 | `-log-service
111          +-service-+-crond
112 | `-log-service
113 ~~~~
114
115 This design has some pretty amazing practical advantages, many of
116 which are attributable to the fact that `daemontools` _is written in terms of Unix idioms_.
117 The "Unix way" gets a fair amount of derision—some well-deserved,
118 some not—but `daemontools` is a good example of how embracing the
119 idioms of your system can produce better, more flexible software.
120 Consider the following problems and their `daemontools` solutions:
121
122 ## Testing a Service Before You Start It
123
124 The `./run` script is a plain executable. If it runs and stays
125 in the foreground, doing what it should do, it's correct. If it
126 doesn't, then there's a problem. That's also the only code path,
127 which is a sharp contrast to the infamously difficult-to-write
128 `sysvinit` scripts, where `start` and `stop` and `status` and so forth
129 must all be tested in various system states[^5].
130
131 [^5]: Of course, your daemon might still rely on state—but that's
132 the fault of your daemon, and no longer inherent in the service
133 mechanism. Contrast this to `sysvinit`-style scripts, where the only possible
134 API is a stateful one in which the script does different things
135 depending on the process state.
136
137 ## Starting and Stopping a Service
138
139 All you do is create or delete a service directory. The most common
140 way of doing this is to create the service directory elsewhere, and
141 then create a symlink into the service directory to start it. This
142 lets you delete a symlink without deleting the main directory, and
143 furthermore ensures that the 'creation' of the directory is atomic.
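
Sketched out with illustrative paths (a common convention keeps the real service directories in `/etc/sv` and points `svscan` at `/service`):

```shell
#!/bin/sh
# start: atomically expose the service to svscan via a symlink
ln -s /etc/sv/myservice /service/myservice

# stop: remove the symlink, then tell the now-orphaned supervise
# to bring the service down (-d) and exit (-x)
rm /service/myservice
svc -dx /etc/sv/myservice
```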
144
145 Another tool, `svc`, lets you send signals to the running processes
146 (e.g. `svc -p` sends a `STOP` signal, and `svc -d` sends a `TERM`
147 signal as well as telling `supervise` not to restart the service
148 afterwards.)
149
150 ## Expressing Service Dependencies
151
152 The `daemontools` design allows for various helper tools. One of them
153 is `svok`, which finds out whether a given service is running. This
154 is just another Unix program that will exit with either `0` if
155 the process is running, or `100` if it is not. That means we can
156 write
157
158 ~~~~
159 #!/bin/sh
160 svok postgres || exit 1
161 exec 2>&1
162 exec python2 some-web-app.py
163 ~~~~
164
165 and the script will die (prompting `svscan` to wait a moment and
166 then restart it) unless `postgres` is already running.
167
168 ## Expressing Resource Limits
169
170 `daemontools` has several other applications that can enforce
171 various resource limits or permissions. These are not part of the
172 service mechanism—instead, they simply modify _the current
173 process_ and then `exec` some other command. That means that you
174 can easily incorporate them into a service script:
175
176 ~~~~
177 #!/bin/sh
178 exec 2>&1
179 # change to the user 'sample', and then limit the stack segment
180 # to 2048 bytes, the number of open file descriptors to 2, and
181 # the number of processes to 1:
182 exec setuidgid sample \
183    softlimit -s 2048 -o 2 -p 1 \
184 some-small-daemon -n
185 ~~~~
186
187 These aren't actually special, and don't have anything to do with
188 the `daemontools` service mechanism. Any shell script
189 can incorporate `setuidgid` or `softlimit`, even if those scripts
190 have nothing to do with service management!
191
192 ## Allowing User-Level Services
193
194 If I want a given _user_ to have their own services that are run
195 _as_ that user, all I need to do is have another `svscan` running as
196 that user and pointing at another directory, which I can run as another
197 top-level service:
198
199 ~~~~
200 #!/bin/sh
201 exec 2>&1
202 exec setuidgid user \
203 /usr/sbin/svscan /home/user/service
204 ~~~~
205
206 # Variations
207
208 What I described above was vanilla `daemontools`. Other systems
209 are designed for booting entire systems with this kind of service
210 management. Variations on this basic design add various features:
211
212 * The [`runit`](http://smarden.org/runit/) package
213 extends `supervise` with the ability to
214 execute a `./finish` script if the `./run` script fails, to do
215 various kinds of cleanup. (`runit` renames `svscan` and
216 `supervise` to `runsvdir` and `runsv`, respectively.)
217 * The [`s6`](http://skarnet.org/software/s6/index.html) package
218 adds even more options to both core
219 programs (which are here named `s6-svscan` and
220 `s6-supervise`) to e.g. limit the maximum number of services
221 or modify how often scanning is done. It additionally allows
222 control of an `s6-supervise` instance through a directory of
223 FIFOs called `./event`.
224 * The [`daemontools-encore`](http://untroubled.org/daemontools-encore/)
225 package adds even more optional
226 scripts: a `./start` script which is run before the main
227 `./run` script and a `./stop` script after the service is
228 disabled, a `./notify` script which is invoked when the
229 service changes, and a few others.
230 * The [`nosh`](http://homepage.ntlworld.com/jonathan.deboynepollard/Softwares/nosh.html)
231 package is designed as a drop-in replacement for
232 `systemd` on platforms where `systemd` cannot run (i.e. any
233 Unix that is not a modern Linux) and so has a lot of
234 utilities that superficially emulate `systemd` as well as
235 tools which can convert `systemd` units into `nosh` service
236 directories. `nosh` is the most radically divergent of the
237 bunch, but is clearly a `daemontools` descendant (and
238 incorporates most of the changes from `daemontools-encore`,
239 as well.)
240
241 Additionally, all these (except for `daemontools-encore`) have
242 other capabilities used to set up a Unix system before starting
243 the service-management portion.
244
245 # The Takeaway
246
247 The whole `daemontools` family has two properties which I really
248 appreciate:
249
250 1. A strong commitment to never parsing anything.
251 2. A strong commitment to using Unix as a raw material.
252
253 ## Why avoid parsing?
254
255 Parsing is a surprisingly difficult thing to get right. Techniques
256 for writing parsers vary wildly in terms of how difficult they are, and
257 [parsing bugs are a common source](http://www.cs.dartmouth.edu/~sergey/langsec/occupy/)
258 of
259 [weird machines](https://en.wikipedia.org/wiki/Weird_machine) in computer
260 security. Various techniques can make parsing easier and less
261 bug-prone, but it's a dangerous thing to rely on.
262
263 One way to get around this is to just skip parsing altogether. This
264 is difficult in Unix, where most tools consume and emit plain text
265 (or plain binary.) In other environments, such as individual
266 programming-language ecosystems or shells like Windows PowerShell,
267 the everything-is-plain-text requirement is relaxed, allowing
268 tools to exchange structured data without reserializing and
269 reparsing.
270
271 The way to avoid parsing _in Unix_ is to use various kinds of
272 structure to your advantage. Take the file system:
273 it can, used correctly, emulate a tree-like structure or a
274 key-value store. For example, one supplementary `daemontools` utility is `envdir`,
275 which reads in environment variables not by parsing a string of
276 `name=value` pairs, but by looking at a directory and turning the
277 filename-to-file-contents mapping into a variable-name-to-variable-content
278 mapping.
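
That mapping is simple enough to emulate in plain shell. This is only a sketch: the real `envdir` is a C program that `exec`s a child process with the modified environment rather than exporting into the current shell, and it handles details like removing a variable when its file is empty.

```shell
#!/bin/sh
# build a directory of variables...
mkdir -p ./env
printf 'hello' > ./env/GREETING
printf '8080'  > ./env/PORT

# ...and turn the filename-to-contents mapping into
# a variable-name-to-value mapping
for f in ./env/*; do
  export "$(basename "$f")=$(cat "$f")"
done

echo "GREETING=$GREETING PORT=$PORT"   # prints: GREETING=hello PORT=8080
```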
279
280 You might argue that this is silly—after all, parsing an environment
281 variable declaration is as easy as `name=value`! Could a system really
282 introduce a security bug in parsing something as simple as that? As it
283 happens, [the answer is yes.](https://en.wikipedia.org/wiki/Shellshock_%28software_bug%29)
284
285 So `daemontools` avoids parsing by using directories as an organizing
286 principle, rather than parsing configuration files.[^2] This is very much
287 a design principle in its favor.
288
289 [^2]: One might argue that this is a little bit disingenuous: after all,
290 you're still invoking shell scripts! If one part of your system avoids
291 parsing, but then you call out to a piece of software as infamously complicated
292 and buggy as `bash`, all that other security is for naught. But
293 there's no reason that you _have_ to write your scripts in `bash`, and in fact,
294 the creator of `s6` has built a new shell replacement for that purpose:
295 namely, [`execline`](http://skarnet.org/software/execline/), which is designed
296 around both security and performance concerns. If you wanted, you could replace
297 all those shell scripts with something else, perhaps something more like
298 the [`shill` language](http://shill.seas.harvard.edu/). Luckily, the
299 `daemontools` way is agnostic as to what it's executing, so it is
300 easy to adopt these tools as well!
301
302 ## What is "Unix as a raw material"?
303
304 The building blocks of `daemontools` are the parts of Unix which are
305 common to every modern Unix variant: directories and executables and
306 Unix processes and (in some of its descendants) FIFOs.
307 This means you have a universe of actions you can perform
308 outside of the `daemontools` universe:
309
310 - Your scripts can be written in anything you'd like, not just a
311 shell language. You could even drop a compiled executable in, at the
312 cost of later maintainability.
313 - Similarly, `daemontools` services are trivially testable, because
314 they're just plain ol' executables.
315 - Lots of details get moved out of service management because they
316 can be expressed in terms of other building blocks of the system.
317 There's no need for a 'which user do I run as' configuration flag,
318 because that can get moved into a script. (Although that script
319 can also consult an external configuration for that, if you'd like!)
320 - Your directories can be arranged in various ways, being split up
321 or put back together however you'd like.[^4]
322
323 [^4]: I personally tend to have a system-level
324 `/etc/sv` for some services and a user-level `/home/gdritter/sv` for
325 other services, regardless of whether those services are run in my
326 user-level service tree in `/home/gdritter/service` or the root-level
327 tree in `/service`.
328
329 In contrast, service management with `upstart` or `systemd` requires
330 special configuration files and goes through bespoke RPC mechanisms,
331 which means that interacting with them is only really possible
332 through their own tools. Testing a service with
333 `upstart` or `systemd` requires some kind of special testing tool
334 in order to parse the service description and set up the environment
335 it requests. Dependency management must be built in, and could not
336 have been added after the fact. The same goes for resource limits
337 and process isolation, and so forth.
338
339 "Unix design" has sometimes been used to justify some very poor
340 design choices, but well-done system design that embraces the Unix
341 building blocks in sensible ways has a lot in common with functional
342 program design: small building blocks that have well-defined
343 scope and semantics, well-defined side effects (if any), and fit
344 well into a larger system by making few assumptions about what
345 exists outside of them. `daemontools` is a perfect example of Unix
346 design done well.
1 The common mantra of Unix is that "everything is a file", and this is
2 to a mild extent true in most Unix variants, but Unix only takes that
3 concept so far. Lots of things are file-like until you prod them, at
4 which point they behave slightly unlike files—say, named pipes, which
5 sometimes work as ordinary files and sometimes don't.
6
7 The Plan 9 operating system (and its successor, Inferno) takes the
8 "everything is a file" notion much further. This is because _all_ file
9 systems are exposed via a single protocol (9P), which means that it's
10 quite easy to build a file system by simply implementing the
11 appropriate socket communications. Linux can do this with FUSE, but
12 that facility isn't used to anywhere near the degree it is in Plan 9.
13
14 I'm going to go through [...]
15
16 # Representing Data through a File System
17
18 Because you can build file systems easily, you can easily take a piece
19 of data and represent it as a file structure instead of a collection of
20 bytes. Plan 9 ships with, for example, `tarfs` and `9660srv`, which can
21 take tarballs and ISO images respectively and mount them as read-only
22 file systems.
23
24 Here is a file system I've written in 9P which takes a JSON file and
25 mounts it as a file system. Objects get represented as directories
26 with files or subdirectories named after each key. Arrays get
27 represented as directories with files or subdirectories named after
28 the indices. Everything else becomes a file containing the JSON
29 serialization of that piece of data.
30
31 $ cat sample.json
32 { "names": [ "Alouise"
33 , "Eberhardt"
34 , "Furtwangler"
35 ]
36 , "size": 3
37 }
38 $ mount.json sample.json tmp
39 $ ls -R tmp
40 tmp/
41 names size
42
43 tmp/names
44     0 1 2
45 $ cat tmp/size && echo
46 3
47 $ cat tmp/names/1 && echo
48 "Eberhardt"
49
1 I want to get away from syntax—because really, who actually likes
2 bikeshedding?—but first, I want to give the details of a few
3 syntaxes I've implemented: three different
4 languages for markup and data storage that I've used in personal
5 projects, each split apart into a standalone implementation in case
6 it's useful for anyone else.
7
8 I didn't invent any of these formats, but I did implement and
9 formalize them, which implicates me at least a little bit in
10 their creation. Of course, whenever you invent a markup/data
11 storage format, you should ask yourself two questions:
12
13 1. What purpose does this serve that isn't served by an existing
14 format?
15 2. What purpose does this serve that isn't served by
16 [S-expressions](http://en.wikipedia.org/wiki/S-expression)
17 specifically?
18
19 For each of these formats, I will include the following:
20
21 - A description of the abstract (parsed) representation that
22 the format describes, as well as an example of the format
23 and its corresponding abstract structure.
24 - Answers to the above questions: i.e., why does this format
25 exist, and why not use an existing format for the same
26 purpose?
27 - A _formal grammar_ which defines the language. There should
28 be no ambiguity in the resulting grammar, and in particular,
29 it should be the case that, for all `x`, `x = decode(encode(x))`.
30 Note that the opposite needn't be true (e.g. because of comments,
31 indentation, &c.)
32 - A C implementation that relies on _no external libraries_.
33 This _includes_ libc, which means that the implementations
34 _do not rely on the presence of_ `malloc`. This is done
35 by only allowing access to the parsed structure through a
36 callback, and threading the parsed structure through the
37 stack.
38 - A C implementation that relies on _no external libraries_
39 but expects the user to pass in their own allocator, which
40 can (and will be, in most cases) `malloc`, but doesn't need
41 to be.
42 - A Haskell implementation whose imports are limited to the
43 types that their APIs expose that are not included in `base`,
44 e.g. `ByteString`, `Text`, `Vector`, and in one case, `XML`.
45
46 # NML
47
48 ## Structure
49
50 All NML documents have the same abstract structure as an
51 XML document. The following NML document:
52
53 ~~~~
54 <one<two|three>|
55 four
56 <five|six>
57 >
58 ~~~~
59
60 corresponds to the following XML document:
61
62 ~~~~
63 <one two="three">
64 four
65 <five>six</five>
66 </one>
67 ~~~~
68
69 ## Why
70
71 1. NML is a format originally proposed by <Erik Naggum>, who is
72 well-known for his vituperative rants in support of Common
73 Lisp and against XML. NML was originally intended as a
74 demonstration of why you _don't_ need an attribute/element
75 distinction, but variations on it were briefly adopted by
76 <some people> as a somewhat more human-friendly way of writing
77 XML.
78
79 It was never fleshed out into a full specification by
80 Naggum—and indeed, most of the users modified it in some
81 way or other—but I have followed Naggum's examples and extended
82 it with specifications that deal with the full generality of
83 XML, including features like namespaces. (Hence, GNML, or
84 Generalized NML.) Finally, I specified
85 that GNML allows for Unicode input and in fact does not understand
86 character references at all: escaped Unicode characters in GNML
87 correspond to character references in XML, and vice versa.
88
89 My original use for GNML was as a tool for viewing and
90 producing XML, i.e. I could write NML and on-the-fly convert
91 it to valid XML that tools could understand, and dump XML
92 in pleasantly human-readable GNML. If your data will never be
93 seen by human eyes, it would be better to simply use XML—the
94 tools for working with XML are far more numerous than tools
95 for working with my particular dialect of GNML.
96
97 2. If you can use S-Expressions instead, then use S-Expressions.
98 It is in fact trivial to map S-Expressions to XML and vice
99 versa, and almost every language has mature libraries for
100 parsing and emitting S-Expressions.
101
102 ## Alternatives
103
104 - [XML]()
105 - [S-Expressions]()
106
107 ## Formal Grammar
108 ## Implementations
109
110 # NDBL
111
112 ## Structure
113
114 All NDBL documents take the form of a sequence of sequences
115 of key-value pairs, where a _key_ is a string of non-whitespace
116 characters and a _value_ is any string. NDBL source files must
117 be UTF-8 encoded. The following NDBL document:
118
119 ~~~~
120 a=b # x
121 c="d e"
122 f= g=h
123 ~~~~
124
125 corresponds to the following abstract structure expressed
126 in Python syntax:
127
128 ~~~~
129 [ [ ("a", "b"), ("c", "d e") ]
130 , [ ("f", ""), ("g", "h") ]
131 ]
132 ~~~~
133
134 ## What
135
136 NDBL keys are always unquoted strings of Unicode characters
137 which contain neither whitespace nor the character `=`, but
138 may contain other punctuation. NDBL values are strings, which
139 may be unquoted (in which case they are terminated by
140 whitespace) or may be quoted (in which case they are terminated
141 by the first quotation mark character not preceded by a
142 backslash.) Comments are started by whitespace followed by a
143 pound sign `#` and continue to the end of a line. A new group
144 is started by a key-value pair at the beginning of a line; so
145 long as subsequent key-value pairs do not start immediately
146 after a newline, they are added to the previous group.
147
148 ## Why
149
150 1. NDBL is so-named because it is based on the configuration
151 format of Plan 9's NDB utility, with some minor modifications.
152 NDBL is a _configuration_ format, not a data storage or markup
153 format, and as such does not have a complicated internal
154 structure. In fact, the grammar of NDBL is _regular_, and could
155 be recognized by a regular expression. (It cannot be _parsed_
156 by a regular expression as that would require the ability to
157 perform a submatch under a Kleene star.)
158
159 The simplest configuration format is really just key-value
160 pairs (which is one of the reasons that environment variables
161 are often used for configuration.) NDBL adds only a bare
162 minimum on top of that: it adds a notion of grouping to
163 sequences of key-value pairs. Crucially, the groups are _not_
164 maps, because the same key may appear more than once:
165
166 ~~~~
167 # a single group with repeated keys:
168 user=terry
169 file=foo.txt
170 file=bar.txt
171 file=baz.txt
172 ~~~~
173
174 The reason NDBL might be desirable is that many other
175 configuration formats are _very_ heavyweight. TOML was
176 originally created as a response to the complexity of YAML,
177 and TOML is itself quite complicated. Simpler formats,
178 such as JSON, lack the ability to have comments—a must for
179 configuration—and many other configuration formats are
180 ad-hoc and have no formal specification (e.g. INI, which
181 TOML greatly resembles.) So, NDBL was codified as a
182 response to this.
183
184 2. You should probably just use S-Expressions.
185
186 ## Alternatives
187
188 - [YAML]()
189 - [INI]()
190 - [TOML]()
191 - [S-Expressions]()
192
193 ## Formal Grammar
194 ## Implementations
195
196 # TeLML
197
198 ## Structure
199
200 All TeLML documents take the form of sequences of _fragments_,
201 where a fragment is either a chunk of Unicode text or a
202 _TeLML tag_, which consists of a tag name and a sequence
203 of sequences of fragments.
204
205 The following TeLML document:
206
207 ~~~~
208 a \b c \d{e,\f{}, g }
209 ~~~~
210
211 corresponds to the following abstract structure expressed
212 in Python syntax:
213
214 ~~~~
215 [ "a "
216 , Tag(name="b")
217 , " c "
218 , Tag(name="d",
219 args=[ "e"
220 , Tag(name="f",
221 args=[""])
222 , " g "
223 ])
224 ]
225 ~~~~
226
227 ## What
228
229 If a document never contains any instance of the characters
230 `\`, `{`, or `}`, then it is a TeLML document of a single text
231 fragment. Tags with an empty argument list are written as
232 `\tagname`, while tags with an argument list are followed by
233 curly braces, with individual arguments separated by commas,
234 e.g., `\tagname{arg1,arg2,...,argn}`. Curly braces without
235 a preceding tag are used for grouping, and do not appear in
236 the parsed structure; this may sometimes be useful to
237 separate argument-less tags from adjacent text fragments:
238 `\foobar` is a single tag called `foobar`, and `\foo{bar}`
239 is a single tag `foo` with an argument `bar`, but
240 `{\foo}bar` is two fragments: a nullary tag `foo` followed
241 by a text fragment `bar`.
242
243 ## Why
244
245 1. TeLML grew out of Markdown's lack of extensibility. What
246 I wanted was a user-extensible markup language that had
247 the ability to include rich structures.
248
249 ## Alternatives
250
251 - [SGML]()
252 - [DocBook]()
253 - [MediaWiki]()
254 - [S-Expressions]()
255
256 ## Formal Grammar
257 ## Implementations
1 I read a paper recently that described a new looping construct. This
2 is admittedly unusual: imperative languages have pretty much settled
3 on their looping constructs by now, and with a few exceptions
4 I don't see much change. You've got your `while` and (increasingly
5 rarely) your `do while` loop, which moves the test around:
6
7 ~~~~
8 cond -> true
9 -----------------------------------------------------------
10 while (cond) { body } -> body; while (cond) { body }
11
12 cond -> false
13 -------------------------------------
14 while (cond) { body } -> ()
15
16
17 cond -> true
18 ------------------------------------------------------------------
19 do { body } while (cond) -> body; do { body } while (cond)
20
21 cond -> false
22 ------------------------------------------
23 do { body } while (cond) -> body
24 ~~~~
25
26 We also have the nice scaffolding provided to us by `for` loops, which
27 are nice sugar for `while` loops:
28
29 ~~~~
30 for (init, cond, iter) { body } => init; while (cond) { body; iter }
31 ~~~~
32
33 and we have flavors of `foreach` loops, which add awareness of some
34 underlying iterator protocol or collection type and introduce an
35 element of binding:
36
37 ~~~~
38 foreach (x in coll) { body } => it = coll.iter(); while (it.has_next()) { x = it.next(); body }
39 ~~~~
1 George Orwell has a famous essay called "Politics and the English
2 Language," which is widely cited, discussed, and endorsed in
3 all manner of situations. Across the intellectual landscape, in
4 many different contexts, people endorse it as wonderful writing
5 advice and an excellent critique of vapid political discourse.
6
7 Orwell puts forth several rules for clear writing, all of which
8 are motivated by a modicum of reasoning. All of them, when
9 followed to their conclusion, can only produce confused and
10 impenetrable prose. Orwell admits that he does not always
11 follow them—and of course the last rule gives the reader
12 explicit permission to violate the rules in certain
13 circumstances—but it very much makes one wonder what the point
14 of "rules" is if they are made to be broken so regularly.
15
16 Let's look at each of these six rules:
17
18 # 1. Never use a metaphor, simile, or other figure of speech which you are used to seeing in print.
19
20 Orwell's complaints about metaphor are twofold: one, that overuse
21 of these metaphors robs them of their meaning, and two, that
22 they can become divorced from their original metaphorical meaning
23 and become abstract phrases.
24
25 The first objection presupposes that meaning is necessarily
26 diminished through use, and in some cases, this may very well
27 be true. The impact of a word like _fuck_ is of course lessened
28 if it is used incessantly, and phrases that necessarily connote
29 some kind of unusualness or uniqueness—like _Achilles' Heel_,
30 which connotes a unique but absolute weakness—can suffer a
31 semantic shift such that they start to connote a less unique
32 situation.
33
34 Those, however, are special cases. In general, meaning is not a
35 quantity that diminishes with use—it is a quality that shifts,
36 and no number of essays can prevent metaphors or even just words
37 from shifting their meanings over time. This is no more or less
38 true of figures of speech than it is of words: who today uses
39 the word 'silly' to mean "blessed", or the word 'bonanza' to
40 mean "fair weather at sea"[^bonanza]?
41
42 [^bonanza]: For that matter, who today uses the word 'bonanza'?
43
44 As for the larger point: the reason that metaphors are useful
45 is because they can take a situation and explicate it via
46 similarity. A writer shouldn't overuse them, but on the other
47 hand, why avoid a common metaphor if it is appropriate to the
48 situation at hand? I could attempt to come up with an uncommon
49 metaphor to describe the "flow of money"—money is not a liquid
50 and does not flow!—but why do this when I could
1 Fulcrum was born out of the fact that I like pivot tables while I hate
2 spreadsheets.
3
4 Pivot tables, if you haven't used them, are a very nice facility in most
5 spreadsheet programs. They're an interactive facility for analyzing and
6 working with various kinds of data---you list which labels you want along
7 the columns and rows and how to reduce your data down (e.g. whether you
8 want to sum or average the numbers in question), and you can simply
9 drag-and-drop field identifiers and whatnot in order to get a new view
10 on your data. It's very nice.
11
12 Unfortunately, spreadsheet software is very tedious. Quite often, I
13 would like to do some kind of automated processing on the data I've
14 produced, and doing so is often an exercise in horrible frustration.
15 The scripting languages included with spreadsheet software are
16 of a uniformly terrible quality and the APIs provided are generally
17 quite inflexible.
18
19 So, I wrote my own from scratch in Haskell. It was a reasonably nice
20 design---it involved using the lens package to select out particular
21 fields from a data structure, and sets of reductions on the data.
22
23 In fact, it was nice enough that it took only a trivial change to turn
24 it from a program for specific data processing to a library for
25 data processing in general. It is (almost) totally agnostic as to what
26 kind of data you put in and what kind of data you get out, with a few
27 caveats that I'll get into.
28
29 So, here's what Fulcrum looks like:
30
31 # A Short Example
32
33 Let's assume we have an `Employee` type as follows:
34
35 ~~~~
> import Data.Time (UTCTime, NominalDiffTime, diffUTCTime)
>
36 > data Department = Marketing | Sales | HR
37 > deriving (Eq, Show, Ord)
38
39 > data Employee = Employee
40 > { name :: String
41 > , birthdate :: UTCTime
42 > , dept :: Department
43 > } deriving (Eq, Show)
44 ~~~~
45
46 We want to find out the average employee age for each
47 department. (Fulcrum is overkill for this, but bear with me.)
48 First step is to find out the age of an employee _at all_, which
49 requires knowing what today is. So, we'll write a function we can
50 pass the current day to:
51
52 ~~~~
53 > getAge :: UTCTime -> Employee -> NominalDiffTime
54 > getAge today e = diffUTCTime today (birthdate e)
55 ~~~~
56
57 Now that we can find an employee's age, we can build a plan around
58 it. Again, we don't know what _today_ is, so we'll actually write
59 a function that, given a current day, will produce a plan for it:
60
61 ~~~~
62 > myPlan :: UTCTime -> Plan Employee NominalDiffTime
63 > myPlan today = Plan
64 > { planName = "Average Employee Age"
65 ~~~~
66 if we graph it, we'll use the plan name as the title. We can put
67 basically anything we want.
68 ~~~~
69 > , planFCName = "age"
70 > , planFocus = getAge today
71 ~~~~
72 The focus is whatever field we'd like to extract as the relevant
73 dependent variable. We pass in a function which can take us from
74 the row type to a focus type.
75 ~~~~
76 > , planFilters = const True
77 ~~~~
78 We don't want to filter out any rows at all, so the filter function
79 just returns True to every row.
80 ~~~~
81 > , planMaps = id
82 ~~~~
83 We also don't want to do any pre-emptive modification to the data,
84 so the planMaps just return the rows unchanged.
85 ~~~~
86 > , planAxis = select dept "department"
87 ~~~~
88 The `planAxis` field also takes a getter (wrapped in a `Select`
89 constructor, which I'll explain below) to select based on an
90 independent variable. In this case, we're selecting on which
91 department the employee is in, so we'll turn the record accessor
92 function into a `Getter`. The `select` function also wants to
93 know the name of the axis, for graphing and metadata purposes,
94 so we'll give it that, too.
95 ~~~~
96 > , planLines = mempty
97 ~~~~
98 If we wanted to have multiple data series on a chart, we'd use
99 the `planLines` field, but this example won't require it, so we
100 can supply it a null value. `Select` values form a monoid in
101 which combining two `Select`s produces a `Select` that produces
102 a pair of values, so their identity is an "empty" `Select` that
103 always produces `()`.
104 ~~~~
105 > , planMerge = avg
106 > }
107 ~~~~
108 And here we supply the function `avg`, which takes the average of
109 a list of numbers, as the relevant merge function.
110
111
112
113 # Plans
114
115 Fulcrum is based around the idea of a `Plan`, which is a recipe for
116 taking a dataset and producing specific values. The `Plan` type is
117 a record containing a bunch of fields which you have to specify;
118 running it with the `runPlanToMap` function produces a table-ish
119 (if you squint) dataset in the form of a map. A few auxiliary graphing
120 functions also take a `Plan` and a dataset directly.
121
122 `Plan`s are parameterized by two types: the first is the type of the
123 individual entries in your dataset, and the second is the type of
124 the value that your plan will extract. If you're going to graph
125 your plan, the latter has to be numeric, but otherwise, it can just
126 be any Haskell type.
1 As part of recent type-refactoring efforts in Haskell,
2 a discussion about adding
3 [`Semigroup` as a parent class of `Monoid`](https://mail.haskell.org/pipermail/libraries/2015-March/025381.html)
4 has been bouncing around the mailing list.
5 From a theoretical point of view, this is a great idea:
6 it is more flexible than the current approach and would allow
7 for greater expressibility.
8
9 From a _practical_ point of view, however, I am inclined to oppose it.
10 Not because it is _in itself_ a bad change—it's a very reasonable
11 change that has advantages for new code—but
12 because I have, in the past, had to update large systems
13 written in Haskell after GHC updates, and therefore I know that
14 _this kind of change has a cost_. The Applicative-Monad changes
15 seem to have made way for the Foldable-Traversable Prelude, but
16 those have in turn inspired a number of suggestions for
17 modifications to the Haskell standard library, each one of which,
18 taken on its own, is reasonable, but which, taken as a mass, mean _much
19 more work_ for maintainers. This is _especially_ true if we continue
20 on this path of making minor refactorings at each release: each year
21 a project will need changes, or it will quickly bit-rot beyond
22 utility.
23
24 # Default Superclass Instances
25
26 There is, however, an alternative I would like to discuss.
27 Some of these changes—especially the `Semigroup`/`Monoid`
28 split—seem to involve taking the functionality of
29 a class and splitting it into multiple smaller classes. For
30 example, we can think of the `Semigroup`/`Monoid` change as
31 converting
32
33 ~~~~{.haskell}
34 class Monoid m where
35 mempty :: m
36 mappend :: m -> m -> m
37 ~~~~
38
39 into[^semi]
40
41 ~~~~{.haskell}
42 class Semigroup m where
43 mappend :: m -> m -> m
44
45 class Semigroup m => Monoid m where
46 mempty :: m
47 ~~~~
48
49 [Something that has been proposed before](https://ghc.haskell.org/trac/ghc/wiki/DefaultSuperclassInstances)
50 (in a [few](https://mail.haskell.org/pipermail/haskell-prime/2006-August/001587.html)
51 [different](https://wiki.haskell.org/Superclass_defaults)
52 [forms](https://wiki.haskell.org/Class_system_extension_proposal))
53 and which I suggest be
54 more actively considered if changes like these are to become
55 common is to allow _superclass instances to be declared within
56 a subclass declaration_. This would allow you to write a single
57 `instance` declaration for a class, and in that body _also include
58 implementations for methods belonging to a superclass of that
59 class_ by some means[^note]:
60
61 [^note]: This isn't a concrete proposal, so maybe the actual syntax
62 or semantics of these things should be changed! I want to focus
63 on the _feature_ and not the _instantiation_.
64
65 ~~~~{.haskell}
66 newtype MyId a = MyId a
67
68 instance Monad MyId where
69 -- Functor method
70 fmap f (MyId x) = MyId (f x)
71
72 -- Applicative methods
73 pure = MyId
74 MyId f <*> MyId x = MyId (f x)
75
76 -- Monad methods
77 return = MyId
78 MyId x >>= f = f x
79 ~~~~
80
81 For the `Monoid`/`Semigroup` proposal, this would mean that any
82 `Monoid` instances that exist would continue to
83 work unchanged, but new instances could (optionally) split apart
84 their declarations. Under this proposal, either of these would
85 be acceptable:
86
87 ~~~~{.haskell}
88 class Semigroup m where mappend :: m -> m -> m
89 class Semigroup m => Monoid m where mempty :: m
90
91 -- combined `instance` declarations:
92 instance Monoid [a] where
93 mempty = []
94 mappend = (++)
95 ~~~~
96
97 or, equivalently,
98
99 ~~~~{.haskell}
100 class Semigroup m where mappend :: m -> m -> m
101 class Semigroup m => Monoid m where mempty :: m
102
103 -- split apart `instance` declarations
104 instance Semigroup [a] where
105 mappend = (++)
106
107 instance Monoid [a] where
108 mempty = []
109 ~~~~
110
111 And because the `Monoid` declaration for `[]`
112 [is already written like the former](http://hackage.haskell.org/package/base-4.8.0.0/docs/src/GHC-Base.html#line-227),
113 we can make the `Semigroup`/`Monoid` split without having to rewrite
114 the instance declarations!
115
116 Because this lowers the cost of updating for new versions, various
117 _other_ useful changes might be considered that would otherwise
118 involve far too much breakage. For example, we could consider
119 splitting `Num` apart into small constituent parts[^num]:
120
121 ~~~~{.haskell}
122 class Add a where (+) :: a -> a -> a
123 class Sub a where (-) :: a -> a -> a
124 class Zero a where zero :: a
125
126 class Mul a where (*) :: a -> a -> a
127 class One a where one :: a
128
129 class FromInteger a where
130 fromInteger :: Integer -> a
131
132 instance FromInteger a => Zero a where zero = fromInteger 0
133 instance FromInteger a => One a where one = fromInteger 1
134
135 class Signed a where
136 negate :: a -> a
137 abs :: a -> a
138 signum :: a -> a
139
140 class ( Eq a
141 , Show a
142 , Add a
143 , Sub a
144 , Mul a
145 , FromInteger a
146 , Signed a) => Num a where
147 ~~~~
148
149 which would allow certain numeric types to only implement a
150 subset of the relevant operations:
151
152 ~~~~{.haskell}
153 data Nat = Zero | Succ Nat
154
155 instance Add Nat where
156 Zero + y = y
157 Succ x + y = Succ (x + y)
158
159 {- et cetera --- but no implementations for e.g. Signed,
160 - which is not meaningful for `Nat`!
161 -}
162 ~~~~
163
164 and also allow current `Num` functions to have a looser set of
165 constraints than they do at present:
166
167 ~~~~{.haskell}
168 sum :: (Zero a, Add a) => [a] -> a
169 sum (x:xs) = x + sum xs
170 sum [] = zero
171
172 prod :: (One a, Mul a) => [a] -> a
173 prod (x:xs) = x * prod xs
174 prod [] = one
175 ~~~~
176
177 We could also consider splitting `Arrow`[^arr] into distinct
178 components:
179
180 ~~~~{.haskell}
181 class Category a => Pairwise a where
182 first :: a b c -> a (b, d) (c, d)
183 second :: a b c -> a (d, b) (d, c)
184 (***) :: a b c -> a b' c' -> a (b, b') (c, c')
185 (&&&) :: a b c -> a b c' -> a b (c, c')
186
187 class Pairwise a => Arrow a where
188 arr :: (b -> c) -> a b c
189 ~~~~
190
191 or even (dare I dream) splitting `Bits` into
192 [something that is not a 22-method monstrosity](https://downloads.haskell.org/~ghc/7.8.2/docs/html/libraries/base-4.7.0.0/Data-Bits.html#t:Bits)!
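To make that idea concrete, here is one hypothetical way a few of `Bits`' methods could be carved off into smaller classes. The class names are mine, not part of any real proposal; the method names match those in `Data.Bits`:

```haskell
-- A hypothetical slice of Bits: just the boolean-algebra operations.
class BitLogic a where
  (.&.), (.|.), xor :: a -> a -> a
  complement :: a -> a

-- A separate hypothetical slice for shifting.
class BitShift a where
  shiftL, shiftR :: a -> Int -> a

-- Bool supports the logical operations, but shifting a Bool is
-- meaningless, so it would implement only BitLogic.
instance BitLogic Bool where
  (.&.) = (&&)
  (.|.) = (||)
  xor = (/=)
  complement = not
```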
193
194 # Potential Drawbacks
195
196 On the other hand, this proposal does have some down-sides:
197
198 ## Grepping for Instance Declarations
199
200 Right now, I can often find an instance declaration for a type `T` by
201 grepping for `instance C T` (modulo some line noise) whereas with this
202 change, it's possible that there _is_ no declaration for
203 `instance C T`, because all of `C`'s methods are declared by a
204 subclass `C'` instead. The compiler ought to be able to deal with
205 this without a problem, and tools like Haddock-generated documentation
206 should somewhat alleviate the issue, but a user might still be confused.
207
208 ## Introduces New Possible Errors
209
210 The declarations below are of course nonsensical, and would be
211 rejected by the compiler—but the fact that this change would
212 _introduce new failure conditions at all_ is a drawback
213 of the proposal.
214
215 ~~~~{.haskell}
216 instance Semigroup Int where
217 mappend = (+)
218
219 instance Monoid Int where
220 -- this conflicts with an existing declaration
221 mappend = (*)
222 mempty = 1
223 ~~~~
224
225 ## A Pragma-Less Extension
226
227 In order to be _really_ useful, we'd want to use this without a
228 `LANGUAGE` pragma. After all, we're arguing for it on the basis of
229 preserving backwards-compatibility, but that argument is much less
230 compelling if we still have to change the source files to make use
231 of it! On the other hand, that means we'd have included a GHC
232 _extension_ that takes effect despite not being enabled, which
233 is _also_ a worrying precedent!
234
235 It still might be a useful extension even if it had to be enabled by
236 a `LANGUAGE` pragma, as it is easier to add said pragma to a source
237 file than to manually break apart dozens of instance declarations,
238 but it makes this feature less compelling in general.
239
240 # In Conclusion
241
242 As I said before, my primary objection to changes of the above nature is
243 that they are _breaking_. I don't want to have to modify a handful of
244 miscellaneous instance declarations on a yearly basis as people
245 discover new levels of abstraction or become dissatisfied with current
246 choices, especially as those changes will grow more extensive
247 as I build more projects in Haskell! But with an extension like this,
248 we could grow the typeclass ecosystem gradually and fix what we see
249 as past warts _while maintaining backwards-compatibility_, which
250 would be a very powerful tool to have.
251
252 [^arr]: I have on numerous occasions had reason to use the
253 `Arrow` abstraction, but haven't had a sensible implementation
254 of `arr`. To use a contrived example, I could define a GADT that
255 can describe the structure of boolean circuits in a way that
256 resembles an Arrow, but has no way of expressing `arr`:
257
258 ~~~~{.haskell}
259 data Circ a b where
260 BoolFunc :: (Bool -> Bool) -> Circ Bool Bool
261 Ident :: Circ a a
262 Compose :: Circ a b -> Circ b c -> Circ a c
263 First :: Circ a b -> Circ (a, d) (b, d)
264 Second :: Circ a b -> Circ (d, a) (d, b)
265 Parallel :: Circ a b -> Circ a' b' -> Circ (a, a') (b, b')
266 Fanout :: Circ a b -> Circ a b' -> Circ a (b, b')
267
268 instance Category Circ where
269 id = Ident
270 (.) = flip Compose
271
272 instance Arrow Circ where
273 first = First
274 second = Second
275 (***) = Parallel
276 (&&&) = Fanout
277 arr = error "Nothing sensible to put here!"
278
279 -- for example, assuming primitive gates Not, And, Or, we can write xor:
280 xor :: Circ (Bool, Bool) Bool
281 xor = ((first Not >>> And) &&& (second Not >>> And)) >>> Or
282
283 -- ...and using xor, write a half-adder:
284 halfAdder :: Circ (Bool, Bool) (Bool, Bool)
285 halfAdder = xor &&& And
286 ~~~~
287
288 This is not an unreasonable definition—it would be nice to
289 abstract over such a definition using existing tools—but the structure
290 of the `Arrow` typeclass makes it difficult!
291
292 [^num]: For this example, I added `Zero` and `One` classes so that a
293 given type might implement an additive and multiplicative unit while
294 not necessarily having a sensible `FromInteger` implementation. For
295 example, it might not make sense to implement `fromInteger` for a
296 complex number, but complex numbers clearly have an additive unit:
297
298 ~~~~{.haskell}
299 data Complex = Complex Float Float
300 deriving (Eq, Show)
301
302 instance Zero Complex where
303 zero = Complex 0.0 0.0
304 ~~~~
305
306 This means that the `Sum` and `Product` monoids could be rewritten like:
307
308 ~~~~{.haskell}
309 newtype Product a = Product { getProduct :: a }
310 deriving (Eq, Show)
311
312 instance (One a, Mul a) => Monoid (Product a) where
313 mempty = Product one
314 x `mappend` y = Product (getProduct x * getProduct y)
315 ~~~~
316
317 Notice that I added `Zero` and `One` in such a way that an existing
318 `Num` instance declaration will not have to change anything to implement
319 those classes!
320
321 [^semi]: This is perhaps more simplistic than we want: we can also use
322 the existing `Semigroup` class from
323 [the `Semigroup` package](http://hackage.haskell.org/package/Semigroup-0.0.7/docs/Data-Semigroup.html#t:Semigroup)
324 and then, in the `Monoid` class declaration, explain how to derive the
325 methods of the superclass. This would look like:
326
327 ~~~~{.haskell}
328 class Semigroup m => Monoid m where
329 mempty :: m
330 mappend :: m -> m -> m
331
332 instance Semigroup m where
333 (.++.) = mappend
334 ~~~~
335
336 The example above is slightly simpler, which is why I relegated this
337 version to a footnote.
1 I did not like any of the S-Expression libraries that are available for
2 Haskell, for various reasons. Sometimes the API was quite poor. Sometimes
3 the implementation was wonky. Sometimes it was just a license issue.
4 Whatever it was, I felt like it was possible to do better.
5
6 So I did.
7
8 In the process, I _completely_ over-engineered the library. It does a
9 lot, perhaps more than necessary. It understands several flavors of
10 S-Expressions (Common Lisp's, Scheme's, Rivest's, and Clojure's),
11 exposing them in different ways depending on how the consumer wants
12 to use them, with optional support for various reader macros. It
13 should be easy for a library user to extend it in various ways.
14
15 That said, I hope it will be useful enough!
16
17 # S-Expression Representations
18
19 S-Expressions are generally built up of cons pairs, which are written
20
21 ~~~~
22 ( x . y )
23 ~~~~
24
25 and empty lists, which can be written differently depending on the
26 language
27
28 ~~~~
29 NIL ;; Common Lisp
30 () ;; also Common Lisp
31 '() ;; Scheme, which is sugar for
32 (quote ()) ;; also Scheme
33 ;; etc.
34 ~~~~
35
36 So one way of exposing an S-Expression based API is to use a data type
37 like:
38
39 ~~~~{.haskell}
40 data SExpr
41 = Atom {- some atom representation -}
42 | Cons SExpr SExpr
43 | Nil
44 ~~~~
45
46 …but this also isn't how S-Expressions are usually used. You rarely see
47 dotted pairs in practice; instead, most S-Expressions are well-formed
48 lists, which are sugar for sequences of cons pairs that end in an
49 empty list:
50
51 ~~~~
52 (a b c d) === (a . (b . (c . (d . ()))))
53 ~~~~
54
55 So perhaps we want this representation:
56
57 ~~~~{.haskell}
58 data SExpr
59 = Atom {- ... -}
60 | List [SExpr]
61 ~~~~
62
63 …but while this is acceptable for some uses, this API means we can't
64 address the presence of dotted lists at all! So perhaps we should
65 include them as a special case, making them easy to throw away in
66 processing:
67
68 ~~~~{.haskell}
69 data SExpr
70 = Atom {- ... -}
71 | List [SExpr] -- for (a b ... c)
72 | DotList [SExpr] SExpr -- for (a b ... c . d)
73 ~~~~
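A sketch of what converting from the raw cons representation into this third, "rich" representation might look like; the constructor names here are illustrative, not the library's actual API:

```haskell
-- Raw cons-pair representation
data SExpr a
  = Atom a
  | Cons (SExpr a) (SExpr a)
  | Nil
  deriving (Eq, Show)

-- "Rich" representation with explicit proper and dotted lists
data RichSExpr a
  = RAtom a
  | List [RichSExpr a]                  -- (a b ... c)
  | DotList [RichSExpr a] (RichSExpr a) -- (a b ... c . d)
  deriving (Eq, Show)

toRich :: SExpr a -> RichSExpr a
toRich (Atom a)   = RAtom a
toRich Nil        = List []
toRich (Cons x y) = case toRich y of
  List ys      -> List (toRich x : ys)      -- tail was a proper list
  DotList ys z -> DotList (toRich x : ys) z -- tail was already dotted
  z            -> DotList [toRich x] z      -- tail was a bare atom
```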
74
75 [library] always uses the first representation internally, but
76 exposes a version of each of these data types. The first is
77 called `SExpr`, while the latter two are called `WellFormedSExpr`
78 and `RichSExpr`. It also exposes, for each of these, a set of
79 pattern synonyms so that they can be used interchangeably.
80 For example, the following code will work regardless of which S-Expression
81 type is being used:
82
83 ~~~~{.haskell}
84 import Data.SExpression.Repr.{- any -}
85
86 sum (Atom n) = n
87 sum (Cons x y) = sum x + sum y
88 sum Nil = 0
89 ~~~~
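A rough sketch of how such pattern synonyms can paper over the representation difference: here the list-based type is consumed with cons-style patterns. This is illustrative only, not the library's actual definitions:

```haskell
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}

-- The list-based representation...
data SExpr a = SAtom a | SList [SExpr a]
  deriving (Eq, Show)

-- ...viewed through cons-style patterns:
pattern Nil :: SExpr a
pattern Nil = SList []

pattern Cons :: SExpr a -> SExpr a -> SExpr a
pattern Cons x xs <- SList (x : (SList -> xs))

-- A consumer written against atom/cons/nil patterns, oblivious to
-- the underlying representation:
sumAtoms :: SExpr Int -> Int
sumAtoms (SAtom n)   = n
sumAtoms (Cons x xs) = sumAtoms x + sumAtoms xs
sumAtoms Nil         = 0
sumAtoms _           = 0  -- unreachable; silences exhaustiveness warnings
```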
90
91 # Which Atoms?
92
93 There is no "standard S-Expression" grammar, and the various kinds of
94 S-Expressions understood by different tools have general commonality
95 but can vary wildly in the specifics. For example, the data types
96 understood by Common Lisp and Scheme are broadly similar but have
97 mild differences, while the Rivest Internet Draft about S-Expressions
98 as a transport mechanism has a very idiosyncratic set of atom types
99 used to represent octet strings.
100
101 [Library] does not specify one, but instead exposes several. The
102 modules `Data.SExpression.CommonLisp`, `Data.SExpression.Scheme`,
103 and `Data.SExpression.Rivest` all understand various flavors of
104 atoms: each defines its own `Atom` type and functions to parse
105 and serialize S-expressions using that `Atom`.
106
107 However, the machinery of S-expressions themselves (i.e. the part
108 that is basically just nested parens) is generic and shared
109 between the implementations. The `SExpr` type is parameterized
110 by an atom type, which could be one of the built-in atom types:
111
112 ~~~~
113 Prelude> import qualified Data.SExpression.CommonLisp as CL
114 Prelude CL> import qualified Data.SExpression.Scheme as S
115 Prelude CL S> :t CL.parse
116 CL.parse :: Text -> SExpr CL.Atom
117 Prelude CL S> :t S.parse
118 S.parse :: Text -> SExpr S.Atom
119 ~~~~
120
121 Additionally, interfaces are present which would allow a user to
122 extend the library with their own notion of an atom. We can use
123 any type we want as an atom: so long as we can provide an
124 Attoparsec parser and a printer for it, we can build
125 up a `SExprSpec` value, which describes a custom S-expression
126 format:
127
128 ~~~~{.haskell}
129 import Control.Applicative
130 import Data.SExpression.General
131 import Data.Text (Text, pack)
132
133 mySpec :: SExprSpec SExpr Int
134 mySpec = mkSExprSpec parseNum showNum
135 where parseNum = read <$> many1 digit
136 showNum = pack . show
137
138 myParse :: Text -> Either String (SExpr Int)
139 myParse = parseGeneral mySpec
140
141 myEmit :: SExpr Int -> Text
142 myEmit = emitGeneral mySpec
143
144 myParseRich :: Text -> Either String (RichSExpr Int)
145 myParseRich = parseGeneral (toRich mySpec)
146
147 main = do
148 assert $ myParse "(1 2 3)" == SCons 1 (SCons 2 (SCons 3 Nil))
149 assert $ pack "(1 2 3)" == myEmit (SCons 1 (SCons 2 (SCons 3 Nil)))
150 ~~~~
151
152 For the most part, you shouldn't need to do this, but the option
153 is there should you want it.
154
155 # Reader Macros
156
157 Simple S-expressions are a common data storage format, but lots of
158 Lisp-based uses of S-expressions include some facility for
159 _reader macros_ as well. The most commonly seen reader macro in
160 most Lisps is introduced by the quote, which allows you to execute
161 a
1 In this post, I'm going to argue that dynamic typing is not an inferior
2 tool that is superseded by strong static typing, but rather an important
3 tool in the toolkit of a programmer, and that different problem domains
4 are best served by different tool sets.
5
6 # Defining Terms
7
8 These discussions tend to result in a lot of tedium because of
9 terminological confusion, so let's start with this:
10
11 - *Static Typing* means that a programming language will reject
12 certain programs as syntactically valid but semantically anomalous
13 according to a 'type system', thus preventing programs which
14 cause certain classes of run-time errors from ever being run.
15
16 - *Dynamic Typing* means that a programming language will execute
17 syntactically valid programs, allowing those programs to execute
18 until run-time errors are found, generally based on run-time values
19 which correspond to the type of a value. Bob Harper, following in
20 the tradition of Dana Scott, insists on calling these
21 [unityped languages](http://existentialtype.wordpress.com/2011/03/19/dynamic-languages-are-static-languages/),
22 but I will stick with the more common nomenclature.
23
24 - To some degree, I make a tripartite distinction between languages
25 with dynamic types, static types, and *dependent types*, despite the
26 fact that
27 dependent types are a kind of static type. A *dependent type system*
28 allows types to depend on values, and I make the distinction because
29 programming in a dependently typed system has a very different flavor
30 than programming in a static but non-dependently-typed system.
31
32 # Why Do We Want To Use Type Systems?
33
34 Before defending dynamic languages, I want to start with something
35 basic: why do we want static languages in the first place?
36
37 Type systems are a lightweight but formal system for reasoning about
38 the correctness of programs. A type system is there to help the
39 programmer write correct programs by preventing various classes of
40 errors. Different type systems can catch different classes of
41 errors: for example, even a very weak type system like C's will warn
42 you if you attempt to take the square root of a string:
43
44 ~~~~
45 tmp.c:5:5: error: incompatible type for argument 1 of 'sqrt'
46 ...
47 note: expected 'double' but argument is of type 'char *'
48 ~~~~
49
50 More and more powerful type systems can rule out more and more
51 classes of bugs, while at the same time allowing programs that
52 might be rejected with other type systems. Most ML-ish functional
53 languages make it impossible to execute programs which would
54 result in null pointer exceptions, which are still permitted by
55 Java's type system. Haskell's type system makes a distinction
56 between code that performs side effects and code that doesn't,
57 and doesn't allow calling the former from the latter. Rust uses
58 its type system to ensure memory safety while avoiding garbage
59 collection.
60
61 At the far end of the spectrum, there are the dependently
62 typed languages, which allow you to write executable mathematical
63 objects that prevent errors in a way which is impossible to express in
64 "weaker" statically typed languages. You encode your specification,
65 then you write your program, and if your program doesn't fit
66 the specification, it will fail to compile. This is sometimes
67 considered an ultimate goal of strongly statically typed languages:
68 the ability to express all errors as type errors so that they can
69 be statically prevented.
70
71 There are other advantages, as well: one is performance-related. The
72 more information we have about our program at _compile_ time,
73 the less we need to know about it at _run_ time. If our program is
74 constructed in such a way that we cannot dereference a null pointer, we
75 don't need to check for null before dereferences. If our program
76 is constructed in such a way that a given value is always a machine integer,
77 we can perform machine addition without first checking an associated
78 runtime tag. If our program is constructed in such a way that a given
79 list is non-empty, we can omit runtime length checks.
80
81 Another is ergonomic: people who are new to typed languages will talk
82 about "fighting the type system", but people who are comfortable with
83 static type systems often report the opposite experience, using the types as a
84 way of guiding you towards correct solutions. A basic practical example
85 is using types as a way of distinguishing sanitized from unsanitized
86 strings to prevent SQL injection. What was once a matter of thorough
87 testing and careful code discipline becomes simple: if you do it wrong,
88 your compiler will tell you.
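A minimal sketch of that discipline in Haskell (the names `SanitizedQuery`, `sanitize`, and `runQuery` are made up for illustration; a real library would also hide the constructor in its export list):

```haskell
-- The only way to obtain a SanitizedQuery is via sanitize, so an
-- unsanitized string can never reach runQuery by accident. In a real
-- module, the SanitizedQuery constructor would not be exported.
newtype SanitizedQuery = SanitizedQuery String
  deriving (Eq, Show)

sanitize :: String -> SanitizedQuery
sanitize = SanitizedQuery . concatMap escape
  where
    escape '\'' = "''"  -- double up single quotes
    escape c    = [c]

runQuery :: SanitizedQuery -> IO ()
runQuery (SanitizedQuery q) = putStrLn ("running: " ++ q)
```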
89
90 All of this is to say: type systems are wonderful! They are an invaluable
91 tool for writing code and verifying its correctness, and I hope that everyone
92 learns and uses at least one strong statically-typed language.
93
94 # Why Might You _Not_ Want To Use Type Systems?
95
96 I can think of three big reasons:
97
98 - For certain problems, _stronger types are a hurdle to expressing the
99 program you want to express_.
100 - Type systems are _not always amenable to the type discipline you
101 want to impose_.
102 - A program _can still perform correctness checks without a static
103 type system_.
104
105 Let's go into these in detail:
106
107 ## Expressing The Program You Want To Express
108
109 Some problems are very amenable to being expressed in a static setting!
110 It's not uncommon to develop in a static system by writing a bunch of type
111 signatures and gradually filling in their definitions, fitting pieces
112 together as a kind of framework and then adding what's missing to the
113 framework.
114
115 On the other hand, sometimes this is less helpful. Certain types are
116 difficult to express in a static way, and appeasing the typechecker
117 requires extra work which distracts from the program at hand. As
118 Bob Harper informs us, dynamic languages can be seen as the subset
119 of static languages in which all values belong to a single type. Among other
120 things, this means that dynamic languages often have idioms well-suited
121 to dispatch on a large or even unbounded set of runtime tags.
122 Consequently, if your problem involves dealing with data in which you
123 make decisions based on a lot of runtime characteristics (such as in
124 scripting or shell invocation) it might be a good idea to take advantage
125 of the structure afforded by dynamic languages!
126
127 One could, as Harper suggests, construct that unitype in a static
128 language—but look at the tedium involved in, say,
129 [manipulating JSON in Haskell](), and you can see that, at that point,
130 you're not writing the program you want: you're writing the boilerplate
131 to deal with the unitype. Your intention might be clear to you, but
132 you also have to look through layers of constructors and types that
133 are there mostly because the compiler needs them to be assured that
134 your program is safe. (For a great example of this, look at the
135 library I wrote for working with [Activity Streams]
136
137 Additionally, there is a []
138
139 ## The Type Discipline You Want To Impose
140
141
142
143 # A Short Story From Real Life
144
145 A few months ago at my work, we did what amounted to a rewrite of a compiler
146 system that was written in Haskell. Haskell's type system is inching
147 towards being fully dependently typed, and already it can express
148 vast swaths of what dependent types can achieve. We took full
149 advantage of this, and the end result was pretty amazing. The AST was
150 constructed in such a way that the types expressed not just the result
151 types of the AST nodes, but also information about the use of those
152 values, and prevented invalid ASTs from being constructed at any
153 time.
154
155 As this system was being constructed, the type machinery was nothing
156 short of invaluable. Multiple times, it caught subtle errors that would
157 have likely gone unnoticed for long periods of time. It ensured
158 end-to-end correctness of the program being written as we were writing
159 it. Because of our chosen types, we were literally incapable of
160 constructing invalid programs!
161
162 But as we continued to expand our compiler, this system started to be less
163 and less helpful. It was getting in the way of optimizations:
164 constructing the AST, in effect, required type machinery to 'prove'
165 it was constructed correctly, so certain optimization passes
166 required that one not merely rewrite the AST, but also perform
167 elaborate manipulations on the types in order to convince the
168 type-checker that the resulting ASTs were permissible. And frankly,
169 it required substantial mental effort just to understand these
170 type manipulations, to say nothing of the work involved in
171 writing them in the first place. An optimization pass—correct
172 on paper and conceptually straightforward—would take an inordinate
173 amount of time and effort to write _and_ understand. The code
174 had become brittle, difficult to reason about, and most importantly,
175 difficult to change.
176
177 We ended up removing that extra type information, going back to
178 an AST with no type parameters.
179
180 Except we didn't remove guarantees: we just encoded them with other
181 kinds of checks. We still rule out invalid ASTs, we just don't do so by
182 _construction_. We can still verify that an optimization pass produces
183 correct code: we just run manually-written checks on the AST.
184
185 What's the summary here? Why did we ditch the stronger guarantees that
186 the previous edition provided us? Well, in short:
187
188 - The ergonomic advantages were gone. _Working with more static type
189 information took significantly more effort than working with the
190 more weakly-typed AST._
191 - The type system was _not amenable to the type information we wanted
192 to encode_. In this case, I suspect we would have been better suited
193 by a proper dependently typed language, as Haskell does not have the
194 necessary features for manipulating the proofs needed to convince the
195 type system of things.
196 - The constraints we wanted to express _were still otherwise expressible_
197 in ways that we could reason about: in fact, it was straightforward to
198 express them as simple manually-written passes. In the end, our program
199 _still checked the properties that needed to be checked_.
200
201 # How Does This Apply To Dynamic Typing?
202
203 After all, we didn't port our application from Haskell to Python. We
204 obviously were still reaping the benefits of Haskell's rather strong
205 static type system. So why am I citing this story in a blog post
206 about _dynamic_ types?
207
208 Well, look at the three criteria that led to us reducing the amount
209 of static type information. All three of these are also criteria that
210 might push you to choose a dynamic language for a particular problem
211 domain:
212
213 - The flip-side to Harper's argument that dynamic (or "unityped")
214 languages are in fact statically typed is that if you're writing a
215 program in which you will need to constantly check tags at runtime[^3], you
216 might as well be using a dynamic language. After all—it's what you'd
217 be doing in a static language, but much more concisely and
218 ergonomically, right? For some problem domains, _dynamic types
219 take less effort to write and understand than static types_.
220 - A program in a dynamic language generally has some kind of informal
221 type system applied by the programmer. Sometimes these systems can be
222 quite difficult to express even in sophisticated systems like dependently
223 typed languages[^1], and
224 equally often multiple type disciplines can be applied simultaneously[^2].
225 These _informal type disciplines can always be expressed in a dynamic
226 language, but may not be straightforwardly expressible in particular
227 static languages_.
228 - Dynamic languages can still express types: sometimes at runtime and
229 even sometimes (with the ability to write significantly powerful macros,
230 as in the Lisp family) at compile-time. _You can still check the
231 properties you want to check_.
232
233 Does this mean that you ought to run out and ditch your types? Of course
234 not! Static typing is a powerful tool, and dependent static typing even
235 more so. Dynamic languages are, after all, doing these checks at runtime,
236 and there is _great_ value in using a tool that can verify properties
237 statically. And for software that absolutely must be correct, static verification may well be indispensable.
238
239 But there is also great value in understanding that a particular type system
240 may or may not be amenable to the properties you want it to express, and that
241 there are problem domains in which doing things in a dynamically typed
242 language is reasonable or even advisable. It might even be possible to express
243 the type discipline you're using in a static way, but not in an existing
244 statically typed language. In that case, you could write it informally and
245 gradually layer your type discipline onto the code in a checkable way.
246
247 Additionally, there are shades of static verification: great inroads have
248 been made in the areas of gradual typing, which allows a system to be
249 selectively dynamically and statically typed. Some systems would benefit
250 from this! Some would not.
251
252 The
253
254 # Appendices: A Few Specific Objections to Dynamic Types
255
256 ## Large Systems In Dynamically Typed Languages Are Unmaintainable or Unwritable
257
258 Proof: by construction. There are many, many large codebases in dynamically
259 typed languages which have varying degrees of maintainability. This is also
260 true of statically typed codebases.
261
262 ## The Curry-Howard Isomorphism
263
264
265
266 [^1]: As a simple example, various idioms allow you to construct new types
267 based on runtime values, as in Python's `namedtuple`. Generating new types
268 in a dynamically typed language is incredibly straightforward, but doing
269 the corresponding work in (say) Haskell requires a great deal of effort,
270 including elaborate macro facilities and metaprogramming.
271
272 [^2]: This is exactly [what Matthias Felleisen says in a comment on Bob
273 Harper's blog](http://existentialtype.wordpress.com/2014/04/21/bellman-confirms-a-suspicion/#comment-1362):
274
275 > The hidden blessing is that DT programmers can import, super-impose,
276 > and integrate many different static type disciplines — informally — and
277 > use them to design their code and to reason about it. The result are
278 > programming idioms that no one ordinary static type system can
279 > uniformly check, and that is why DT programmers cannot accept the
280 > phrase “DT languages are statically typed.”
281
282 Note the similarity to how [Alan Kay describes his original conception of
283 objects](http://userpage.fu-berlin.de/~ram/pub/pub_jf47ht81Ht/doc_kay_oop_en):
284
285 > My math background made me realize that each object could have
286 > several algebras associated with it, and there could be families of
287 > these, and that these would be very very useful. The term
288 > "polymorphism" was imposed much later (I think by Peter Wegner) and
289 > it isn't quite valid, since it really comes from the nomenclature of
290 > functions, and I wanted quite a bit more than functions. I made up a
291 > term "genericity" for dealing with generic behaviors in a
292 > quasi-algebraic form.
293
294 [^3]: One situation in which this is reasonable is in long-running systems or scripting
295 languages. Erlang is a good example: you're probably going to receive a lot of
296 incorrect messages, so instead of a static type system in which you constantly
297 branch on various tags, it provides a dynamic system. Similarly, success with
298 statically typed operating system shells has been limited.
299
300
301 # SNIPPETS
302
303 ## But What About Curry-Howard?
304
305 The Curry-Howard Isomorphism is about the relationship between programs and
306 proofs in a logic. It is very cool. It has _almost nothing_ to do with
307 whether or not dynamic languages are worthwhile.
308
309 Firstly: the fact that programs correspond to a logic is a really strong
310 advantage for two reasons:
311
312 1. It gives you the ability to reason about programming languages themselves
313 using the tools of mathematics.
314 2. It has important ramifications for the construction of dependently-typed
315 languages, in which values and logic intermingle.
316
317 Notice that this doesn't say much about the programs written in a language
318 like Haskell: that's because the Curry-Howard Isomorphism has nothing to
319 say about whether they're "more correct" than programs written in another
320 language. In a language that's not dependently typed, the fact that
321 pairs correspond to conjunction doesn't have any bearing on their use.
322
323 You might argue that you get nice properties like
324
325 ~~~~
326 p: A * B p: A * B
327 ---------- ----------
328 fst p: A snd p: B
329 -----------------------
330 (fst p, snd p): A * B
331 ~~~~
332
333 out of the logic—but Scheme has the exact same law
334
335 ~~~~
336 (pair? p) => p == (cons (car p) (cdr p))
337 ~~~~
338
339 and has no type system to correspond to any logic!
340
341 As an aside: the Curry-Howard Isomorphism is often perceived to be
342 more magical than it is. People act as though it's some kind of
343 peek into the deep structure of reality, but (and this is a
344 bigger point) all mathematics is fundamentally a human invention.
345 A very useful and elegant one! But still the product of people.[^people]
346 Programming languages like the lambda-calculus came directly out of
347 the field of logic, and were invented by people steeped in the same
348 intellectual tradition: seeing the correspondence between the
349 lambda-calculus and logic as evidence that logic is an "ontological
350 ultimate" is like seeing the correspondence between French and
351 Spanish as evidence that Romance languages are inscribed in the
352 structure of the universe.
353
354 [^people]: One of the most disappointing things about the widespread
355 belief that math is some kind of deep universal truth is that people
356 tend to ignore the human factors such as culture and creativity that
357 go into creating mathematical formalisms.
1 {- Some example programs in the Matzo language
2 -
3 - It is important to note that, in the Matzo language, the value of a
4 - variable is generally not a single value, but one of a set of values.
5 - Every time a variable is used, an element is drawn from this set;
6 - so the following program can print 1, 2, 3, 10, 20, or 30. -}
7
8 x = 1 | 2 | 3
9 f = \ n -> n
10 | \ n -> n * 10
11 puts f.x;
12
13
14 {- The Aquan language program -}
15
16 {- The ::= operator is for the common case that we want to select from a
17 - whole bunch of literal characters or words. It is referred to as the
18 - 'literal assignment operator'. -}
19 vowel ::= a e i o u
20 consonant ::= p t k w h m n l
21
22 {- The := operator sets the value on the left to be
23 syll = vowel | consonant vowel
24 | vowel "'" | consonant vowel "'"
25 word = syll syll (6 @ syll)
26
27
28 {- Fixing a value -}
29
30 {- Identifiers prefixed by 'fixed' stay the same: -}
31
32 n = 1 | 2
33 fixed m = n
34 {- The following could print either "1,2", "2,1" "1,1" or "2,2" -}
35 puts "{n},{n}"
36 {- The following can only print "1,1" or "2,2" -}
37 puts "{m},{m}"
38
39
40 {- A random person description -}
41
42 {- Identifiers that begin with capitals are symbols. Symbols can be
43 - compared and matched against. It is not allowed to use an identifier
44 - that begins with a capital as a variable. -}
45
46 pronoun.Male = "he"
47 pronoun.Female = "she"
48 person.Male = "man"
49 person.Female = "woman"
50
51 fixed gender = Male | Female
52 hair ::= long short
53 hair_color ::= brown black blonde red white
54 eye_color ::= blue black brown green
55 mood ::= happy sad angry
56
57 puts "This {person.gender} has {hair}, {hair_color} hair and "
58 "{eye_color} eyes; {pronoun.gender} appears to be "
59 "quite {mood}."
1 <html>
2 <head>
3 <style type="text/css">
4 body {
5 font-family: "Arial", "Helvetica", sans-serif;
6 background-color: #ffffff;
7 color: #111111;
8 }
9
10 h2 {
11 text-align: center;
12 }
13
14 .main {
15 background-color: #eeeeee;
16 width: 800px;
17 margin-left: auto;
18 margin-right: auto;
19 padding-top: 10px;
20 padding-bottom: 20px;
21 padding-left: 40px;
22 padding-right: 40px;
23 }
24
25 .keyword { color: #336699; }
26 .constr { color: #993366; }
27 .string { color: #339966; }
28 .comment { color: #666666; }
29 </style>
30 </head>
31 <body>
32 <div class="main">
33 <pre>
34 <span class="comment">(** Some example programs in the Matzo language
35 *
36 * It is important to note that, in the Matzo language, the value of a
37 * variable is generally not a single value, but one of a set of values.
38 * Every time a variable is used, an element is drawn from this set;
39 * so the following program can print 1, 2, 3, 10, 20, or 30. *)</span>
40
41 x := 1 | 2 | 3;
42 f := { n => x }
43 | { n => n * 10 };
44 <span class="keyword">puts</span> f(x);
45
46
47 <span class="comment">(** The Aquan language program *)</span>
48
49 <span class="comment">(* The ::= operator is for the common case that we want to select from a
50 * whole bunch of literal characters or words. It is referred to as the
51 * 'literal assignment operator'. *)</span>
52 vowel ::= <span class="string">a e i o u</span>;
53 consonant ::= <span class="string">p t k w h m n l</span>;
54
55 <span class="comment">(* Normally, each alternative has equal weight, but explicit weights can be
56 * given; weights which are not given are considered to be 1. *)</span>
57 syll := 4: vowel | 4: consonant vowel
58 | vowel <span class="string">"'"</span> | consonant vowel <span class="string">"'"</span>;
59
60 <span class="comment">(* The `n @ value` syntax represents 1 to n possible repetitions of value.
61 * e.g. 3 @ ("a" | "b") is the same as
62 * ( ("a" | "b")
63 * | ("a" | "b") ("a" | "b")
64 * | ("a" | "b") ("a" | "b") ("a" | "b") )
65 * and is given special syntax because it is a common case in word generation.
66 * Below, it means that every word will have at least three and at most eight
67 * syllables. *)</span>
68 word := syll syll (6 @ syll);
69 <span class="keyword">puts</span> word;
70
71
72 <span class="comment">(** Fixing a value *)</span>
73
74 <span class="comment">(* The built-in pseudo-function `choose` allows you to force a value
75 * to stay the same. *)</span>
76
77 n := 1 | 2;
78 m := choose(n);
79 <span class="comment">(* The following could print either "1,2", "2,1", "1,1", or "2,2" *)</span>
80 <span class="keyword">puts</span> <span class="string">"{n},{n}"</span>
81 <span class="comment">(* The following can only print "1,1" or "2,2" *)</span>
82 <span class="keyword">puts</span> <span class="string">"{m},{m}"</span>
83
84
85 <span class="comment">(** A random person description *)</span>
86
87 <span class="comment">(* Identifiers that begin with capitals are symbols. Symbols can be
88 * compared and matched against. It is not allowed to use an identifier
89 * that begins with a capital as a variable. *)</span>
90
91 pronoun :=
92 { <span class="constr">Male</span> => <span class="string">"he"</span>
93 | <span class="constr">Female</span> => <span class="string">"she"</span>
94 };
95 person :=
96 { <span class="constr">Male</span> => <span class="string">"man"</span>
97 | <span class="constr">Female</span> => <span class="string">"woman"</span>
98 };
99
100 gender := choose(<span class="constr">Male</span> | <span class="constr">Female</span>);
101 hair ::= <span class="string">long short</span>;
102 hair_color ::= <span class="string">brown black blonde red white</span>;
103 eye_color ::= <span class="string">blue black brown green</span>;
104 mood ::= <span class="string">happy sad angry</span>;
105
106 <span class="keyword">puts</span> <span class="string">"This {person(gender)} has {hair}, {hair_color} hair and "
107 "{eye_color} eyes; {pronoun(gender)} appears to be "
108 "quite {mood}."</span>;
109 </pre>
110 </div>
111 </body>
112 </html>
1 -- Some example programs in the Matzo language
2 --
3 -- It is important to note that, in the Matzo language, the value of a
4 -- variable is generally not a single value, but one of a set of values.
5 -- Every time a variable is used, an element is drawn from this set;
6 -- so the following program can print 1, 2, 3, 10, 20, or 30.
7
8 x = 1 | 2 | 3
9 f = (\ n . x )
10 | (\ n . n * 10 )
11 puts f.x
12
13
14 -- The Aquan language program
15
16 {- The ::= operator is for the common case that we want to select from a
17 - whole bunch of literal characters or words. It is referred to as the
18 - 'literal assignment operator'. -}
19 vowel ::= a e i o u
20 consonant ::= p t k w h m n l
21
22 {- The := operator sets the value on the left to be re-evaluated
23 - each time the name is used. -}
23 syll = vowel
24 | consonant vowel
25 | vowel "'"
26 | consonant vowel "'"
27 word = syll syll (6 @ syll)
28
29
30 {- Fixing a value -}
31
32 {- The built-in pseudo-function `choose` allows you to force a value
33 * to stay the same. -}
34
35 n = 1 | 2
36 fixed m = n
37 {- The following could print either "1,2", "2,1", "1,1", or "2,2" -}
38 puts "{n},{n}"
39 {- The following can only print "1,1" or "2,2" -}
40 puts "{m},{m}"
41
42
43 {- A random person description -}
44
45 {- Identifiers that begin with capitals are symbols. Symbols can be
46 - compared and matched against. It is not allowed to use an identifier
47 - that begins with a capital as a variable. -}
48
49 pronoun Male = "he"
50 pronoun Female = "she"
51 person Male = "man"
52 person Female = "woman"
53
54 fixed gender = Male | Female
55 hair ::= long short
56 hair_color ::= brown black blonde red white
57 eye_color ::= blue black brown green
58 mood ::= happy sad angry
59
60 puts "This {person.gender} has {hair}, {hair_color} hair and "
61 "{eye_color} eyes; {pronoun.gender} appears to be "
62 "quite {mood}."
1 (** Some example programs in the Matzo language
2 *
3 * It is important to note that, in the Matzo language, the value of a
4 * variable is generally not a single value, but one of a set of values.
5 * Every time a variable is used, an element is drawn from this set;
6 * so the following program can print 1, 2, 3, 10, 20, or 30. *)
7
8 x := 1 | 2 | 3;
9 f := { n => x }
10 | { n => n * 10 };
11 puts f(x);
12
13
14 (* The Aquan language program *)
15
16 (* The ::= operator is for the common case that we want to select from a
17 * whole bunch of literal characters or words. It is referred to as the
18 * 'literal assignment operator'. *)
19 vowel ::= a e i o u;
20 consonant ::= p t k w h m n l;
21
22 (* The := operator sets the value on the left to be re-evaluated
 * each time the name is used. *)
23 syll := vowel | consonant vowel
24 | vowel "'" | consonant vowel "'";
25 word := syll syll (6 @ syll);
26
27
28 (* Fixing a value *)
29
30 (* The built-in pseudo-function `choose` allows you to force a value
31 * to stay the same. *)
32
33 n := 1 | 2;
34 m := n; fix m;
35 (* The following could print either "1,2", "2,1", "1,1", or "2,2" *)
36 puts "{n},{n}"
37 (* The following can only print "1,1" or "2,2" *)
38 puts "{m},{m}"
39
40
41 (* A random person description *)
42
43 (* Identifiers that begin with capitals are symbols. Symbols can be
44 * compared and matched against. It is not allowed to use an identifier
45 * that begins with a capital as a variable. *)
46
47 pronoun :=
48 { Male => "he"
49 ; Female => "she"
50 };
51 person :=
52 { Male => "man"
53 ; Female => "woman"
54 };
55
56 gender := Male | Female;
57 fix gender;
58 hair ::= long short;
59 hair_color ::= brown black blonde red white;
60 eye_color ::= blue black brown green;
61 mood ::= happy sad angry;
62
63 puts "This {person.gender} has {hair}, {hair_color} hair and "
64 "{eye_color} eyes; {pronoun.gender} appears to be "
65 "quite {mood}.";
66
67
1 V ::= [C] | 0 | V + V' | 1 | V * V'
2 C ::= {V} | V -> C | top | C & C'
3
4 neg : [ int -> {int} ]
5 add : [ int -> int -> {int} ]
6
7 abs : [ int -> {int} ]
8 abs = [ \ n: int.
9 if n < 0
10 then {0 - n}
11 else {n}
12 ]
13
14 add : [ int -> int -> {int} ]
15 add = [ \ (x: int) (y: int) . { x + y } ]
16
17 data List a where
18 Nil : List a
19 Cons : a -o List a -o List a
20
21 nums : List Int
22 nums = Cons(1, Cons(2, Cons(3, Nil)))
23
24 -- fn name arg1 arg2 ... argn = blah
25 -- ==>
26 -- name = [ \ arg1 arg2 ... argn . blah ]
27
28 sumlist : [ List Int -> {Int} ]
29 fn sumlist (Cons x xs) = let y <- sumlist xs in { x + y }
30 sumlist Nil = { 0 }
31
32 data Tree where
33 Leaf : Int -o Tree
34 Node : Tree * Tree -o Tree
35
36 codata Stream a where
37 .head : {a}
38 .tail : Stream a
39
40 ones : Stream Int
41 ones = rec this
42 this.head <- [1]
43 this.tail <- this
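The `ones` stream above can be approximated in a conventional language. Below is a minimal Python sketch (the `Stream` class and its thunk representation are my own modeling choice, not part of the notes): codata is represented purely by its observations, `head` and `tail`, each backed by a lazily evaluated thunk.

```python
class Stream:
    """A codata-style stream: defined only by its observations."""
    def __init__(self, head_fn, tail_fn):
        self._head_fn = head_fn  # thunk for the .head observation
        self._tail_fn = tail_fn  # thunk for the .tail observation

    @property
    def head(self):
        return self._head_fn()

    @property
    def tail(self):
        return self._tail_fn()

# `ones = rec this; this.head <- [1]; this.tail <- this` becomes a
# self-referential definition: the tail thunk returns the stream itself.
ones = Stream(lambda: 1, lambda: ones)
```

Because the tail thunk closes over the name `ones`, observing the tail any number of times yields the same stream, mirroring the `rec this` construction.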
1 # Zippered Tree
2
3 data Path where
4 Root : () -> Path
5 Left : Path -> Path
6 Right : Path -> Path
7
8 codata Tree : Path -> * -> * where
9 node : Tree p a -> a
10 left : Tree p a -> Tree (Left p) a
11 right : Tree p a -> Tree (Right p) a
12 unLeft : Tree (Left p) a -> Tree p a
13 unRight : Tree (Right p) a -> Tree p a
14
15 ones : Tree Root Int
16 ones =
17 merge e from
18 node e <- 1
19 left e <- l e
20 right e <- r e
21 where
22 l t = merge e from
23 node e <- 1
24 left e <- l e
25 right e <- r e
26 unLeft e <- t
27 r t = merge e from
28 node e <- 1
29 left e <- l e
30 right e <- r e
31 unRight e <- t
32
33 # Typed Forth
34
35 codata Forth s where
36 push : Forth b -> a -> Forth (a, b)
37 drop : Forth (a, b) -> Forth b
38 app : Forth (a, (b, c)) -> (a -> b -> d) -> Forth (d, c)
39 get : Forth (a, b) -> a
40
41 prog1 : Int
42 prog1 = empty
43 .push 5
44 .push 7
45 .app (+)
46 .get
47
48 empty : Forth ()
49 empty = merge e from
50 push e <- push0
51
52 push0 : a -> Forth (a, ())
53 push0 x = merge e from
54 push e <- push1 e
55 drop e <- empty
56 get e <- x
57
58 push1 : Forth (b, c) -> a -> Forth (a, (b, c))
59 push1 f x = merge e from
60 push e <- push1 e
61 drop e <- f
62 get e <- x
63 app e <- doApp
64 where doApp f =
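The intended stack discipline can be sketched in untyped Python (this is my own approximation of the truncated definition above, not the original `doApp` body): the stack is encoded as nested pairs `(top, rest)` with `()` as the empty stack, matching the `Forth (a, (b, c))` types.

```python
class Forth:
    """Stack encoded as nested pairs: (top, rest), with () as empty."""
    def __init__(self, stack=()):
        self.stack = stack

    def push(self, x):
        # push : Forth b -> a -> Forth (a, b)
        return Forth((x, self.stack))

    def drop(self):
        # drop : Forth (a, b) -> Forth b
        _, rest = self.stack
        return Forth(rest)

    def app(self, f):
        # app : Forth (a, (b, c)) -> (a -> b -> d) -> Forth (d, c)
        a, (b, c) = self.stack
        return Forth((f(a, b), c))

    def get(self):
        # get : Forth (a, b) -> a
        top, _ = self.stack
        return top

empty = Forth()
# prog1 = empty .push 5 .push 7 .app (+) .get
prog1 = empty.push(5).push(7).app(lambda a, b: a + b).get()
```

Here `app` consumes the top two elements and pushes the result, so `prog1` evaluates 5 + 7.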
1 I'm still working on the Matzo language, and I need to finish the parser (as
2 I can now evaluate more programs than I can parse, and have code written to
3 an as-yet unimplemented module spec.) Here are a few features I'm
4 _considering_ but haven't implemented yet, and how they might interact:
5
6 ## Dynamic Default Variables
7
8 This opens a small can of worms, so to be clear, dynamic variables will
9 reside in a different namespace than normal variables. I don't know yet
10 whether said namespace will be denoted with a sigil (say `#x`) or whether
11 you might have to pass a symbol to a function (like `get.X`) to access
12 their values. The idea is that certain functions will have sensible
13 defaults that one might want to override, and rather than explicitly
14 matching on arity, one can instead pass them in dynamically using a
15 particular syntax. In the examples below, I'll use an explicit syntax for
16 lookup of a dynamically scoped variable; in particular, `get.x.y` will
17 look up a dynamically scoped variable `x` (typically a symbol), and if it
18 hasn't been supplied, will use the default value `y`.
19
20 foo :=
21 let combine := get.Combine.cat in
22 combine."a"."b"
23 bar := foo with Combine := (\ x y . x ";" y)
24
25 puts foo
26 (* prints "ab", as it uses the default combiner `cat` *)
27 puts bar
28 (* prints "a;b" *)
29
30 This will continue down dynamically, so
31
32 s1 := (get.Pn."e") " stares intently."
33 s2 := "It is clear that " (get.Pn."e")
34 " is interested in what is happening."
35 sent := se.<s1,s2>
36 puts sent;
37 puts sent with fixed Pn := "he" | "she" | "e"
38
39 will force `Pn` to be the chosen value in all subexpressions evaluated
40 underneath the `with` clause. (Notice that nondeterminism still
41 works in dynamically computed variables, so they must be declared
42 fixed at the binding site if you want them to be computed exactly
43 once.)
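The dynamic-variable semantics sketched above can be modeled with an explicit stack of binding frames. A Python approximation follows (the `get` and `with_bindings` names are mine, standing in for the `get.X.default` lookup and the `with` clause):

```python
import contextlib

_dynamic = []  # stack of {symbol: value} frames

def get(name, default):
    """Look up a dynamically scoped variable, falling back to a default."""
    for frame in reversed(_dynamic):
        if name in frame:
            return frame[name]
    return default

@contextlib.contextmanager
def with_bindings(**bindings):
    """Model a `with X := ...` clause: bindings visible underneath it."""
    _dynamic.append(bindings)
    try:
        yield
    finally:
        _dynamic.pop()

def foo():
    # combine := get.Combine.cat — default combiner is concatenation
    combine = get("Combine", lambda x, y: x + y)
    return combine("a", "b")
```

Calling `foo()` directly uses the default combiner and yields `"ab"`; wrapping the call in `with_bindings(Combine=...)` overrides the combiner for everything evaluated underneath, like `bar` in the example above.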
44
45 ## Text Combinators
46
47 I've used one of these above: right now, I have a few planned.
48
49 puts wd.<"a","b","c">
50 (* prints "abc" *)
51
52 puts nm.<"a","b","c">
53 (* prints "Abc" *)
54
55 puts se.<"a","b","c">
56 (* prints "A b c" *)
57
58 puts pa.<"a","b","c">
59 (* prints "A. B. C." *)
60
61 So effectively, they function as ways of combining strings in a more
62 intelligent way. I plan for them to do some analysis of the strings so that
63 they don't, say, produce extraneous spaces or punctuation. (The names are
64 of course short for `word`, `name`, `sentence`, and `paragraph`, respectively,
65 and may very well be aliases for those longer names.)
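Plausible definitions for these combinators, matching the outputs shown above, can be sketched in Python (these are my naive versions; the planned combinators would do smarter punctuation and spacing analysis):

```python
def wd(parts):
    # word: plain concatenation
    return "".join(parts)

def nm(parts):
    # name: concatenation with an initial capital
    s = "".join(parts)
    return s[:1].upper() + s[1:]

def se(parts):
    # sentence: space-separated with an initial capital
    s = " ".join(parts)
    return s[:1].upper() + s[1:]

def pa(parts):
    # paragraph: each part capitalized and terminated with a period
    return " ".join(p[:1].upper() + p[1:] + "." for p in parts)
```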
66
67 ## Rebindable Syntax
68
69 The conjunction of the previous two suggests that certain bits of syntax
70 should be rebindable. A prominent example is concatenation, e.g.
71
72 puts "foo" "bar" "baz"
73 (* prints "foobarbaz" *)
74
75 puts "foo" "bar" "baz" with Cat := se
76 (* prints "Foo bar baz.", as normal string concatenation
77 * has been overloaded by `se` *)
78
79 puts "foo" "bar" "baz" with Cat := fold.(\ x y -> x ";" y)
80 (* prints "foo;bar;baz", as normal string concatenation
81 * has been overloaded by a custom function *)
82
83 This could lead to some pretty phenomenal weirdness, though:
84
85 f := \ x y -> add.x.y
86 puts f.<1,2> with App := \ f x . fold.(\ g y . g.y).(append.f.x)
87 (* prints 3, as we have overloaded function application to
88 * automatically uncurry functions *)
89
90 ...so maybe there should be a limit on it.
91
92 ## Error Handling
93
94 Still not sure on this one. I don't particularly want to bring monads
95 into this, mostly because I want the language to be a DSL for strings
96 and not a general-purpose programming language, but at the same time,
97 it might be nice to have a simple exception-like mechanism. One idea
98 I was playing with was to implement a backtracking system so that
99 errors (both raised by users and by built-in problems) could simply
100 resume at particular points and retry until some retry limit is reached.
101 For example, you could reject certain unlikely combinations:
102
103 x ::= a b c d
104 wd := let result := x x x x
105 in if eq.result."aaaa" then raise Retry else result
106 puts mark Retry in wd
107
108 Here, `mark exp in wd` corresponds roughly to the following imperative
109 pseudocode:
110
111 { retry_count := 0
112 ; while (retry_count < retry_max)
113 { try { return wd; }
114 catch (exp) { retry_count ++; }
115 }
116 }
117
118 It's a much more limited form of exception handling, which may or may not
119 be desirable, but it does give you some ability to recover from errors so long
120 as at least _some_ execution of your program is error-free.
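The `mark exp in wd` construct could desugar to exactly the retry loop in the pseudocode above. A Python sketch of that desugaring (the `Retry` exception class and the retry limit are my own names):

```python
class Retry(Exception):
    """Raised by user code (or built-in failures) to request a re-roll."""

def mark(wd, retry_max=100):
    """Evaluate the thunk `wd` repeatedly until it succeeds or the
    retry limit is reached, mirroring the imperative pseudocode."""
    retry_count = 0
    while retry_count < retry_max:
        try:
            return wd()
        except Retry:
            retry_count += 1
    raise RuntimeError("retry limit reached")
```

Each retry re-enters the nondeterministic expression, so fresh random choices are drawn on every attempt; the loop only gives up after `retry_max` failures.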
121
122 All this is heavily open to change, so we'll see.
1 #!/usr/bin/jroute
2
3 # for a quote
4 { "title" : "Quotes"
5 , "usejs" : true
6 , "copy" : "all quotes used under fair use &c &c"
7 , "onload" : "\"doLoadHighlight()\""
8 , "contents" :
9 { "quotelist" : [ { "content" : `(invoke "markdown" content)
10 , ?("author" : `(invoke "markdown" author))
11 }
12 | { "content": c, ?("author": a) } <- @(arg 1)
13 ]
14 , "focus" : `(arg 0)
15 }
16 }
17
18 # for a work
19 let
1 % Latka: A Language For Random Text Generation
2 % Getty D. Ritter
3
4 Latka is a total, strongly typed functional programming language for
5 generating random text according to predefined patterns. To this end, Latka
6 incorporates weighted random choice as an effect in its expression language
7 and provides a set of combinators for constructing high-level semantic
8 constructs such as sentences and paragraphs. The eventual purpose of Latka
9 is to produce code that can be embedded into other systems such as video
10 games.
11
12 The primary operators of the expression language are concatenation (which is
13 represented by juxtaposing two expressions, e.g., `e1 e2`) and choice
14 (which is represented by a vertical bar to mimic BNF notation, e.g.,
15 `e1 | e2`), with weighted choice `n: e1 | m: e2` (where `m` and `n`
16 are `Nat`s) being a convenient syntactic sugar. Another piece of syntactic
17 sugar is repetition, in which `n @ e` (where `n` is an expression of type `Nat`)
18 stands for `n` repetitions of `e`. A simple program can be
19 built out of just these primitives:
20
21 consonant, vowel, syllable : String
22 consonant = "p" | "t" | "k" | "w"
23 | "h" | "m" | "n"
24 vowel = "a" | "e" | "i" | "o" | "u"
25 syllable = let v = 5: vowel | vowel "'" in
26 5: consonant v | 2: v
27
28 -- e.g., pate'hai, aku, e'epoto'
29 puts (2 | 3 | 4 | 5) @ syllable
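The choice, weighted-choice, and repetition operators can be emulated directly with thunks over a random source. A Python sketch of the syllable program above (the helper names `choice` and `rep` are mine, not Latka's):

```python
import random

def choice(*weighted):
    """`n: e1 | m: e2` — draw one weighted alternative and evaluate it."""
    weights = [w for w, _ in weighted]
    thunks = [t for _, t in weighted]
    return random.choices(thunks, weights=weights)[0]()

def rep(n, thunk):
    """Latka's `n @ e`: n independent repetitions of e, concatenated."""
    return "".join(thunk() for _ in range(n))

vowel = lambda: random.choice("aeiou")
consonant = lambda: random.choice("ptkwhmnl")

def syllable():
    # let v = 5: vowel | vowel "'" in 5: consonant v | 2: v
    v = choice((5, vowel), (1, lambda: vowel() + "'"))
    return choice((5, lambda: consonant() + v), (2, lambda: v))

word = lambda: rep(random.choice([2, 3, 4, 5]), syllable)
```

Note that `v` is bound once per syllable (as in the `let`), while each use of `syllable` inside `word` draws afresh.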
30
31 By default, evaluation order is call-by-name so that repeated use of the same
32 name will result in different values, but by prefixing any binding with the
33 keyword `fixed` one can specify call-by-value evaluation order, effectively
34 selecting a value consistently for the duration of the use of the name:
35
36 -- Can evaluate to aa, ab, ba, or bb
37 puts let x = "a" | "b" in x x
38
39 -- Can only ever evaluate to aa or bb
40 puts let fixed x = "a" | "b" in x x
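The call-by-name versus `fixed` distinction corresponds to re-running a thunk on every use versus memoizing its first result. A small Python sketch of that semantics (the `fixed` helper is my own modeling of the keyword):

```python
import random

def fixed(thunk):
    """Latka's `fixed`: evaluate once, then reuse the drawn value."""
    value = thunk()
    return lambda: value

x = lambda: random.choice(["a", "b"])  # call-by-name: fresh draw per use
fx = fixed(x)                          # call-by-value: one draw, reused

# x() + x() may be "ab" or "ba"; fx() + fx() is always "aa" or "bb"
pair = fx() + fx()
```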
41
42 Latka shares a number of features with other strongly typed functional
43 languages such as Haskell and OCaml, including named sums as a datatype mechanism,
44 pattern matching, and typeclasses for ad-hoc polymorphism.
45 As in Coq or Agda, recursive functions are
46 restricted to structural recursion to ensure termination of embedded
47 Latka programs.
48 Latka also includes a set of functions for structuring larger blocks of text.
49 These take advantage of a particular aspect of the type system which allows
50 for variadic polymorphism. In the example below, `sent` is one such function;
51 it takes arbitrary-length tuples of values coercible to sentence fragments
52 and intelligently converts them to a correct sentence with capitalization
53 and punctuation.
54
55 samp : String * Word * Sentence
56 samp = ("one",wd."two",sent.("three","four"))
57
58 -- prints "One two three four."
59 puts sent.samp
60
61 Some of these examples can be seen in practice in the following program.
62
63 import Language.Natural.Latin as Latin
64
65 data Gender = Male | Female
66
67 pronoun : Gender -> Word
68 pronoun Male = wd."he"
69 pronoun Female = wd."she"
70
71 noun : Gender -> Word
72 noun Male = wd."man"
73 noun Female = wd."woman"
74
75 puts let fixed g = Male | Female in
76 para.( sent.( "You see a Roman"
77 , noun.g
78 , "from"
79 , proper_noun.(Latin/cityName)
80 )
81 , sent.( pronoun.g
82 , "has"
83 , ("brown" | "black" | "blonde")
84 , "hair and carries"
85 , range.50.500
86 , "denarii"
87 )
88 )
89 -- It might print, for example, "You see a
90 -- Roman woman from Arucapa. She has black
91 -- hair and carries 433 denarii."
1 \documentclass[twocolumn]{article}
2 \usepackage[T1]{fontenc}
3 \usepackage{lmodern}
4 \usepackage[cm]{fullpage}
5 \renewcommand{\ttdefault}{pcr}
6 \usepackage[usenames,dvipsnames]{xcolor}
7 \usepackage{listings}
8 \lstdefinelanguage{latka}
9 { morekeywords={puts,import,data,let,in,as,fixed}
10 , sensitive=false
11 , morecomment=[l]{--}
12 , morestring=[b]"
13 }
14 \lstset{language=latka}
15 \lstdefinestyle{lkstyle}
16 { basicstyle=\ttfamily\small
17 , commentstyle=\itshape\color{black!60}
18 % , keywordstyle=\bfseries\color{MidnightBlue}
19 , keywordstyle=\bfseries
20 % , stringstyle=\color{ForestGreen}
21 , stringstyle=\color{black!65}
22 , showstringspaces=false
23 }
24 \lstset{style=lkstyle}
25 \usepackage{amssymb,amsmath}
26 \usepackage{ifxetex,ifluatex}
27 \usepackage{fixltx2e} % provides \textsubscript
28 % use upquote if available, for straight quotes in lstlisting environments
29 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
30 \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
31 \usepackage[utf8]{inputenc}
32 \else % if luatex or xelatex
33 \usepackage{fontspec}
34 \ifxetex
35 \usepackage{xltxtra,xunicode}
36 \fi
37 \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
38 \newcommand{\euro}{€}
39 \fi
40 % use microtype if available
41 \IfFileExists{microtype.sty}{\usepackage{microtype}}{}
42 \ifxetex
43 \usepackage[setpagesize=false, % page size defined by xetex
44 unicode=false, % unicode breaks when used with xetex
45 xetex]{hyperref}
46 \else
47 \usepackage[unicode=true]{hyperref}
48 \fi
49 \hypersetup{breaklinks=true,
50 bookmarks=true,
51 pdfauthor={Getty D. Ritter},
52 pdftitle={Latka: A Language For Random Text Generation},
53 colorlinks=true,
54 urlcolor=blue,
55 linkcolor=magenta,
56 pdfborder={0 0 0}}
57 \urlstyle{same} % don't use monospace font for urls
58 \setlength{\parindent}{0pt}
59 \setlength{\parskip}{6pt plus 2pt minus 1pt}
60 \setlength{\emergencystretch}{3em} % prevent overfull lines
61 \setcounter{secnumdepth}{0}
62
63 \title{Latka: A Language For Random Text Generation}
64 \author{Getty D. Ritter\\
65 Galois, Inc.\\
66 \texttt{\small gdritter@galois.com}}
67 \date{}
68
69 \begin{document}
70 \maketitle
71
72 Latka is a total, strongly typed functional programming language for
73 generating random text according to predefined patterns. To this end,
74 Latka incorporates weighted random choice as an effect in its expression
75 language and provides a set of combinators for constructing high-level
76 semantic constructs such as sentences and paragraphs. The eventual
77 purpose of Latka is to produce code that can be embedded into other
78 systems such as video games.
79
80 The primary operators of the expression language are concatenation
81 (which is represented by juxtaposing two expressions, e.g.,
82 \texttt{e1 e2}) and choice (which is represented by a vertical bar to
83 mimic BNF notation, e.g., \texttt{e1 \textbar{} e2}), with weighted
84 choice \texttt{m: e1 \textbar{} n: e2} (where \texttt{m} and \texttt{n}
85 are \texttt{Nat}s) being a convenient syntactic sugar. Another piece of
86 syntactic sugar is repetition, in which \texttt{n @ e} (where \texttt{n}
87 is an expression of type \texttt{Nat}) stands for \texttt{n} repetitions
88 of \texttt{e}. A simple program can be built out of just these
89 primitives:
90
91 \begin{lstlisting}
92 consonant, vowel, syllable : String
93 consonant = "p" | "t" | "k" | "w"
94 | "h" | "m" | "n"
95 vowel = "a" | "e" | "i" | "o" | "u"
96 syllable = let v = 5: vowel | vowel "'" in
97 5: consonant v | 2: v
98
99 -- e.g., pate'hai, aku, e'epoto'
100 puts (2 | 3 | 4 | 5) @ syllable
101 \end{lstlisting}
102
103 By default, evaluation order is call-by-name so that repeated use of the
104 same name will result in different values, but by prefixing any binding
105 with the keyword \lstinline!fixed! one can specify call-by-value evaluation
106 order, effectively selecting a value consistently for the duration of
107 the use of the name:
108
109 \begin{lstlisting}
110 -- Can evaluate to aa, ab, ba, or bb
111 puts let x = "a" | "b" in x x
112
113 -- Can only ever evaluate to aa or bb
114 puts let fixed x = "a" | "b" in x x
115 \end{lstlisting}
116
117 Latka shares a number of features with other strongly typed functional
118 languages such as Haskell and OCaml, including named sums as a datatype
119 mechanism, pattern matching, and typeclasses for ad-hoc polymorphism.
120 As in Coq or Agda, recursive functions are restricted to structural
121 recursion to ensure termination of embedded Latka programs. Latka also
122 includes a set of functions for structuring larger blocks of text. These
123 take advantage of a particular aspect of the type system which allows
124 for variadic polymorphism. In the example below,
125 \texttt{sent} is one
126 such function; it takes arbitrary-length tuples of values coercible to
127 sentence fragments and intelligently converts them to a correct sentence
128 with capitalization and punctuation.\footnote{Latka's function
129 invocation syntax is an infixed \texttt{.} operator, borrowed from the
130 notation Edsger W. Dijkstra outlined in
131 \textit{The notational conventions I adopted, and why} (2000). This operator has
132 the highest binding power and is left-associative so that
133 \texttt{f.x.y == (f.x).y}.}
134
135 \begin{lstlisting}
136 samp : String * Word * Sentence
137 samp = ("one",wd."two",sent.("three","four"))
138
139 -- prints "One two three four."
140 puts sent.samp
141 \end{lstlisting}
142
143 Some of these examples can be seen in practice in the following program.
144
145 \begin{lstlisting}
146 import Language.Natural.Latin as Latin
147
148 data Gender = Male | Female
149
150 pronoun : Gender -> Word
151 pronoun . Male = wd."he"
152 pronoun . Female = wd."she"
153
154 noun : Gender -> Word
155 noun . Male = wd."man"
156 noun . Female = wd."woman"
157
158 puts let fixed g = Male | Female in
159 para.( sent.( "You see a Roman"
160 , noun.g
161 , "from"
162 , proper_noun.(Latin/cityName)
163 )
164 , sent.( pronoun.g
165 , "has"
166 , ("brown"|"black"|"blonde")
167 , "hair and carries"
168 , range.50.500
169 , "denarii"
170 )
171 )
172 -- It might print, for example, "You see a
173 -- Roman woman from Arucapa. She has black
174 -- hair and carries 433 denarii."
175 \end{lstlisting}
176
177 \end{document}
1 # Latka
2 ## Grammar
3
4 <COMMAND> := put <EXPR> |
1 \documentclass[twocolumn]{article}
2 \usepackage[T1]{fontenc}
3 \usepackage{lmodern}
4 \usepackage[cm]{fullpage}
5 \usepackage[usenames,dvipsnames]{xcolor}
6 \usepackage{listings}
7 \lstdefinelanguage{latka}
8 { morekeywords={puts,import,data,let,in,as,fixed}
9 , sensitive=false
10 , morecomment=[l]{--}
11 , morestring=[b]"
12 }
13 \lstset{language=latka}
14 \lstdefinestyle{lkstyle}
15 { basicstyle=\ttfamily\small
16 , commentstyle=\color{Gray}
17 , keywordstyle=\color{MidnightBlue}
18 , stringstyle=\color{ForestGreen}
19 , showstringspaces=false
20 }
21 \lstset{style=lkstyle}
22 \usepackage{amssymb,amsmath}
23 \usepackage{ifxetex,ifluatex}
24 \usepackage{fixltx2e} % provides \textsubscript
25 % use upquote if available, for straight quotes in lstlisting environments
26 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
27 \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
28 \usepackage[utf8]{inputenc}
29 \else % if luatex or xelatex
30 \usepackage{fontspec}
31 \ifxetex
32 \usepackage{xltxtra,xunicode}
33 \fi
34 \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
35 \newcommand{\euro}{€}
36 \fi
37 % use microtype if available
38 \IfFileExists{microtype.sty}{\usepackage{microtype}}{}
39 \ifxetex
40 \usepackage[setpagesize=false, % page size defined by xetex
41 unicode=false, % unicode breaks when used with xetex
42 xetex]{hyperref}
43 \else
44 \usepackage[unicode=true]{hyperref}
45 \fi
46 \hypersetup{breaklinks=true,
47 bookmarks=true,
48 pdfauthor={Getty D. Ritter},
49 pdftitle={Latka: A Language For Random Text Generation},
50 colorlinks=true,
51 urlcolor=blue,
52 linkcolor=magenta,
53 pdfborder={0 0 0}}
54 \urlstyle{same} % don't use monospace font for urls
55 \setlength{\parindent}{0pt}
56 \setlength{\parskip}{6pt plus 2pt minus 1pt}
57 \setlength{\emergencystretch}{3em} % prevent overfull lines
58 \setcounter{secnumdepth}{0}
59
60 \title{Latka: A Language For Random Text Generation}
61 \author{Getty D. Ritter}
62 \date{}
63
64 \begin{document}
65 \maketitle
66
67 Latka is a total, strongly typed functional programming language for
68 generating random text according to predefined patterns. To this end,
69 Latka incorporates weighted random choice as an effect in its expression
70 language and provides a set of combinators for constructing high-level
71 semantic constructs such as sentences and paragraphs. The eventual
72 purpose of Latka is to produce code that can be embedded into other
73 systems such as video games.
74
75 The primary operators of the expression language are concatenation
76 (which is represented by juxtaposing two expressions, e.g.,
77 \texttt{e1 e2}) and choice (which is represented by a vertical bar to
78 mimic BNF notation, e.g., \texttt{e1 \textbar{} e2}), with weighted
79 choice \texttt{n: e1 \textbar{} m: e2} (where \texttt{m} and \texttt{n}
80 are \texttt{Nat}s) being a convenient syntactic sugar. Another piece of
81 syntactic sugar is repetition, in which \texttt{n @ e} (where \texttt{n}
82 is an expression of type \texttt{Nat}) stands for \texttt{n} repetitions
83 of \texttt{e}. A simple program can be built out of just these
84 primitives:
85
86 \begin{lstlisting}
87 consonant, vowel, syllable : String
88 consonant = "p" | "t" | "k" | "w"
89 | "h" | "m" | "n"
90 vowel = "a" | "e" | "i" | "o" | "u"
91 syllable = let v = 5: vowel | vowel "'" in
92 5: consonant v | 2: v
93
94 -- e.g., pate'hai, aku, e'epoto'
95 puts (2 | 3 | 4 | 5) @ syllable
96 \end{lstlisting}
97
98 By default, evaluation order is call-by-name so that repeated use of the
99 same name will result in different values, but by prefixing any binding
100 with the keyword \texttt{fixed} one can specify call-by-value evaluation
101 order, effectively selecting a value consistently for the duration of
102 the use of the name:
103
104 \begin{lstlisting}
105 -- Can evaluate to aa, ab, ba, or bb
106 puts let x = "a" | "b" in x x
107
108 -- Can only ever evaluate to aa or bb
109 puts let fixed x = "a" | "b" in x x
110 \end{lstlisting}
111
112 Latka shares a number of features with other strongly typed functional
113 languages such as Haskell and OCaml, including named sums as a datatype
114 mechanism, pattern matching, and typeclasses for ad-hoc polymorphism.
115 As in Coq or Agda, recursive functions are restricted to structural
116 recursion to ensure termination of embedded Latka programs. Latka also
117 includes a set of functions for structuring larger blocks of text. These
118 take advantage of a particular aspect of the type system which allows
119 for variadic polymorphism. In the example below,
120 \texttt{sent} is one
121 such function; it takes arbitrary-length tuples of values coercible to
122 sentence fragments and intelligently converts them to a correct sentence
123 with capitalization and punctuation.\footnote{Latka's function
124 invocation syntax is an infixed \texttt{.} operator, borrowed from the
125 notation Edsger W. Dijkstra outlined in his note EWD1300. This operator has
126 the highest binding power and is left-associative so that
127 \texttt{f.x.y == (f.x).y}.}
128
129 \begin{lstlisting}
130 samp : String * Word * Sentence
131 samp = ("one",wd."two",sent.("three","four"))
132
133 -- prints "One two three four."
134 puts sent.samp
135 \end{lstlisting}
136
137 Some of these examples can be seen in practice in the following program.
138
139 \begin{lstlisting}
140 import Language.Natural.Latin as Latin
141
142 data Gender = Male | Female
143
144 pronoun : Gender -> Word
145 pronoun . Male = wd."he"
146 pronoun . Female = wd."she"
147
148 noun : Gender -> Word
149 noun . Male = wd."man"
150 noun . Female = wd."woman"
151
152 puts let fixed g = Male | Female in
153 para.( sent.( "You see a Roman"
154 , noun.g
155 , "from"
156 , proper_noun.(Latin/cityName)
157 )
158 , sent.( pronoun.g
159 , "has"
160 , ("brown"|"black"|"blonde")
161 , "hair and carries"
162 , range.50.500
163 , "denarii"
164 )
165 )
166 -- It might print, for example, "You see a
167 -- Roman woman from Arucapa. She has black
168 -- hair and carries 433 denarii."
169 \end{lstlisting}
170
171 \end{document}
1 <document> ::= <group> <document> | ""
2 <group> ::= <pair> <group>
1 # Basic types
2
3 Rubbish features a typical set of built-in values: integers which
4 transparently become bignums, floating-point numbers, functions,
5 tuples, and the soon-to-be-explained variants. Every Rubbish value
6 is represented by these. (Technically, numbers and tuples could
7 be subsumed by variants---and are, in the semantics---but practically,
8 they are treated as separate due to their wide use.)
9
10 Variants are an untyped version of the tagged sums featured in statically
11 typed functional languages. A variant always begins with either a capital
12 letter or a backtick.
13
14 Nullary variants are treated not unlike symbols in Lisps or Ruby. The
15 only meaningful operations on them are comparison or pattern-matching.
16 Certain nullary variants are used as distinguished values; for example,
17 `False` is the false value and `Void` is the return type of most
18 side-effectful functions. In addition, nullary variants subsume most
19 uses of enumerations and function as keywords, e.g.
20 `file := open("filename.txt", R)`.
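One way to model untyped variants is as a tag plus an argument tuple. A Python sketch of this representation (my own guess at a modeling, not Rubbish's actual implementation):

```python
class Variant:
    """An untyped tagged value: a capitalized tag plus zero or more args."""
    def __init__(self, tag, *args):
        self.tag, self.args = tag, args

    def __eq__(self, other):
        # the only meaningful operations are comparison and matching
        return (isinstance(other, Variant)
                and self.tag == other.tag and self.args == other.args)

    def __repr__(self):
        if not self.args:
            return self.tag  # nullary variants print like symbols
        return f"{self.tag}{self.args}"

False_ = Variant("False")           # distinguished nullary variants
Void = Variant("Void")
point = Variant("Point", 1, 2 + 3)  # evaluates to Point(1, 5)
```

The arguments are evaluated eagerly at construction, which is why `Point(1, 2+3)` results in `Point(1, 5)`.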
21
22 However, variants can also take an arbitrary number of arguments. For
23 example, evaluating `Point(1, 2+3)` results in the value `Point(1, 5)`;
24 the intuition behind them can be captured in a number of ways: you
25 can think of an n-ary variant as an (n+1)-ary variant with a symbol
26 as its first element, or as
1 If I write my own shell---which I may very well do at some point---there's a
2 particular process model I'd like to embed in it. To wit: in UNIX right now, each
3 program in a pipeline has a single input stream and two output streams, with
4 files/sockets/&c for other kinds of communication.
5
6 A pipeline of functions `foo | bar | baz` looks kind of like this
7
8 keyboard -> stdin stdout -> stdin stdout -> stdin stdout ---+
9 >foo< >bar< >baz< |
10 stderr -+ stderr -+ stderr -+ |
11 | | | |
12 v v v v
13 [..................terminal...................]
14
15 Which works pretty well. You can do some pretty nice things with redirecting
16 stderr to here and stdin from there and so forth, and it enables some nice
17 terse shell invocations.
18
19 I'd like that basic system to be preserved, but with the ability to easily
20 create other _named_ streams. For example, imagine a hypothetical version of
21 wc which still outputs the relevant data to stdout, but also has three other
22 streams with these names:
23
24 / newlines
25 | words
26 stdin -> wc-s -> | bytes
27 | stdout
28 \ stderr
29
30 You can always see the normal output of wc on stdout:
31
32 gdsh$ wc-s *
33 2 3 6 this.txt
34 10 20 30 that.c
35 100 1000 10000 whatever.py
36
37 But you could also extract an individual stream from that invocation using
38 special redirection operators:
39
40 gdsh$ wc-s * stdout>/dev/null bytes>&stdout
41 3
42 20
43 1000
44
45 We could also have multiple input channels. I imagine an `fmt` command which
46 can interpolate named streams, e.g.
47
48 gdsh$ printf "1\n2 3\n" | wc-s | fmt "bytes: {bytes}\n words: {words}\n nl: {newlines}\n"
49 bytes: 6
50 words: 3
51 newlines: 2
52
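The stream-interpolating `fmt` above is straightforward to prototype if we model stream contents as plain strings. A minimal sketch (Python; the `fmt` function and its `{name}` syntax follow the hypothetical command above, not any real tool):

```python
import re

def fmt(template, streams):
    """Replace each {name} in the template with the contents of the
    named stream, stripped of its trailing newline."""
    def lookup(match):
        name = match.group(1)
        return streams[name].rstrip("\n")
    return re.sub(r"\{([\w.]+)\}", lookup, template)

# Modeling the `printf "1\n2 3\n" | wc-s | fmt ...` pipeline above,
# with the named output streams of wc-s as a dict:
wc_streams = {"newlines": "2\n", "words": "3\n", "bytes": "6\n"}
print(fmt("bytes: {bytes}\nwords: {words}\nnewlines: {newlines}", wc_streams))
```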
53 We can then have a handful of other utilities and built-in shell operators for
54 manipulating these other streams:
55
56 gdsh$ wc-s * | select words
57 3
58 20
59 1000
60 gdsh$ fmt "this is {X}\n" X<this.txt
61 1
62 2 3
63 gdsh$ !X='cat this.txt' fmt "this is {X}\n"
64 1
65 2 3
66 gdsh$ @X=this.txt fmt "this is {X}\n"
67 1
68 2 3
69 gdsh$ ^Y=recidivism fmt "Y is {Y}\n"
70 recidivism
71 gdsh$ wc-s * words>words.txt bytes>bytes.txt newlines>newlines.txt
gdsh$ wc-s * | split words=[sort >sorted-word-count.txt] bytes=[uniq >uniq-bytes.txt]
73
This could also enable new programming idioms: verbose output, for example, rather
than being controlled by a flag, could be written consistently to another (usually hidden) stream:
76
77 gdsh$ myprog
78 myprog: config file not found
79 gdsh$ myprog stderr>/dev/null verbose>stderr
80 Setting up context
81 Looking in user dir... NOT FOUND
82 Looking in global dir... NOT FOUND
83 myprog: file not found
84 Tearing down context
85 Completed
86
87 Or maybe you could have human-readable error messages on `stderr` and
88 machine-readable error messages on `jsonerr`:
89
90 gdsh$ thatprog
91 ERROR: no filename given
92 gdsh$ thatprog stderr>/dev/null jsonerr>stderr
93 {"error-type":"fatal","error-code":30,"error-msg":"no filename given"}
94
95 There are other considerations I've glossed over here, but here are a few
96 notes, advantages, and interactions:
97
- I glossed over the distinction between input/output streams. In practice,
the _shell_ has no trouble disambiguating the two, but a given _program_
may wish to consider the distinction between `words.in` and `words.out`;
to this end, we could rename the existing streams to `std.in`, `std.out`,
and `err.out` (it being an error to read from `err.in` in most cases[^2]).
103
- This is obviously not POSIX-compliant, but could be made to work with the
existing UNIX process model by e.g. having a standard environment variable,
mapping stream names to file descriptors, for stream-aware programs to
consult. That way, programs which don't expect these special streams
still use fds 0, 1, and 2 as expected, while programs that do handle
these can read STREAM_DESC to find out which 'streams' correspond to which
110 file descriptors. In that case, you can _almost_ use these commands with
111 an existing shell by doing something like
112
113 sh$ echo foo >&77 | STREAM_DESC='std.in:0 std.out:1 err.out:2 foo.in:77' fmt "foo is {foo}\n"
114
- If we do build this on top of an existing UNIX system, then we'd also want
library support for it, and the right API is an open question. Presumably
a C interface would have something like `int get_stream(char* stream_name)`,
returning a file descriptor on success and -1 on failure, but maybe another
interface would be preferable.
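The lookup such a library would perform is simple to sketch. A minimal version (Python rather than C; `get_stream` and the space-separated `name:fd` format are the assumptions proposed above, not an existing API):

```python
import os

def get_stream(name):
    """Return the file descriptor mapped to a stream name via the
    STREAM_DESC environment variable, or -1 if the name is absent
    or the variable is unset."""
    desc = os.environ.get("STREAM_DESC", "")
    for entry in desc.split():
        stream, _, fd = entry.rpartition(":")
        if stream == name:
            return int(fd)
    return -1

# Mirroring the `sh$` example above:
os.environ["STREAM_DESC"] = "std.in:0 std.out:1 err.out:2 foo.in:77"
assert get_stream("foo.in") == 77
assert get_stream("jsonerr") == -1
```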
120
121 - This would interact really interestingly with a semi-graphical shell[^1] that
122 could visualize the stream relationships between commands as well as a
123 shell with a higher-level understanding of data types.[^3]
124
125 [^1]: Don't cringe. Look---the input device with the most information density
126 is the keyboard, right? That's why you use the command line at all. However,
127 graphical systems have more information density than pure-text systems. You
128 can take a pure-text system and extend it with position and color to give it
129 more information, and then with charts and graphs to give it _more_ information,
130 and so forth. What I'm proposing is _not_ drag-and-drop, although that might
131 be useful to some users; it's a keyboard-driven system that displays
132 information in a more dense, information-rich style. I keep thinking of building
133 this myself but for the massive herds of yaks I'd have to shave first.
134
135 [^2]: Actually, I can imagine a use for this. Let's say my programming language
136 of choice, Phosphorus, outputs its error messages in XML format, which is great
137 for an IDE, but I need to invoke it on a remote server which doesn't have my
138 IDE installed. I could
139 have a program `phWrapper` that passes all streams through unchanged _except_ for
140 `err.in`, which it parses as XML and then outputs as a kind of pretty-printed
141 trace representation to `err.out`. In that case,
142 `phosphorus my_source.ph | phWrapper` will give me the output as well as a
143 pretty-printed stack trace, while `phosphorus my_source.ph` will give me the
144 monstrous XML traces.
145
[^3]: PowerShell is the usual example given here, but I confess I haven't used
it. Effectively, all Unix processes expect streams of raw text as input and
produce streams of raw text as output. Text is versatile, which is nice, but it
would also be nice to have some basic structured data that isn't just streams
of text. For example, imagine a Lisp-based system that expected everything to
be S-expressions, so you can _guarantee_ that a program will get called with a
valid S-expression as input. Now, `wc` would output something like
153
154 lsh$ (wc "this.txt" "that.c")
155 (("this.txt" 2 4 6) ("that.c" 1 10 100))
156
157 or a system in which the data transmitted is JSON, as in
158
159 jsh$ wc ["this.txt", "that.c"]
160 { 'this.txt': {'newlines': 2, 'words': 4, 'characters': 6 }
161 , 'that.c': {'newlines': 1, 'words': 10, 'characters': 100}
162 }
163
164 Among other things, this means that you could cut down on the amount of
165 serializing/deserializing that a system is doing and just pass in some raw
166 in-memory structure. You also get some nice operations that subsume the role
167 of `sed` and `awk` and its ilk, e.g.
168
lsh$ (map (\ ((fname nl wd cs)) (fmt #f "%s has %d bytes." fname cs))
(wc "this.txt" "that.c"))
("this.txt has 6 bytes." "that.c has 100 bytes.")
172
173 You could then use piping and shells to do some _pretty_ elaborate stuff,
174 including transferring higher-level objects (like HTTP requests or opaque
175 OS-level objects like users or processes) in a universally understood way.
176
177 jsh$ ps | map (fun (x) { return [x.cmd, x.args, x.pid, x.user]; })
178 [ ["jsh", [], 4440, "gdritter"]
179 , ["emacs", ["post.md"], 12893, "gdritter"]
180 , ["ps", [], 29678, "gdritter"]
181 ]
182
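The structured `ps | map ...` pipeline above can be approximated in an ordinary language by passing records between stages instead of text. A rough sketch (Python; the process list is fabricated sample data mirroring the example output):

```python
# Each pipeline stage consumes and produces structured records, so the
# `ps | map ...` example needs no field-splitting a la awk or cut.
def ps():
    # Sample data standing in for a real process listing.
    return [
        {"cmd": "jsh", "args": [], "pid": 4440, "user": "gdritter"},
        {"cmd": "emacs", "args": ["post.md"], "pid": 12893, "user": "gdritter"},
    ]

def pipe_map(f, records):
    """The shell's `map` stage: apply f to each record flowing through."""
    return [f(r) for r in records]

rows = pipe_map(lambda x: [x["cmd"], x["args"], x["pid"], x["user"]], ps())
# rows == [["jsh", [], 4440, "gdritter"], ["emacs", ["post.md"], 12893, "gdritter"]]
```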
183 So those are some ideas that have been drifting around in my head for a while.
184 No idea if I'll ever implement any of them, or if they'd even be worth
185 implementing, but I might get around to it at some point. We'll see.
1 data :list (a)
2 | :cons { a -o (box (:list a)) -o (:list a) }
3 | :nil (:list a)
4
5 def sum { (:list int) -> int }
6 | (:nil) => 0
7 | (:cons x xs) => { x + (sum (deref xs)) }
8
9 def range { int -> int -> (:list int) }
10 |
1 -- In this case, a^n corresponds to an n-tuple whose elements are of type a
2 -- We have a typeclass
3
4 class Conv a b where conv :: a -> b
5
6 -- And types corresponding to
7
8 type Tup t = forall n : Nat. t ^ n
9 type Var t = forall n : Nat. (forall s. Conv s t => s) ^ n
10
11 -- we also have these two built-in primitives
12
13 generalize :: Var t -> Tup t
14 forget :: Tup t -> [t]
15
16 -- that will usually be used together
17
var :: Var t -> [t]
var = forget . generalize
19
20 -- We could use this to create 'variadic' functions, e.g.
21 -- a non-type-safe but C-like printf
22
23 data PFElem = PFInt Int | PFFloat Float | PFString String
24
25 instance Conv Int PFElem where conv = PFInt
26 instance Conv Float PFElem where conv = PFFloat
27 instance Conv String PFElem where conv = PFString
28
29 printf :: String -> Var PFElem -> IO ()
printf spec args = putStr (printfInterpolate spec (var args))
31 where printfInterpolate :: String -> [PFElem] -> String
32 printfInterpolate = {- ... -}
33
34 main :: IO ()
35 main = printf "string %s and int %d\n" ("foo", 5)
36
37 --------------------------------------------------------------------------------
38
-- In Latka, we will have the `Var` type and the `var` primitive as built-ins,
-- and treat a^n as a subtype of Var a in all cases. Thus, we can convert a
41 -- value like <2,3> : Nat * Nat to a value of type Var Nat, but not the other
42 -- way around.
43
44 -- With that machinery, we can define the following as a library:
45
46 -- word
47 instance Conv String WordPart where {- ... -}
48 instance Conv Word WordPart where {- ... -}
49
50 wd : Var WordPart -> Word
51 wd = {- ... -}
52
53 -- this is well-typed, because its argument is of type String * Word <: Var WordPart
54 puts wd.<"foo", wd.<"bar", "baz">>
55 -- this is not, because its argument is of type String * String * (Nat * Nat)
56 -- puts wd.<"a","b",<1,2>>
57
58
59 -- sentence
60 instance Conv String SentencePart where {- ... -}
61 instance Conv Word SentencePart where {- ... -}
62 instance Conv Sentence SentencePart where {- ... -}
63
64 se : Var SentencePart -> Sentence
65 se = {- ... -}
66
67 -- this is well-typed, as the argument is of type
68 -- String * Word * Sentence <: Var SentencePart
69 puts se.<"one", wd.<"two", "three">, se.<"four", "five">>
70
71 -- &c
72
-- this gives us type-safe variadic functions, as well as a nice mechanism for
-- semi-implicitly stringifying things
75
76 cat : Var String -> String
77 cat vs = go.(var.vs)
78 where go [] = ""
79 go (x::xs) = x (go.xs)
80
81 {- prints "foo5<>" -}
82 puts cat.<"foo", 5, <>>
1 (* Axiom: all extant Latka programs without functions should continue
2 * to work the same way, e.g. *)
3
4 cons = $(p t k w h)
5 vowel = $(a e i o u) (4: "" | "")
6 syll = 4: cons vowel | vowel
7 puts syll (rep.6.syll)
8
9 (* We can now define rep in-language as
10 * rep (fixed n : Nat) (s : String) : String =
11 case n of
12 0 . ""
13 1 . s
14 _ . s (rep.(pred.n).s)
15 *)
16
17 (* It is acceptable that all arguments must be explicitly specified. *)
18
fact (fixed n : Nat) : Nat =
case n of
0 . 1
_ . mul.n.(fact.(pred.n))
23
24 (* We have both labeled sums and records, where the records are closer to
25 * the ML style than the Haskell style. `alias` is Haskell's `type` *)
26
27 alias Person =
28 { name : String
29 , age : Nat
30 }
31
32 data Character =
33 Character Person
34
35 (* Type variables work as in Haskell *)
36
37 data List a = Nil | Cons a (List a)
38
map (f : a -> b) (ls : List a) =
case ls of
Cons x xs . Cons.(f.x).(map.f.xs)
_ . Nil
43
44 (* We can explicitly indicate a subtyping (convertibility)
45 * relationship, so long as our conv instances form a DAG. *)
46
47 data TaggedList a = TNil | TCons a String (List a)
48
49 conv TaggedList a <: List a where
50 coerce ls = case ls of
51 TCons x _ xs . Cons x xs
52 TNil . Nil
53
(* Among other things, this is used in variadic function parameters. *)
55
56 appendAll (lsts : Variadic (List a)) : List a =
57 let go ls = case ls of
58 [] . []
59 (x::xs) . append.x.(go.xs)
60 in go lsts
61
62 puts cat.(appendAll.(["x","y"], TCons "z" TNil))
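The combination of coercion and variadic parameters can be mimicked in a dynamically typed host. A rough sketch (Python; `conv_registry`, `TaggedList`, and `append_all` are illustrative stand-ins for the Latka constructs above):

```python
# A registry of coercions, standing in for the DAG of conv instances.
conv_registry = {}

def conv_instance(src, dst):
    """Register a coercion function from src to dst."""
    def register(f):
        conv_registry[(src, dst)] = f
        return f
    return register

class TaggedList:
    def __init__(self, items_with_tags):
        self.items = items_with_tags  # list of (value, tag) pairs

@conv_instance(TaggedList, list)
def tagged_to_list(tl):
    # Dropping the tags mirrors `coerce` for TaggedList a <: List a.
    return [value for value, _tag in tl.items]

def append_all(*lists):
    """Variadic append: coerce each argument to a plain list, then concatenate."""
    out = []
    for ls in lists:
        if not isinstance(ls, list):
            ls = conv_registry[(type(ls), list)](ls)
        out.extend(ls)
    return out

assert append_all(["x", "y"], TaggedList([("z", "tag")])) == ["x", "y", "z"]
```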