add "pipelines considered harmful" post
This commit is contained in:
parent
7e6a0c1de4
commit
997372fb77
4 changed files with 209 additions and 30 deletions
18
Gemfile
18
Gemfile
|
@ -1,10 +1,15 @@
|
||||||
source "https://rubygems.org"
|
source "https://rubygems.org"
|
||||||
|
|
||||||
gem "jekyll", "~> 4.2.0"
|
gem "webrick", "~> 1.7"
|
||||||
|
gem "jekyll", "~> 4.2"
|
||||||
group :jekyll_plugins do
|
group :jekyll_plugins do
|
||||||
gem "jekyll-feed", "~> 0.12"
|
gem "jekyll-feed", "~> 0.12"
|
||||||
gem "jekyll-seo-tag", "~> 2.7"
|
gem "jekyll-seo-tag", "~> 2.8"
|
||||||
|
end
|
||||||
|
|
||||||
|
require "rbconfig"
|
||||||
|
if RbConfig::CONFIG["target_os"] =~ /(?i-mx:bsd|dragonfly)/
|
||||||
|
gem "rb-kqueue", ">= 0.2"
|
||||||
end
|
end
|
||||||
|
|
||||||
platforms :mingw, :x64_mingw, :mswin, :jruby do
|
platforms :mingw, :x64_mingw, :mswin, :jruby do
|
||||||
|
@ -12,9 +17,6 @@ platforms :mingw, :x64_mingw, :mswin, :jruby do
|
||||||
gem "tzinfo-data"
|
gem "tzinfo-data"
|
||||||
end
|
end
|
||||||
|
|
||||||
gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin]
|
platforms :mingw, :x64_mingw, :mswin do
|
||||||
|
gem "wdm", "~> 0.1.1"
|
||||||
require 'rbconfig'
|
|
||||||
if RbConfig::CONFIG['target_os'] =~ /(?i-mx:bsd|dragonfly)/
|
|
||||||
gem 'rb-kqueue', '>= 0.2'
|
|
||||||
end
|
end
|
||||||
|
|
39
Gemfile.lock
39
Gemfile.lock
|
@ -1,20 +1,20 @@
|
||||||
GEM
|
GEM
|
||||||
remote: https://rubygems.org/
|
remote: https://rubygems.org/
|
||||||
specs:
|
specs:
|
||||||
addressable (2.8.0)
|
addressable (2.8.1)
|
||||||
public_suffix (>= 2.0.2, < 5.0)
|
public_suffix (>= 2.0.2, < 6.0)
|
||||||
colorator (1.1.0)
|
colorator (1.1.0)
|
||||||
concurrent-ruby (1.1.9)
|
concurrent-ruby (1.1.10)
|
||||||
em-websocket (0.5.3)
|
em-websocket (0.5.3)
|
||||||
eventmachine (>= 0.12.9)
|
eventmachine (>= 0.12.9)
|
||||||
http_parser.rb (~> 0)
|
http_parser.rb (~> 0)
|
||||||
eventmachine (1.2.7)
|
eventmachine (1.2.7)
|
||||||
ffi (1.15.4)
|
ffi (1.15.5)
|
||||||
forwardable-extended (2.6.0)
|
forwardable-extended (2.6.0)
|
||||||
http_parser.rb (0.8.0)
|
http_parser.rb (0.8.0)
|
||||||
i18n (1.8.11)
|
i18n (1.12.0)
|
||||||
concurrent-ruby (~> 1.0)
|
concurrent-ruby (~> 1.0)
|
||||||
jekyll (4.2.1)
|
jekyll (4.2.2)
|
||||||
addressable (~> 2.4)
|
addressable (~> 2.4)
|
||||||
colorator (~> 1.0)
|
colorator (~> 1.0)
|
||||||
em-websocket (~> 0.5)
|
em-websocket (~> 0.5)
|
||||||
|
@ -29,52 +29,53 @@ GEM
|
||||||
rouge (~> 3.0)
|
rouge (~> 3.0)
|
||||||
safe_yaml (~> 1.0)
|
safe_yaml (~> 1.0)
|
||||||
terminal-table (~> 2.0)
|
terminal-table (~> 2.0)
|
||||||
jekyll-feed (0.15.1)
|
jekyll-feed (0.16.0)
|
||||||
jekyll (>= 3.7, < 5.0)
|
jekyll (>= 3.7, < 5.0)
|
||||||
jekyll-sass-converter (2.1.0)
|
jekyll-sass-converter (2.2.0)
|
||||||
sassc (> 2.0.1, < 3.0)
|
sassc (> 2.0.1, < 3.0)
|
||||||
jekyll-seo-tag (2.7.1)
|
jekyll-seo-tag (2.8.0)
|
||||||
jekyll (>= 3.8, < 5.0)
|
jekyll (>= 3.8, < 5.0)
|
||||||
jekyll-watch (2.2.1)
|
jekyll-watch (2.2.1)
|
||||||
listen (~> 3.0)
|
listen (~> 3.0)
|
||||||
kramdown (2.3.1)
|
kramdown (2.4.0)
|
||||||
rexml
|
rexml
|
||||||
kramdown-parser-gfm (1.1.0)
|
kramdown-parser-gfm (1.1.0)
|
||||||
kramdown (~> 2.0)
|
kramdown (~> 2.0)
|
||||||
liquid (4.0.3)
|
liquid (4.0.3)
|
||||||
listen (3.7.0)
|
listen (3.7.1)
|
||||||
rb-fsevent (~> 0.10, >= 0.10.3)
|
rb-fsevent (~> 0.10, >= 0.10.3)
|
||||||
rb-inotify (~> 0.9, >= 0.9.10)
|
rb-inotify (~> 0.9, >= 0.9.10)
|
||||||
mercenary (0.4.0)
|
mercenary (0.4.0)
|
||||||
pathutil (0.16.2)
|
pathutil (0.16.2)
|
||||||
forwardable-extended (~> 2.6)
|
forwardable-extended (~> 2.6)
|
||||||
public_suffix (4.0.6)
|
public_suffix (5.0.0)
|
||||||
rb-fsevent (0.11.0)
|
rb-fsevent (0.11.1)
|
||||||
rb-inotify (0.10.1)
|
rb-inotify (0.10.1)
|
||||||
ffi (~> 1.0)
|
ffi (~> 1.0)
|
||||||
rb-kqueue (0.2.7)
|
rb-kqueue (0.2.8)
|
||||||
ffi (>= 0.5.0)
|
ffi (>= 0.5.0)
|
||||||
rexml (3.2.5)
|
rexml (3.2.5)
|
||||||
rouge (3.26.1)
|
rouge (3.30.0)
|
||||||
safe_yaml (1.0.5)
|
safe_yaml (1.0.5)
|
||||||
sassc (2.4.0)
|
sassc (2.4.0)
|
||||||
ffi (~> 1.9)
|
ffi (~> 1.9)
|
||||||
terminal-table (2.0.0)
|
terminal-table (2.0.0)
|
||||||
unicode-display_width (~> 1.1, >= 1.1.1)
|
unicode-display_width (~> 1.1, >= 1.1.1)
|
||||||
unicode-display_width (1.8.0)
|
unicode-display_width (1.8.0)
|
||||||
|
webrick (1.7.0)
|
||||||
|
|
||||||
PLATFORMS
|
PLATFORMS
|
||||||
amd64-freebsd-12
|
|
||||||
amd64-freebsd-13
|
amd64-freebsd-13
|
||||||
|
|
||||||
DEPENDENCIES
|
DEPENDENCIES
|
||||||
jekyll (~> 4.2.0)
|
jekyll (~> 4.2)
|
||||||
jekyll-feed (~> 0.12)
|
jekyll-feed (~> 0.12)
|
||||||
jekyll-seo-tag (~> 2.7)
|
jekyll-seo-tag (~> 2.8)
|
||||||
rb-kqueue (>= 0.2)
|
rb-kqueue (>= 0.2)
|
||||||
tzinfo (~> 1.2)
|
tzinfo (~> 1.2)
|
||||||
tzinfo-data
|
tzinfo-data
|
||||||
wdm (~> 0.1.1)
|
wdm (~> 0.1.1)
|
||||||
|
webrick (~> 1.7)
|
||||||
|
|
||||||
BUNDLED WITH
|
BUNDLED WITH
|
||||||
2.2.27
|
2.3.21
|
||||||
|
|
10
_config.yml
10
_config.yml
|
@ -2,16 +2,20 @@ title: fef's blog
|
||||||
description: Shitposts about tech, gender, and the Big Sad.
|
description: Shitposts about tech, gender, and the Big Sad.
|
||||||
baseurl: "/"
|
baseurl: "/"
|
||||||
url: "https://fef.moe"
|
url: "https://fef.moe"
|
||||||
hs_url: "http://pv3gsakv2vlbknfnet7nkn532zzejowdxwrxajnrtasadwnitsgpawyd.onion"
|
plugins:
|
||||||
|
- jekyll-seo-tag
|
||||||
|
mirrors:
|
||||||
|
tor: "http://pv3gsakv2vlbknfnet7nkn532zzejowdxwrxajnrtasadwnitsgpawyd.onion"
|
||||||
|
dn42: "https://fef.dn42"
|
||||||
author:
|
author:
|
||||||
name: anna
|
name: anna
|
||||||
email: owo -at- fef.moe
|
email: owo -at- fef . moe
|
||||||
pgp:
|
pgp:
|
||||||
fingerprint: "9D39A75A5CDA0A5DAE78F92EEC22E476DC2D3D84"
|
fingerprint: "9D39A75A5CDA0A5DAE78F92EEC22E476DC2D3D84"
|
||||||
url: "/pgp.txt"
|
url: "/pgp.txt"
|
||||||
fedi:
|
fedi:
|
||||||
user: fef
|
user: fef
|
||||||
host: notbird.site
|
host: catcatnya.com
|
||||||
twitter: libfef
|
twitter: libfef
|
||||||
matrix:
|
matrix:
|
||||||
user: fef
|
user: fef
|
||||||
|
|
172
_posts/2023-02-21-pipelines-considered-harmful.md
Normal file
172
_posts/2023-02-21-pipelines-considered-harmful.md
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
---
|
||||||
|
layout: post
|
||||||
|
title: "Pipelines Considered Harmful"
|
||||||
|
tags: tech
|
||||||
|
---
|
||||||
|
|
||||||
|
Today, we're gonna have a closer look at pipes and what i consider "harmful" about them.
|
||||||
|
Because i just *love* spicy takes that make a certain group of people with unreasonably strong
|
||||||
|
opinions about computers really angry.
|
||||||
|
|
||||||
|
UNIX pipelines, or just pipes for short, have been around for over half a century.
|
||||||
|
The fact alone that they still see widespread use today is a manifestation of the genius behind their idea.
|
||||||
|
However, they have some limitations that we have apparently just accepted we have to live with.
|
||||||
|
|
||||||
|
## What Is A Pipe?
|
||||||
|
|
||||||
|
Before we dive in, though, let's briefly go over what pipelines are and what makes them so useful,
|
||||||
|
just to make sure we're all on the same page.
|
||||||
|
I know that you know what a pipe is, but i promise the following paragraphs are relevant to my point.
|
||||||
|
|
||||||
|
Pipelines are an IPC mechanism that (at least in this form) first appeared in the UNIX operating system.
|
||||||
|
They consist of a chain of multiple processes, where the standard output of the first one feeds
|
||||||
|
into the standard input of the next one, and so on.
|
||||||
|
|
||||||
|
In conjunction with the basic UNIX shell tools like
|
||||||
|
[`grep(1)`](https://man.freebsd.org/cgi/man.cgi?query=grep&manpath=FreeBSD+13.1-RELEASE)
|
||||||
|
or
|
||||||
|
[`sort(1)`](https://man.freebsd.org/cgi/man.cgi?query=sort&manpath=FreeBSD+13.1-RELEASE),
|
||||||
|
and even powerful stream processors like
|
||||||
|
[`awk(1)`](https://man.freebsd.org/cgi/man.cgi?query=awk&manpath=FreeBSD+13.1-RELEASE),
|
||||||
|
they enable composing sophisticated scripts within just a single line.
|
||||||
|
This embracing of modularity over monolithic designs is a core aspect of the
|
||||||
|
[UNIX philosophy](https://en.wikipedia.org/wiki/Unix_philosophy).
|
||||||
|
And it makes perfect sense from the perspective of a programmer like
|
||||||
|
[Douglas McIlroy](https://en.wikipedia.org/wiki/Doug_McIlroy)
|
||||||
|
(the dude who came up with pipes) because programming just so happens to be all about
|
||||||
|
abstraction and keeping things modular.
|
||||||
|
|
||||||
|
## Plaintext Is Simple, Stupid
|
||||||
|
|
||||||
|
An interesting aspect of UNIX command-line tools is that they operate predominantly on plaintext.
|
||||||
|
And this is no coincidence.
|
||||||
|
[Peter H. Salus](https://en.wikipedia.org/wiki/Peter_H._Salus)
|
||||||
|
summarized McIlroy's documentation of the UNIX philosophy as follows:
|
||||||
|
|
||||||
|
- Make programs that do one thing and do it well.
|
||||||
|
- Write programs to work together.
|
||||||
|
- Write programs to handle text streams, because that is a universal interface.
|
||||||
|
|
||||||
|
What i'm obviously referring to is the third point, the one about plaintext.
|
||||||
|
And i'm not saying it's wrong, but i want you to take a moment to think about this.
|
||||||
|
At the time, the most complicated data you probably ever had to deal with was the output of `ls -l`.
|
||||||
|
What's more, the number of different kinds of data has exploded since then.
|
||||||
|
|
||||||
|
To give a practical example of where i'm headed, i have a small script that runs whenever i open up
|
||||||
|
a new shell.
|
||||||
|
It fetches my
|
||||||
|
[canary message](https://fef.moe/canary.txt)
|
||||||
|
using
|
||||||
|
[`curl(1)`](https://man.freebsd.org/cgi/man.cgi?query=curl&manpath=FreeBSD+13.1-RELEASE+and+Ports),
|
||||||
|
and extracts its `last-modified` HTTP header.
|
||||||
|
It then compares this header with the current time and, if it is more than six days in the past,
|
||||||
|
prints a message to the standard output reminding me to update the canary.
|
||||||
|
|
||||||
|
Sounds simple, right?
|
||||||
|
I must admit there is probably a more elegant solution, but the entire point of pipelines is to
|
||||||
|
*avoid* having to spend too much time with such seemingly simple tasks.
|
||||||
|
Also, the version below is somewhat simplified to make it easier to read;
|
||||||
|
my original version is a true one-liner.
|
||||||
|
Anyway, here goes:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
SIX_DAYS=518400
|
||||||
|
CANARY_DATE=`curl -I https://fef.moe/canary.txt |
|
||||||
|
grep -i last-modified |
|
||||||
|
cut -c 16-` # cut off the "last-modified: " part
|
||||||
|
[ $(echo `date +%s` - `date -d "$CANARY_DATE" +%s` | bc) -ge $SIX_DAYS ] &&
|
||||||
|
echo "update your canary ffs"
|
||||||
|
```
|
||||||
|
|
||||||
|
This works.
|
||||||
|
However, i believe it is unnecessarily complex.
|
||||||
|
It is the year 2023, and for some reason we still use the same shells as in the 1970s.
|
||||||
|
Sure, zsh makes your life way easier, but no matter what you use,
|
||||||
|
you're still limited to piping text from one program to another.
|
||||||
|
|
||||||
|
Yes, HTTP response headers *can* be represented as plaintext; here is the output from the `curl`
|
||||||
|
command in the example above:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ curl -I https://fef.moe/canary.txt
|
||||||
|
HTTP/2 200
|
||||||
|
server: nginx/1.22.1
|
||||||
|
date: Tue, 21 Feb 2023 15:11:30 GMT
|
||||||
|
content-type: text/plain; charset=utf-8
|
||||||
|
content-length: 3347
|
||||||
|
last-modified: Tue, 21 Feb 2023 01:35:24 GMT
|
||||||
|
vary: Accept-Encoding
|
||||||
|
|
||||||
|
(remaining headers omitted for brevity)
|
||||||
|
```
|
||||||
|
|
||||||
|
## The goto Of Data
|
||||||
|
|
||||||
|
But do we *have* to do it like that?
|
||||||
|
|
||||||
|
The thing that makes pipelines "harmful" to me is that plaintext is just too dynamic.
|
||||||
|
It could be *anything*.
|
||||||
|
Parsing is a wildly complex problem of computer science,
|
||||||
|
so why would we do that if there was an easier way?
|
||||||
|
|
||||||
|
In his original article, Dijkstra considered the `goto` statement harmful because it lacks
|
||||||
|
the clear structure of an `if` statement.
|
||||||
|
This might be a little far-fetched, but you could view plaintext as the "goto of data".
|
||||||
|
|
||||||
|
## Getting Spicy
|
||||||
|
|
||||||
|
Computers have become powerful enough that performance pretty much isn't an issue for
|
||||||
|
command-line tools anymore.
|
||||||
|
Except for `dnf`; that one is just horrible.
|
||||||
|
|
||||||
|
Microsoft have been exploring other ways of piping data from one program to another:
|
||||||
|
[objects](https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_objects?view=powershell-7.3).
|
||||||
|
Now, i don't really like PowerShell because it has several other poor design decisions,
|
||||||
|
and the fact that it is a Microsoft invention is generally a gigantic red flag.
|
||||||
|
However, i do believe that we could learn something from it.
|
||||||
|
Why can't my shell script look something like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
SIX_DAYS=518400
|
||||||
|
CANARY_DATE=`curl -I https://fef.moe/canary.txt`.headers['last-modified']
|
||||||
|
[ $(echo `date +%s` - `date -d "$CANARY_DATE" +%s` | bc) -ge $SIX_DAYS ] &&
|
||||||
|
echo "update your canary ffs"
|
||||||
|
```
|
||||||
|
|
||||||
|
I'm not saying this syntax is good; what i care about is the idea.
|
||||||
|
That program output *can* have a machine-readable structure, kind of like a JSON object.
|
||||||
|
If someone were to design a specification for UNIX objects
|
||||||
|
**and included a way to transform these objects back into plaintext for backwards compatibility**,
|
||||||
|
i believe we would have a much easier time dealing with computers.
|
||||||
|
Basically, something like this:
|
||||||
|
|
||||||
|
```c
|
||||||
|
struct attribute {
|
||||||
|
char *name;
|
||||||
|
char *value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct object {
|
||||||
|
struct attribute *attributes;
|
||||||
|
char *(*to_string)(struct object *);
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
The `to_string` method would simply transform the object into the old-school textual representation.
|
||||||
|
An actual implementation would of course have to deal with nested attributes and so on,
|
||||||
|
but again, this is about the basic idea and not the details.
|
||||||
|
|
||||||
|
## Computer Science Is A Process
|
||||||
|
|
||||||
|
Don't get me wrong.
|
||||||
|
As i clearly stated at the beginning of this article, i find the idea of pipelines as they exist
|
||||||
|
today nothing short of ingenious.
|
||||||
|
However, the field of programming is for some reason incredibly conservative.
|
||||||
|
Why wouldn't we challenge ideas of the past, even if they work well for *most* use-cases?
|
||||||
|
All that this "if it ain't broke, don't fix it" mentality has given us is an inability to spot
|
||||||
|
shortcomings and limitations of existing technology.
|
||||||
|
And that is, IMO, just sad.
|
||||||
|
|
||||||
|
I'm not saying we should throw away everything.
|
||||||
|
Just that maybe we should reevaluate our design decisions a little more often,
|
||||||
|
and not be afraid of trying wild new concepts.
|
Loading…
Reference in a new issue