add "pipelines considered harmful" post
This commit is contained in:
parent
7e6a0c1de4
commit
997372fb77
4 changed files with 209 additions and 30 deletions
18
Gemfile
18
Gemfile
|
@ -1,10 +1,15 @@
|
|||
source "https://rubygems.org"
|
||||
|
||||
gem "jekyll", "~> 4.2.0"
|
||||
|
||||
gem "webrick", "~> 1.7"
|
||||
gem "jekyll", "~> 4.2"
|
||||
group :jekyll_plugins do
|
||||
gem "jekyll-feed", "~> 0.12"
|
||||
gem "jekyll-seo-tag", "~> 2.7"
|
||||
gem "jekyll-seo-tag", "~> 2.8"
|
||||
end
|
||||
|
||||
require "rbconfig"
|
||||
if RbConfig::CONFIG["target_os"] =~ /(?i-mx:bsd|dragonfly)/
|
||||
gem "rb-kqueue", ">= 0.2"
|
||||
end
|
||||
|
||||
platforms :mingw, :x64_mingw, :mswin, :jruby do
|
||||
|
@ -12,9 +17,6 @@ platforms :mingw, :x64_mingw, :mswin, :jruby do
|
|||
gem "tzinfo-data"
|
||||
end
|
||||
|
||||
gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin]
|
||||
|
||||
require 'rbconfig'
|
||||
if RbConfig::CONFIG['target_os'] =~ /(?i-mx:bsd|dragonfly)/
|
||||
gem 'rb-kqueue', '>= 0.2'
|
||||
platforms :mingw, :x64_mingw, :mswin do
|
||||
gem "wdm", "~> 0.1.1"
|
||||
end
|
||||
|
|
39
Gemfile.lock
39
Gemfile.lock
|
@ -1,20 +1,20 @@
|
|||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
addressable (2.8.0)
|
||||
public_suffix (>= 2.0.2, < 5.0)
|
||||
addressable (2.8.1)
|
||||
public_suffix (>= 2.0.2, < 6.0)
|
||||
colorator (1.1.0)
|
||||
concurrent-ruby (1.1.9)
|
||||
concurrent-ruby (1.1.10)
|
||||
em-websocket (0.5.3)
|
||||
eventmachine (>= 0.12.9)
|
||||
http_parser.rb (~> 0)
|
||||
eventmachine (1.2.7)
|
||||
ffi (1.15.4)
|
||||
ffi (1.15.5)
|
||||
forwardable-extended (2.6.0)
|
||||
http_parser.rb (0.8.0)
|
||||
i18n (1.8.11)
|
||||
i18n (1.12.0)
|
||||
concurrent-ruby (~> 1.0)
|
||||
jekyll (4.2.1)
|
||||
jekyll (4.2.2)
|
||||
addressable (~> 2.4)
|
||||
colorator (~> 1.0)
|
||||
em-websocket (~> 0.5)
|
||||
|
@ -29,52 +29,53 @@ GEM
|
|||
rouge (~> 3.0)
|
||||
safe_yaml (~> 1.0)
|
||||
terminal-table (~> 2.0)
|
||||
jekyll-feed (0.15.1)
|
||||
jekyll-feed (0.16.0)
|
||||
jekyll (>= 3.7, < 5.0)
|
||||
jekyll-sass-converter (2.1.0)
|
||||
jekyll-sass-converter (2.2.0)
|
||||
sassc (> 2.0.1, < 3.0)
|
||||
jekyll-seo-tag (2.7.1)
|
||||
jekyll-seo-tag (2.8.0)
|
||||
jekyll (>= 3.8, < 5.0)
|
||||
jekyll-watch (2.2.1)
|
||||
listen (~> 3.0)
|
||||
kramdown (2.3.1)
|
||||
kramdown (2.4.0)
|
||||
rexml
|
||||
kramdown-parser-gfm (1.1.0)
|
||||
kramdown (~> 2.0)
|
||||
liquid (4.0.3)
|
||||
listen (3.7.0)
|
||||
listen (3.7.1)
|
||||
rb-fsevent (~> 0.10, >= 0.10.3)
|
||||
rb-inotify (~> 0.9, >= 0.9.10)
|
||||
mercenary (0.4.0)
|
||||
pathutil (0.16.2)
|
||||
forwardable-extended (~> 2.6)
|
||||
public_suffix (4.0.6)
|
||||
rb-fsevent (0.11.0)
|
||||
public_suffix (5.0.0)
|
||||
rb-fsevent (0.11.1)
|
||||
rb-inotify (0.10.1)
|
||||
ffi (~> 1.0)
|
||||
rb-kqueue (0.2.7)
|
||||
rb-kqueue (0.2.8)
|
||||
ffi (>= 0.5.0)
|
||||
rexml (3.2.5)
|
||||
rouge (3.26.1)
|
||||
rouge (3.30.0)
|
||||
safe_yaml (1.0.5)
|
||||
sassc (2.4.0)
|
||||
ffi (~> 1.9)
|
||||
terminal-table (2.0.0)
|
||||
unicode-display_width (~> 1.1, >= 1.1.1)
|
||||
unicode-display_width (1.8.0)
|
||||
webrick (1.7.0)
|
||||
|
||||
PLATFORMS
|
||||
amd64-freebsd-12
|
||||
amd64-freebsd-13
|
||||
|
||||
DEPENDENCIES
|
||||
jekyll (~> 4.2.0)
|
||||
jekyll (~> 4.2)
|
||||
jekyll-feed (~> 0.12)
|
||||
jekyll-seo-tag (~> 2.7)
|
||||
jekyll-seo-tag (~> 2.8)
|
||||
rb-kqueue (>= 0.2)
|
||||
tzinfo (~> 1.2)
|
||||
tzinfo-data
|
||||
wdm (~> 0.1.1)
|
||||
webrick (~> 1.7)
|
||||
|
||||
BUNDLED WITH
|
||||
2.2.27
|
||||
2.3.21
|
||||
|
|
10
_config.yml
10
_config.yml
|
@ -2,16 +2,20 @@ title: fef's blog
|
|||
description: Shitposts about tech, gender, and the Big Sad.
|
||||
baseurl: "/"
|
||||
url: "https://fef.moe"
|
||||
hs_url: "http://pv3gsakv2vlbknfnet7nkn532zzejowdxwrxajnrtasadwnitsgpawyd.onion"
|
||||
plugins:
|
||||
- jekyll-seo-tag
|
||||
mirrors:
|
||||
tor: "http://pv3gsakv2vlbknfnet7nkn532zzejowdxwrxajnrtasadwnitsgpawyd.onion"
|
||||
dn42: "https://fef.dn42"
|
||||
author:
|
||||
name: anna
|
||||
email: owo -at- fef.moe
|
||||
email: owo -at- fef . moe
|
||||
pgp:
|
||||
fingerprint: "9D39A75A5CDA0A5DAE78F92EEC22E476DC2D3D84"
|
||||
url: "/pgp.txt"
|
||||
fedi:
|
||||
user: fef
|
||||
host: notbird.site
|
||||
host: catcatnya.com
|
||||
twitter: libfef
|
||||
matrix:
|
||||
user: fef
|
||||
|
|
172
_posts/2023-02-21-pipelines-considered-harmful.md
Normal file
172
_posts/2023-02-21-pipelines-considered-harmful.md
Normal file
|
@ -0,0 +1,172 @@
|
|||
---
|
||||
layout: post
|
||||
title: "Pipelines Considered Harmful"
|
||||
tags: tech
|
||||
---
|
||||
|
||||
Today, we're gonna have a closer look at pipes and what i consider "harmful" about them.
|
||||
Because i just *love* spicy takes that make a certain group of people with unreasonably strong
|
||||
opinions about computers really angry.
|
||||
|
||||
UNIX pipelines, or just pipes for short, have been around for over half a century.
|
||||
The fact alone that they still see widespread use today is a manifestation of the genius behind their idea.
|
||||
However, they have some limitations that we have apparently just accepted as something to live with.
|
||||
|
||||
## What Is A Pipe?
|
||||
|
||||
Before we dive in, though, let's briefly go over what pipelines are and what makes them so useful,
|
||||
just to make sure we're all on the same page.
|
||||
I know that you know what a pipe is, but i promise the following paragraphs are relevant to my point.
|
||||
|
||||
Pipelines are an IPC mechanism that (at least in this form) first appeared in the UNIX operating system.
|
||||
They consist of a chain of multiple processes, where the standard output of the first one feeds
|
||||
into the standard input of the next one, and so on.
|
||||
|
||||
In conjunction with the basic UNIX shell tools like
|
||||
[`grep(1)`](https://man.freebsd.org/cgi/man.cgi?query=grep&manpath=FreeBSD+13.1-RELEASE)
|
||||
or
|
||||
[`sort(1)`](https://man.freebsd.org/cgi/man.cgi?query=sort&manpath=FreeBSD+13.1-RELEASE),
|
||||
and even powerful stream processors like
|
||||
[`awk(1)`](https://man.freebsd.org/cgi/man.cgi?query=awk&manpath=FreeBSD+13.1-RELEASE),
|
||||
they enable composing sophisticated scripts within just a single line.
|
||||
This embracing of modularity over monolithic designs is a core aspect of the
|
||||
[UNIX philosophy](https://en.wikipedia.org/wiki/Unix_philosophy).
|
||||
And it makes perfect sense from the perspective of a programmer like
|
||||
[Douglas McIlroy](https://en.wikipedia.org/wiki/Doug_McIlroy)
|
||||
(the dude who came up with pipes) because programming just so happens to be all about
|
||||
abstraction and keeping things modular.
|
||||
|
||||
## Plaintext Is Simple, Stupid
|
||||
|
||||
An interesting aspect of UNIX command-line tools is that they operate predominantly on plaintext.
|
||||
And this is no coincidence.
|
||||
[Peter H. Salus](https://en.wikipedia.org/wiki/Peter_H._Salus)
|
||||
summarized McIlroy's documentation of the UNIX philosophy as follows:
|
||||
|
||||
- Make programs that do one thing and do it well.
|
||||
- Write programs to work together.
|
||||
- Write programs to handle text streams, because that is a universal interface.
|
||||
|
||||
What i'm obviously referring to is the third point, the one about plaintext.
|
||||
And i'm not saying it's wrong, but i want you to take a moment to think about this.
|
||||
At the time, the most complicated data you probably ever had to deal with was the output of `ls -l`.
|
||||
What's more, the variety of different kinds of data has exploded since then.
|
||||
|
||||
To give a practical example of where i'm headed, i have a small script that runs whenever i open up
|
||||
a new shell.
|
||||
It fetches my
|
||||
[canary message](https://fef.moe/canary.txt)
|
||||
using
|
||||
[`curl(1)`](https://man.freebsd.org/cgi/man.cgi?query=curl&manpath=FreeBSD+13.1-RELEASE+and+Ports),
|
||||
and extracts its `last-modified` HTTP header.
|
||||
It then compares this header with the current time and, if it is more than six days in the past,
|
||||
prints a message to the standard output reminding me to update the canary.
|
||||
|
||||
Sounds simple, right?
|
||||
I must admit there is probably a more elegant solution, but the entire point of pipelines is to
|
||||
*avoid* having to spend too much time with such seemingly simple tasks.
|
||||
Also, the version below is somewhat simplified to make it easier to read;
|
||||
my original version is a true one-liner.
|
||||
Anyway, here goes:
|
||||
|
||||
```sh
|
||||
SIX_DAYS=518400
|
||||
CANARY_DATE=`curl -I https://fef.moe/canary.txt |
|
||||
grep -i last-modified |
|
||||
cut -c 16-` # cut off the "last-modified: " part
|
||||
[ $(echo `date +%s` - `date -d "$CANARY_DATE" +%s` | bc) -ge $SIX_DAYS ] &&
|
||||
echo "update your canary ffs"
|
||||
```
|
||||
|
||||
This works.
|
||||
However, i believe it is unnecessarily complex.
|
||||
It is the year 2023, and for some reason we still use the same shells as in the 1970s.
|
||||
Sure, zsh makes your life way easier, but no matter what you use,
|
||||
you're still limited to piping text from one program to another.
|
||||
|
||||
Yes, HTTP response headers *can* be represented as plaintext; here is the output from the `curl`
|
||||
command in the example above:
|
||||
|
||||
```
|
||||
$ curl -I https://fef.moe/canary.txt
|
||||
HTTP/2 200
|
||||
server: nginx/1.22.1
|
||||
date: Tue, 21 Feb 2023 15:11:30 GMT
|
||||
content-type: text/plain; charset=utf-8
|
||||
content-length: 3347
|
||||
last-modified: Tue, 21 Feb 2023 01:35:24 GMT
|
||||
vary: Accept-Encoding
|
||||
|
||||
(remaining headers omitted for brevity)
|
||||
```
|
||||
|
||||
## The goto Of Data
|
||||
|
||||
But do we *have* to do it like that?
|
||||
|
||||
The thing that makes pipelines "harmful" to me is that plaintext is just too dynamic.
|
||||
It could be *anything*.
|
||||
Parsing is a wildly complex problem of computer science,
|
||||
so why would we do that if there was an easier way?
|
||||
|
||||
In his original article, Dijkstra considered the `goto` statement harmful because it lacks
|
||||
the clear structure of an `if` statement.
|
||||
This might be a little far-fetched, but you could view plaintext as the "goto of data".
|
||||
|
||||
## Getting Spicy
|
||||
|
||||
Computers have become powerful enough that performance pretty much isn't an issue for
|
||||
command-line tools anymore.
|
||||
Except for `dnf`; that one is just horrible.
|
||||
|
||||
Microsoft have been exploring other ways of piping data from one program to another:
|
||||
[objects](https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_objects?view=powershell-7.3).
|
||||
Now, i don't really like PowerShell because it has several other poor design decisions,
|
||||
and the fact that it is a Microsoft invention is generally a gigantic red flag.
|
||||
However, i do believe that we could learn something from it.
|
||||
Why can't my shell script look something like this:
|
||||
|
||||
```
|
||||
SIX_DAYS=518400
|
||||
CANARY_DATE=`curl -I https://fef.moe/canary.txt`.headers['last-modified']
|
||||
[ $(echo `date +%s` - `date -d "$CANARY_DATE" +%s` | bc) -ge $SIX_DAYS ] &&
|
||||
echo "update your canary ffs"
|
||||
```
|
||||
|
||||
I'm not saying this syntax is good; what i care about is the idea.
|
||||
That program output *can* have a machine-readable structure, kind of like a JSON object.
|
||||
If someone were to design a specification for UNIX objects
|
||||
**and include a way to transform these objects back into plaintext for backwards compatibility**,
|
||||
i believe we would have a much easier time dealing with computers.
|
||||
Basically, something like this:
|
||||
|
||||
```c
|
||||
struct attribute {
|
||||
char *name;
|
||||
char *value;
|
||||
};
|
||||
|
||||
struct object {
|
||||
struct attribute *attributes;
|
||||
char *(*to_string)(struct object *);
|
||||
};
|
||||
```
|
||||
|
||||
The `to_string` method would simply transform the object into the old-school textual representation.
|
||||
An actual implementation would of course have to deal with nested attributes and so on,
|
||||
but again, this is about the basic idea and not the details.
|
||||
|
||||
## Computer Science Is A Process
|
||||
|
||||
Don't get me wrong.
|
||||
As i clearly stated at the beginning of this article, i find the idea of pipelines as they exist
|
||||
today nothing short of ingenious.
|
||||
However, the field of programming is for some reason incredibly conservative.
|
||||
Why wouldn't we challenge ideas of the past, even if they work well for *most* use-cases?
|
||||
All that this "if it ain't broke, don't fix it" mentality has given us is an inability to spot
|
||||
shortcomings and limitations of existing technology.
|
||||
And that is, in my opinion, just sad.
|
||||
|
||||
I'm not saying we should throw away everything.
|
||||
Just that maybe we should reevaluate our design decisions a little more often,
|
||||
and not be afraid of trying wild new concepts.
|
Loading…
Reference in a new issue