// Forked from mirrors/catstodon (JavaScript, 331 lines, 11 KiB)
/*
|
||
|
||
`util/bio_metadata`
|
||
===================
|
||
|
||
> For more information on the contents of this file, please contact:
|
||
>
|
||
> - kibigo! [@kibi@glitch.social]
|
||
|
||
This file provides two functions for dealing with bio metadata. The
|
||
functions are:
|
||
|
||
- __`processBio(content)` :__
|
||
Processes `content` to extract any frontmatter. The returned
|
||
object has two properties: `text`, which contains the text of
|
||
`content` sans-frontmatter, and `metadata`, which is an array
|
||
of key-value pairs (in two-element array format). If no
|
||
frontmatter was provided in `content`, then `metadata` will be
|
||
an empty array.
|
||
|
||
- __`createBio(note, data)` :__
|
||
Reverses the process in `processBio()`; takes a `note` and an
|
||
array of two-element arrays (which should give keys and values)
|
||
and outputs a string containing a well-formed bio with
|
||
frontmatter.
|
||
|
||
*/

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

/*********************************************************************\
|
||
|
||
To my lovely code maintainers,
|
||
|
||
The syntax recognized by the Mastodon frontend for its bio metadata
|
||
feature is a subset of that provided by the YAML 1.2 specification.
|
||
In particular, Mastodon recognizes metadata which is provided as an
|
||
implicit YAML map, where each key-value pair takes up only a single
|
||
line (no multi-line values are permitted). To simplify the level of
|
||
processing required, Mastodon metadata frontmatter has been limited
|
||
to only allow those characters in the `c-printable` set, as defined
|
||
by the YAML 1.2 specification, instead of permitting those from the
|
||
`nb-json` characters inside double-quoted strings like YAML proper.
|
||
¶ It is important to note that Mastodon only borrows the *syntax*
|
||
of YAML, not its semantics. This is to say, Mastodon won't make any
|
||
attempt to interpret the data it receives. `true` will not become a
|
||
boolean; `56` will not be interpreted as a number. Rather, each key
|
||
and every value will be read as a string, and as a string they will
|
||
remain. The order of the pairs is unchanged, and any duplicate keys
|
||
are preserved. However, YAML escape sequences will be replaced with
|
||
the proper interpretations according to the YAML 1.2 specification.
|
||
¶ The implementation provided below interprets `<br>` as `\n` and
|
||
allows for an open <p> tag at the beginning of the bio. It replaces
|
||
the escaped character entities `&apos;` and `&quot;` with single or
|
||
double quotes, respectively, prior to processing. However, no other
|
||
escaped characters are replaced, not even those which might have an
|
||
impact on the syntax otherwise. These minor allowances are provided
|
||
because the Mastodon backend will insert these things automatically
|
||
into a bio before sending it through the API, so it is important we
|
||
account for them. Aside from this, the YAML frontmatter must be the
|
||
very first thing in the bio, leading with three consecutive hyphen-
|
||
minuses (`---`), and ending with the same or, alternatively, instead
|
||
with three periods (`...`). No limits have been set with respect to
|
||
the number of characters permitted in the frontmatter, although one
|
||
should note that only limited space is provided for them in the UI.
|
||
¶ The regular expression used to check the existence of, and then
|
||
process, the YAML frontmatter has been split into a number of small
|
||
components in the code below, in the vain hope that it will be much
|
||
easier to read and to maintain. I leave it to the future readers of
|
||
this code to determine the extent of my successes in this endeavor.
|
||
|
||
UPDATE 19 Oct 2017: We no longer allow character escapes inside our
|
||
double-quoted strings for ease of processing. We now internally use
|
||
the name "ƔAML" in our code to clarify that this is Not Quite YAML.
|
||
|
||
Sending love + warmth eternal,
|
||
- kibigo [@kibi@glitch.social]
|
||
|
||
\*********************************************************************/

/* "u" FLAG COMPATIBILITY */

// Probe once, at module load, whether this engine accepts the `u`
// (unicode) RegExp flag. Engines that reject it throw from the
// constructor, in which case every pattern below is built without it.
let compat_mode = false;
try {
  new RegExp('.', 'u');
} catch (e) {
  compat_mode = true;
}

/* CONVENIENCE FUNCTIONS */

/**
 * Compiles a pattern string into a RegExp, adding the `u` flag whenever
 * the engine supports it.
 *
 * @param {string} str - Regular expression source.
 * @returns {RegExp} The compiled expression.
 */
const unirex = str => compat_mode ? new RegExp(str) : new RegExp(str, 'u');

/**
 * Returns the source of a RegExp wrapped in a non-capturing group, so it
 * can be concatenated into a larger pattern without changing precedence.
 *
 * @param {RegExp} exp - Expression to embed.
 * @returns {string} `(?:` + source + `)`.
 */
const rexstr = exp => '(?:' + exp.source + ')';

/* CHARACTER CLASSES */

// Anchors for the start and end of the whole bio.
const DOCUMENT_START = /^/;
const DOCUMENT_END = /$/;

// `c-printable` in the YAML 1.2 spec. Without the `u` flag the astral
// range cannot be expressed, so compat mode accepts everything from
// U+00A0 through U+FFFD instead (surrogates included).
const ALLOWED_CHAR = unirex(
  compat_mode ? '[\t\n\r\x20-\x7e\x85\xa0-\ufffd]' : '[\t\n\r\x20-\x7e\x85\xa0-\ud7ff\ue000-\ufffd\u{10000}-\u{10FFFF}]'
);

// A single space or tab.
const WHITE_SPACE = /[ \t]/;

// A textual line break, or an HTML `<br>` tag standing in for one.
const LINE_BREAK = /\r?\n|\r|<br\s*\/?>/;

// Characters which may not begin a plain (unquoted) key or value.
const INDICATOR = /[-?:,[\]{}&#*!|>'"%@`]/;

// Flow-collection punctuation; excluded from plain keys.
const FLOW_CHAR = /[,[\]{}]/;

/* NEGATED CHARACTER CLASSES */

// Each of these matches exactly one character (`[^]` matches anything,
// line breaks included) provided the corresponding positive pattern
// does NOT match at that position.
const NOT_WHITE_SPACE = unirex('(?!' + rexstr(WHITE_SPACE) + ')[^]');
const NOT_LINE_BREAK = unirex('(?!' + rexstr(LINE_BREAK) + ')[^]');
const NOT_INDICATOR = unirex('(?!' + rexstr(INDICATOR) + ')[^]');
const NOT_FLOW_CHAR = unirex('(?!' + rexstr(FLOW_CHAR) + ')[^]');
const NOT_ALLOWED_CHAR = unirex(
  '(?!' + rexstr(ALLOWED_CHAR) + ')[^]'
);

/* BASIC CONSTRUCTS */

// Zero or more spaces/tabs, and zero or more permitted characters.
const ANY_WHITE_SPACE = unirex(rexstr(WHITE_SPACE) + '*');
const ANY_ALLOWED_CHARS = unirex(rexstr(ALLOWED_CHAR) + '*');

// A line break, optionally preceded by trailing spaces/tabs.
const NEW_LINE = unirex(
  rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK)
);
const SOME_NEW_LINES = unirex(
  '(?:' + rexstr(NEW_LINE) + ')+'
);

// The bio may begin with an opening `<p ...>` tag.
const POSSIBLE_STARTS = unirex(
  rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?'
);

// What may follow the closing marker: line breaks, the end of the bio,
// or a closing `</p>` tag.
const POSSIBLE_ENDS = unirex(
  rexstr(SOME_NEW_LINES) + '|' +
  rexstr(DOCUMENT_END) + '|' +
  rexstr(/<\/p>/)
);

// Content of a double-quoted string: anything except `"` and line
// breaks (no escape sequences are recognized).
const QUOTE_CHAR = unirex(
  '(?=' + rexstr(NOT_LINE_BREAK) + ')[^"]'
);
const ANY_QUOTE_CHAR = unirex(
  rexstr(QUOTE_CHAR) + '*'
);

// Content of a single-quoted string: anything except `'` and line
// breaks, or a doubled `''` standing for one literal apostrophe.
const ESCAPED_APOS = unirex(
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/)
);
const ANY_ESCAPED_APOS = unirex(
  rexstr(ESCAPED_APOS) + '*'
);

// First character of a plain key: either a non-indicator, or one of
// `?`, `:`, `-` immediately followed by a character that is not a line
// break, whitespace, or flow indicator.
const FIRST_KEY_CHAR = unirex(
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
  rexstr(NOT_INDICATOR) + '|' +
  rexstr(/[?:-]/) +
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
  '(?=' + rexstr(NOT_FLOW_CHAR) + ')'
);

// First character of a plain value; same as for keys except that the
// character after `?`, `:`, or `-` may be a flow indicator.
const FIRST_VALUE_CHAR = unirex(
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
  rexstr(NOT_INDICATOR) + '|' +
  rexstr(/[?:-]/) +
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  // Flow indicators are allowed in values.
);

// Subsequent characters of a plain key: whitespace; a non-whitespace,
// non-flow character other than `:` or `#`, optionally followed by `#`
// (so `#` is only consumed directly after such a character); or a `:`
// that is followed by a non-whitespace character.
const LATER_KEY_CHAR = unirex(
  rexstr(WHITE_SPACE) + '|' +
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
  '(?=' + rexstr(NOT_FLOW_CHAR) + ')' +
  rexstr(/[^:#]#?/) + '|' +
  rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
);

// Subsequent characters of a plain value; as for keys, minus the
// flow-indicator restriction.
const LATER_VALUE_CHAR = unirex(
  rexstr(WHITE_SPACE) + '|' +
  '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
  '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
  // Flow indicators are allowed in values.
  rexstr(/[^:#]#?/) + '|' +
  rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
);

/* YAML CONSTRUCTS */

// Opening marker: `---`, optionally indented.
const ƔAML_START = unirex(
  rexstr(ANY_WHITE_SPACE) + '---'
);

// Closing marker: `---` or `...`, optionally indented. The periods are
// double-escaped because this is a string literal: a single backslash
// would be dropped by the string parser, leaving `...` in the pattern,
// which matches ANY three characters.
const ƔAML_END = unirex(
  rexstr(ANY_WHITE_SPACE) + '(?:---|\\.\\.\\.)'
);

// Asserts that everything between the opening and closing markers
// consists solely of permitted characters.
const ƔAML_LOOKAHEAD = unirex(
  '(?=' +
    rexstr(ƔAML_START) +
    rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) +
    rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS) +
  ')'
);

// Quoted scalars.
const ƔAML_DOUBLE_QUOTE = unirex(
  '"' + rexstr(ANY_QUOTE_CHAR) + '"'
);
const ƔAML_SINGLE_QUOTE = unirex(
  '\'' + rexstr(ANY_ESCAPED_APOS) + '\''
);

// Plain (unquoted) scalars.
const ƔAML_SIMPLE_KEY = unirex(
  rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*'
);
const ƔAML_SIMPLE_VALUE = unirex(
  rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*'
);

// A key or value in any of the three supported forms.
const ƔAML_KEY = unirex(
  rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
  rexstr(ƔAML_SINGLE_QUOTE) + '|' +
  rexstr(ƔAML_SIMPLE_KEY)
);
const ƔAML_VALUE = unirex(
  rexstr(ƔAML_DOUBLE_QUOTE) + '|' +
  rexstr(ƔAML_SINGLE_QUOTE) + '|' +
  rexstr(ƔAML_SIMPLE_VALUE)
);

// A colon followed by at least one space or tab, with optional
// whitespace on either side.
const ƔAML_SEPARATOR = unirex(
  rexstr(ANY_WHITE_SPACE) +
  ':' + rexstr(WHITE_SPACE) +
  rexstr(ANY_WHITE_SPACE)
);

// One `key: value` pair. Capture group 1 is the raw key and group 2 the
// raw value (quotes, if any, still attached).
const ƔAML_LINE = unirex(
  '(' + rexstr(ƔAML_KEY) + ')' +
  rexstr(ƔAML_SEPARATOR) +
  '(' + rexstr(ƔAML_VALUE) + ')'
);

/* FRONTMATTER REGEX */

// Matches a complete frontmatter section anchored at the very start of
// the bio: an optional `<p>` tag, the opening marker, between zero and
// five `key: value` lines, and the closing marker.
const ƔAML_FRONTMATTER = unirex(
  rexstr(POSSIBLE_STARTS) +
  rexstr(ƔAML_LOOKAHEAD) +
  rexstr(ƔAML_START) + rexstr(SOME_NEW_LINES) +
  '(?:' +
    rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE) + rexstr(SOME_NEW_LINES) +
  '){0,5}' +
  rexstr(ƔAML_END) + rexstr(POSSIBLE_ENDS)
);

/* SEARCHES */

// Locates one `key: value` line that follows a line break. The capture
// groups are those of ƔAML_LINE: 1 is the key, 2 is the value.
const FIND_ƔAML_LINE = unirex(
  rexstr(NEW_LINE) + rexstr(ANY_WHITE_SPACE) + rexstr(ƔAML_LINE)
);

/* STRING PROCESSING */

/**
 * Converts a raw key or value, as matched in the frontmatter, into the
 * string it denotes.
 *
 * - Double-quoted: the surrounding quotes are removed; the contents are
 *   taken verbatim.
 * - Single-quoted: the surrounding quotes are removed and every doubled
 *   apostrophe (`''`) becomes a single one.
 * - Anything else is returned unchanged.
 *
 * @param {string} str - The raw scalar.
 * @returns {string} The unquoted text.
 */
function processString (str) {
  switch (str.charAt(0)) {
  case '"':
    return str.substring(1, str.length - 1);
  case '\'':
    return str
      .substring(1, str.length - 1)
      .replace(/''/g, '\'');
  default:
    return str;
  }
}

/* BIO PROCESSING */

/**
 * Splits a bio into its frontmatter metadata and the remaining text.
 *
 * The `&quot;` and `&apos;` character entities are first replaced with
 * literal quotes; the returned `text` is taken from that decoded string.
 *
 * @param {string} content - The bio to process.
 * @returns {{text: string, metadata: Array<Array<string>>}} `text` is the
 *   bio without its frontmatter; `metadata` is a list of `[key, value]`
 *   pairs in document order, empty when no frontmatter was found.
 */
export function processBio(content) {
  // Decode the quote entities so that quoted keys and values can be
  // recognized by the frontmatter patterns.
  content = content.replace(/&quot;/g, '"').replace(/&apos;/g, '\'');
  const result = {
    text: content,
    metadata: [],
  };
  const found = content.match(ƔAML_FRONTMATTER);
  if (!found) {
    return result;
  }
  const ɣaml = found[0];

  // Everything after the matched frontmatter is the bio proper.
  const start = content.search(ƔAML_START);
  const end = start + ɣaml.length - ɣaml.search(ƔAML_START);
  result.text = content.slice(end);

  // Some browsers don't allow flags unless both args are strings
  const query = new RegExp(rexstr(FIND_ƔAML_LINE), 'g');
  let metadata = null;
  while ((metadata = query.exec(ɣaml))) {
    result.metadata.push([
      processString(metadata[1]),
      processString(metadata[2]),
    ]);
  }
  return result;
}

/* BIO CREATION */

/**
 * Renders one key or value in the simplest form able to represent it.
 *
 * @param {string} str - The text to render.
 * @param {RegExp} simple - Pattern for the plain (unquoted) form.
 * @returns {string} `str` as-is if the plain pattern matches all of it;
 *   double-quoted if it contains no `"` or line break; otherwise
 *   single-quoted, with apostrophes doubled and disallowed characters
 *   replaced by U+FFFD.
 */
function quoteScalar(str, simple) {
  // `match()` returns the leftmost match; comparing it against the
  // input checks that the pattern covers the ENTIRE string.
  if (str === (str.match(simple) || [])[0]) return str;
  if (str === (str.match(ANY_QUOTE_CHAR) || [])[0]) return '"' + str + '"';
  const escaped = str
    .replace(/'/g, '\'\'')
    .replace(new RegExp(rexstr(NOT_ALLOWED_CHAR), compat_mode ? 'g' : 'gu'), '\uFFFD');
  return '\'' + escaped + '\'';
}

/**
 * Reverses `processBio()`: builds a bio string consisting of frontmatter
 * for `data` followed by `note`.
 *
 * Frontmatter is emitted when `data` is non-empty, and also (as an empty
 * `---` / `...` section) when `note` itself begins with `---` followed by
 * whitespace, so that the note is not later mistaken for frontmatter.
 *
 * @param {string} note - The bio text; falsy values are treated as `''`.
 * @param {Array<Array<*>>} data - `[key, value]` pairs; both members are
 *   coerced to strings.
 * @returns {string} The frontmatter (possibly empty) followed by `note`.
 */
export function createBio(note, data) {
  if (!note) note = '';
  let frontmatter = '';
  if ((data && data.length) || note.match(/^\s*---\s+/)) {
    if (!data) frontmatter = '---\n...\n';
    else {
      frontmatter += '---\n';
      for (let i = 0; i < data.length; i++) {
        const key = quoteScalar('' + data[i][0], ƔAML_SIMPLE_KEY);
        const val = quoteScalar('' + data[i][1], ƔAML_SIMPLE_VALUE);
        frontmatter += key + ': ' + val + '\n';
      }
      frontmatter += '...\n';
    }
  }
  return frontmatter + note;
}