kprintf: implement "full" format sequence support

So this was painful.  kprintf() now supports most
of the BSD format specifiers, except for the $
sequence.  It has received a general overhaul and
is significantly more sophisticated, bloated, and
slower now.  There is also some minor stuff I
forgot about, like the 't' length modifier, but
that isn't so important right now and will be
fixed later(TM).
This commit is contained in:
anna 2021-10-03 04:31:28 +02:00
parent c31149c6cc
commit d475429639
Signed by: fef
GPG key ID: EC22E476DC2D3D84
6 changed files with 537 additions and 147 deletions

View file

@ -1,156 +1,95 @@
/* See the end of this file for copyright and license terms. */
#include <stdarg.h> /* from clang */
/*
* Ok, i'm gonna be honest, this is one of those functions where i wish i was
* writing them in Rust rather than C. Anyway, the tricky part of this entire
* implementation is that we can't call kmalloc() because that uses kprintf()
* internally for debug messages and the such, so it could potentially end in an
* infinite loop. But, since format strings allow arbitrary padding, we need to
* resort to our good old friend alloca().
* Also, this code is probably buggy as shit and i only tested some basic format
* sequences, so don't push it too far. Be gentle.
* I hope i never have to touch this again.
*/
#include <alloca.h> /* clang */
#include <stdarg.h> /* clang */
#include <string.h>
#include <gay/cdefs.h>
#include <gay/config.h>
#include <gay/errno.h>
#include <gay/kprintf.h>
#include <gay/types.h>
#if __SIZEOF_INT__ == 2
/* 5 decimal digits of 65535 (2 ** 16 - 1) */
# define PRINTF_UINT_BUFSZ 5
#elif __SIZEOF_INT__ == 4
/* 10 decimal digits of 4294967295 (2 ** 32 - 1) */
# define PRINTF_UINT_BUFSZ 10
#elif __SIZEOF_INT__ == 8
/* 20 decimal digits of 18446744073709551615 (2 ** 64 - 1) */
# define PRINTF_UINT_BUFSZ 20
#else
# error "Unsupported int size"
#endif
static struct kprintf_printer *printer = NULL;
static struct kprintf_renderer *renderer = NULL;
int kprintf_set_renderer(struct kprintf_renderer *new)
int kprintf_set_printer(struct kprintf_printer *new)
{
int ret = 0;
if (renderer != NULL)
ret = renderer->flush(renderer);
if (printer != NULL)
ret = printer->flush(printer);
renderer = new;
printer = new;
return ret;
}
static int fmt_handle_ptr(uintptr_t ptr)
/**
 * @brief Length modifier of a single format sequence.
 *
 * Filled in by parse_fmt_sequence() and consumed by get_arg_signed() and
 * get_arg_unsigned() to decide which type is pulled from the va_list.
 * The parser accepts the modifier letters in either case.
 */
enum length_modifier {
/* no modifier given (also the value left behind by the parser's memset()) */
LENGTH_DEFAULT = 0,
/* `h`: argument is fetched as int, like the default */
LENGTH_H,
/* `hh`: argument is fetched as int, like the default */
LENGTH_HH,
/* `l`: long / unsigned long; also forced by the `C` and `S` specifiers */
LENGTH_L,
/* `ll`: long long / unsigned long long */
LENGTH_LL,
/* `j`: intmax_t / uintmax_t */
LENGTH_J,
/* `t`: fetched as intptr_t / uintptr_t */
LENGTH_T,
/* `z`: isize / usize */
LENGTH_Z,
};
/**
 * @brief Parsed representation of one `%...` format sequence.
 *
 * parse_fmt_sequence() zeroes the whole structure and then fills in whatever
 * the format string specifies, so every field defaults to 0 / false / NULL.
 */
struct fmt_sequence {
/*
 * Callback that pulls its argument from `ap` and writes the rendered text.
 * NULL if the conversion specifier was not recognized; kvprintf() skips
 * rendering in that case.
 */
isize (*render)(const struct fmt_sequence *sequence, va_list *ap);
/* decimal number in front of the optional `.`; the integer renderers pad with '0' up to this many digits */
unsigned int min_width;
/* decimal number after the `.`, capped at 128 by the parser */
unsigned int max_precision;
/* which argument type get_arg_signed() / get_arg_unsigned() fetch */
enum length_modifier length_modifier;
/* one bool per flag character: '#', '0', '-', ' ', '+' and '\'' */
struct {
bool hash;
bool zero;
bool minus;
/* never set together with plus, the parser lets '+' override ' ' */
bool space;
bool plus;
bool apos;
} flags __packed; /* save some bytes on the stack :) */
/* set for the `P` and `X` specifiers, selects the upper case digit table */
bool uppercase;
};
static void parse_fmt_sequence(struct fmt_sequence *sequence, const char **restrict pos);
/** @brief Write a NUL terminated string using the current `printer`. */
static isize write_asciz(const char *s);
/** @brief Write a specific amount of bytes using the current `printer`. */
static isize write_bytes(const void *buf, usize len);
int kvprintf(const char *fmt, va_list _args)
{
static const char table[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
};
int ret;
/* 2 chars per byte, plus 2 for the "0x" hex prefix */
char str[2 * sizeof(uintptr_t) + 2];
char *pos = &str[2 * sizeof(uintptr_t) + 1];
str[0] = '0';
str[1] = 'x';
do {
*pos-- = table[ptr & 0xf];
ptr >>= 4;
} while (pos != &str[1]);
ret = renderer->write(renderer, str, 2 * sizeof(uintptr_t) + 2);
return ret;
}
static int fmt_handle_uint(unsigned int u)
{
char str[PRINTF_UINT_BUFSZ];
char *pos = &str[PRINTF_UINT_BUFSZ - 1];
do {
/* stupid big endian humans, forcing us to do the whole thing in reverse */
*pos-- = (char)(u % 10) + '0'; /* convert to ASCII */
u /= 10;
} while (u != 0);
pos++;
return (int)renderer->write(renderer, pos, PRINTF_UINT_BUFSZ - (pos - str));
}
static inline int fmt_handle_int(int i)
{
int ret = 0;
char minus = '-';
if (i < 0) {
ret = renderer->write(renderer, &minus, sizeof(minus));
i = -i;
}
if (ret >= 0) {
int uint_ret = fmt_handle_uint((unsigned int)i);
if (uint_ret < 0)
ret = uint_ret;
else if (ret < 0)
ret += uint_ret;
}
return ret;
}
int kvprintf(const char *fmt, va_list args)
{
ssize_t ret = 0;
isize ret = 0;
const char *tmp = fmt;
union {
char c;
int d;
uintptr_t p;
char *s;
unsigned int u;
} val;
va_list args;
va_copy(args, _args);
while (*tmp != '\0') {
if (*tmp++ == '%') {
/* flush out everything we have so far (minus one char for %) */
ssize_t write_ret = renderer->write(
renderer,
fmt,
(size_t)tmp - (size_t)fmt - 1
);
/* write out everything we have so far (minus one char for %) */
isize write_ret = write_bytes(fmt, (usize)tmp - (usize)fmt - 1);
if (write_ret < 0) {
ret = write_ret;
break;
}
ret += write_ret;
ssize_t fmt_ret = 0;
switch (*tmp) {
case '%': /* literal percent sign */
fmt_ret = renderer->write(renderer, tmp, sizeof(*tmp));
break;
case 'c': /* char */
val.c = va_arg(args, int); /* POSIX wants an int */
fmt_ret = renderer->write(renderer, &val.c, sizeof(val.c));
break;
case 'd': /* int */
case 'i':
fmt_ret = fmt_handle_int(va_arg(args, int));
break;
case 'p': /* ptr */
fmt_ret = fmt_handle_ptr(va_arg(args, uintptr_t));
break;
case 's': /* string */
val.s = va_arg(args, char *);
fmt_ret = renderer->write(renderer, val.s, strlen(val.s));
break;
case 'u': /* unsigned int */
fmt_ret = fmt_handle_uint(va_arg(args, unsigned int));
break;
}
tmp++;
isize fmt_ret = 0;
struct fmt_sequence sequence;
parse_fmt_sequence(&sequence, &tmp);
if (sequence.render != NULL)
fmt_ret = sequence.render(&sequence, &args);
/*
* act as if the current position were the beginning in
@ -165,19 +104,20 @@ int kvprintf(const char *fmt, va_list args)
}
if (tmp != fmt && ret >= 0) {
ssize_t render_ret = renderer->write(renderer, fmt, (size_t)tmp - (size_t)fmt);
isize render_ret = write_bytes(fmt, (usize)tmp - (usize)fmt);
if (render_ret < 0)
ret = render_ret;
else
ret += render_ret;
}
ssize_t flush_ret = renderer->flush(renderer);
isize flush_ret = printer->flush(printer);
if (flush_ret < 0)
ret = flush_ret;
else
ret += flush_ret;
va_end(args);
return (int)ret;
}
@ -193,6 +133,447 @@ int kprintf(const char *fmt, ...)
return ret;
}
static isize render_c(const struct fmt_sequence *sequence, va_list *ap);
static isize render_d(const struct fmt_sequence *sequence, va_list *ap);
static isize render_o(const struct fmt_sequence *sequence, va_list *ap);
static isize render_p(const struct fmt_sequence *sequence, va_list *ap);
static isize render_s(const struct fmt_sequence *sequence, va_list *ap);
static isize render_u(const struct fmt_sequence *sequence, va_list *ap);
static isize render_x(const struct fmt_sequence *sequence, va_list *ap);
static isize render_percent(const struct fmt_sequence *sequence, va_list *ap);
/*
* Oh boi this is gonna be fun.
* So, this is basically a step by step implementation of the FreeBSD manpage
* for printf(), except that there is no support for all the deprecated
* specifiers and the (imho insane) $ directive:
* <https://www.freebsd.org/cgi/man.cgi?query=printf&sektion=3&manpath=FreeBSD+13.0-RELEASE>
*/
/**
 * @brief Parse one format sequence, i.e. everything that follows a `%`.
 *
 * The sequence is read in the order flags, minimum width, `.` precision,
 * length modifier, conversion specifier.  Neither the `$` directive nor `*`
 * for width/precision is handled.
 *
 * @param sequence Output parameter; it is zeroed first and then filled in.
 *	`render` stays NULL if the conversion specifier is unknown.
 * @param pos In: points to the first character after the `%`.
 *	Out: points just past the conversion specifier (or at the terminating
 *	NUL if the format string ended in the middle of the sequence).
 */
void parse_fmt_sequence(struct fmt_sequence *sequence, const char **restrict pos)
{
	/* upper bound for width and precision so renderers can use small buffers */
	const unsigned int limit = 128;

	memset(sequence, 0, sizeof(*sequence));

	if (**pos == '%') { /* %% */
		sequence->render = render_percent;
		/* consume the second '%' like every other specifier is consumed */
		(*pos)++;
		return;
	}

	/*
	 * parse optional flags
	 */
	bool continue_parse_flags = true;
	while (continue_parse_flags) {
		switch (**pos) {
		case '#':
			sequence->flags.hash = true;
			break;
		case '0':
			sequence->flags.zero = true;
			break;
		case '-':
			sequence->flags.minus = true;
			break;
		case ' ':
			/* the FreeBSD manpage says plus overrides space if both are used */
			if (!sequence->flags.plus)
				sequence->flags.space = true;
			break;
		case '+':
			sequence->flags.plus = true;
			sequence->flags.space = false;
			break;
		case '\'':
			sequence->flags.apos = true;
			break;
		default:
			continue_parse_flags = false;
			break;
		}

		/* only step over characters that actually were flags */
		if (continue_parse_flags)
			(*pos)++;
	}

	/*
	 * parse optional minimum digits
	 */
	while (**pos >= '0' && **pos <= '9') {
		sequence->min_width *= 10;
		sequence->min_width += (unsigned int)(**pos - '0');
		/* sanitize length (prevents stack overflow and integer wraparound) */
		if (sequence->min_width > limit)
			sequence->min_width = limit;
		(*pos)++;
	}

	/*
	 * parse optional maximum precision
	 */
	if (**pos == '.') {
		(*pos)++;
		while (**pos >= '0' && **pos <= '9') {
			sequence->max_precision *= 10;
			sequence->max_precision += (unsigned int)(**pos - '0');
			/* sanitize length (prevents stack overflow and integer wraparound) */
			if (sequence->max_precision > limit)
				sequence->max_precision = limit;
			(*pos)++;
		}
		/* pos now rests on the first non-digit, which is what comes next */
	}

	/*
	 * parse optional length modifier
	 */
	switch (**pos) {
	case 'h':
	case 'H':
		if ((*pos)[1] == 'h' || (*pos)[1] == 'H') {
			sequence->length_modifier = LENGTH_HH;
			(*pos) += 2;
		} else {
			sequence->length_modifier = LENGTH_H;
			(*pos) += 1;
		}
		break;
	case 'l':
	case 'L':
		if ((*pos)[1] == 'l' || (*pos)[1] == 'L') {
			sequence->length_modifier = LENGTH_LL;
			(*pos) += 2;
		} else {
			sequence->length_modifier = LENGTH_L;
			(*pos) += 1;
		}
		break;
	case 'j':
	case 'J':
		sequence->length_modifier = LENGTH_J;
		(*pos)++;
		break;
	case 't':
	case 'T':
		sequence->length_modifier = LENGTH_T;
		(*pos)++;
		break;
	case 'z':
	case 'Z':
		sequence->length_modifier = LENGTH_Z;
		(*pos)++;
		break;
	default:
		break;
	}

	/*
	 * parse type specifier
	 */
	switch (**pos) {
	case 'C':
		sequence->length_modifier = LENGTH_L;
		/* fall through */
	case 'c':
		sequence->render = render_c;
		break;
	case 'd':
	case 'i':
		sequence->render = render_d;
		break;
	case 'o':
		sequence->render = render_o;
		break;
	case 'P':
		sequence->uppercase = true;
		/* fall through */
	case 'p':
		sequence->render = render_p;
		break;
	case 'S':
		sequence->length_modifier = LENGTH_L;
		/* fall through */
	case 's':
		sequence->render = render_s;
		break;
	case 'u':
		sequence->render = render_u;
		break;
	case 'X':
		sequence->uppercase = true;
		/* fall through */
	case 'x':
		sequence->render = render_x;
		break;
	default:
		sequence->render = NULL;
		break;
	}

	/*
	 * Consume the specifier (unknown ones are swallowed as well), but never
	 * walk past the terminating NUL of a truncated format string.
	 */
	if (**pos != '\0')
		(*pos)++;
}
static ssize_t render_c(const struct fmt_sequence *sequence, va_list *ap)
{
ssize_t ret;
if (sequence->length_modifier == LENGTH_L) {
wchar_t val = (wchar_t)va_arg(*ap, wint_t);
ret = printer->write(printer, &val, sizeof(val));
} else {
char val = (char)va_arg(*ap, int);
ret = printer->write(printer, &val, sizeof(val));
}
return ret;
}
static ssize_t render_s(const struct fmt_sequence *sequence, va_list *ap)
{
/* yes i know i forgot the wchar_t if the length_modifier is LENGTH_L but idgaf */
const char *s = va_arg(*ap, char *);
return write_asciz(s);
}
/**
 * @brief Fetch the next signed integer argument according to the length modifier.
 *
 * @param dest Where to store the argument, widened to `intmax_t`.
 *	It is written on every path.
 * @param sequence Parsed format sequence; only `length_modifier` is read.
 * @param ap Argument list to take the value from.
 */
static inline void get_arg_signed(intmax_t *dest,
				  const struct fmt_sequence *sequence,
				  va_list *ap)
{
	switch (sequence->length_modifier) {
	case LENGTH_HH:
		/* passed as int, but printf() semantics convert it back to char */
		*dest = (signed char)va_arg(*ap, int);
		break;
	case LENGTH_H:
		/* passed as int, but printf() semantics convert it back to short */
		*dest = (short)va_arg(*ap, int);
		break;
	case LENGTH_L:
		*dest = va_arg(*ap, long);
		break;
	case LENGTH_LL:
		*dest = va_arg(*ap, long long);
		break;
	case LENGTH_Z:
		*dest = va_arg(*ap, isize);
		break;
	case LENGTH_J:
		*dest = va_arg(*ap, intmax_t);
		break;
	case LENGTH_T:
		/* NOTE(review): C specifies ptrdiff_t for `t`; intptr_t is kept as is */
		*dest = va_arg(*ap, intptr_t);
		break;
	case LENGTH_DEFAULT:
	default:
		/* anything unexpected is read as a plain int so dest is never left unset */
		*dest = va_arg(*ap, int);
		break;
	}
}
/**
 * @brief Fetch the next unsigned integer argument according to the length modifier.
 *
 * @param dest Where to store the argument, widened to `uintmax_t`.
 *	It is written on every path.
 * @param sequence Parsed format sequence; only `length_modifier` is read.
 * @param ap Argument list to take the value from.
 */
static inline void get_arg_unsigned(uintmax_t *dest,
				    const struct fmt_sequence *sequence,
				    va_list *ap)
{
	switch (sequence->length_modifier) {
	case LENGTH_HH:
		/* passed as unsigned int, but printf() semantics convert it back to char */
		*dest = (unsigned char)va_arg(*ap, unsigned int);
		break;
	case LENGTH_H:
		/* passed as unsigned int, but printf() semantics convert it back to short */
		*dest = (unsigned short)va_arg(*ap, unsigned int);
		break;
	case LENGTH_L:
		*dest = va_arg(*ap, unsigned long);
		break;
	case LENGTH_LL:
		*dest = va_arg(*ap, unsigned long long);
		break;
	case LENGTH_Z:
		*dest = va_arg(*ap, usize);
		break;
	case LENGTH_J:
		*dest = va_arg(*ap, uintmax_t);
		break;
	case LENGTH_T:
		*dest = va_arg(*ap, uintptr_t);
		break;
	case LENGTH_DEFAULT:
	default:
		/* anything unexpected is read as unsigned int so dest is never left unset */
		*dest = va_arg(*ap, unsigned int);
		break;
	}
}
static const char *digit_table_smol = "0123456789abcdef";
static const char *digit_table_big = "0123456789ABCDEF";
/**
 * @brief Convert an unsigned integer to its digits, least significant first.
 *
 * The first digit is stored at `*buf`, every following (more significant)
 * digit `direction` bytes further along.  At least one digit is always
 * produced, so 0 becomes a single `digit_table[0]`.
 *
 * @param buf In: where the least significant digit goes.
 *	Out: points to the last digit written (the most significant one).
 * @param val Value to convert.
 * @param digit_table Lookup table with at least `radix` characters.
 * @param radix Numeric base.
 * @param direction Step between two digits, e.g. -1 to fill a buffer from its end.
 */
static inline void stringify_uint(char **buf, uintmax_t val, const char *digit_table,
				  unsigned int radix, int direction)
{
	char *cursor = *buf;

	for (;;) {
		*cursor = digit_table[val % radix];
		val /= radix;
		if (val == 0)
			break;
		cursor += direction;
	}

	*buf = cursor;
}
static isize render_d(const struct fmt_sequence *sequence, va_list *ap)
{
isize ret = 0;
intmax_t val;
get_arg_signed(&val, sequence, ap);
if (val < 0) {
val = -val;
ret = write_asciz("-");
if (ret < 0)
return ret;
} else if (sequence->flags.plus) {
ret = write_asciz("+");
if (ret < 0)
return ret;
} else if (sequence->flags.space) {
ret = write_asciz(" ");
if (ret < 0)
return ret;
}
usize len = 20; /* 2**64 has 20 decimal digits, let's hope intmax_t isn't 128 bits */
if (sequence->min_width > len)
len = sequence->min_width;
char *buf = alloca(len);
char *pos = &buf[len - 1];
stringify_uint(&pos, val, digit_table_smol, 10, -1);
while (sequence->min_width > len - (pos - buf))
*--pos = '0';
isize tmp = write_bytes(pos, len - (pos - buf));
if (tmp > 0)
ret += tmp;
else
ret = tmp;
return ret;
}
static isize render_o(const struct fmt_sequence *sequence, va_list *ap)
{
isize ret = 0;
if (sequence->flags.plus) {
ret = write_asciz("+");
if (ret < 0)
return ret;
} else if (sequence->flags.space) {
ret = write_asciz(" ");
if (ret < 0)
return ret;
}
uintmax_t val;
get_arg_unsigned(&val, sequence, ap);
usize len = 22; /* 2**64 has 22 octal digits, let's hope intmax_t isn't 128 bits */
if (sequence->min_width > len)
len = sequence->min_width;
char *buf = alloca(len);
char *pos = &buf[len - 1];
stringify_uint(&pos, val, digit_table_smol, 8, -1);
while (sequence->min_width > len - (pos - buf))
*--pos = '0';
isize tmp = write_bytes(pos, len - (pos - buf));
if (tmp > 0)
ret += tmp;
else
ret = tmp;
return ret;
}
static isize render_p(const struct fmt_sequence *sequence, va_list *ap)
{
/* 2 hex digits per byte + 2 for 0x prefix */
char buf[sizeof(uintptr_t) * 2 + 2];
char *pos = &buf[sizeof(uintptr_t) * 2 + 1];
const char *digit_table;
uintptr_t ptr = va_arg(*ap, uintptr_t);
buf[0] = '0';
buf[1] = 'x';
if (sequence->uppercase)
digit_table = digit_table_big;
else
digit_table = digit_table_smol;
while (pos > &buf[1]) {
*pos-- = digit_table[ptr % 0x10];
ptr >>= 4;
}
return write_bytes(buf, sizeof(buf));
}
static isize render_u(const struct fmt_sequence *sequence, va_list *ap)
{
isize ret = 0;
if (sequence->flags.plus) {
ret = write_asciz("+");
if (ret < 0)
return ret;
} else if (sequence->flags.space) {
ret = write_asciz(" ");
if (ret < 0)
return ret;
}
uintmax_t val;
get_arg_unsigned(&val, sequence, ap);
usize len = 20; /* 2^64 has 20 decimal digits, let's hope intmax_t isn't 128 bits */
if (sequence->min_width > len)
len = sequence->min_width;
char *buf = alloca(len);
char *pos = &buf[len - 1];
stringify_uint(&pos, val, digit_table_smol, 10, -1);
while (sequence->min_width > len - (pos - buf))
*--pos = '0';
isize tmp = write_bytes(pos, len - (pos - buf));
if (tmp > 0)
ret += tmp;
else
ret = tmp;
return ret;
}
static isize render_x(const struct fmt_sequence *sequence, va_list *ap)
{
char *buf;
usize len = sizeof(uintmax_t) * 2; /* 2 hex digits per byte */
if (len < sequence->min_width)
len = sequence->min_width;
buf = alloca(len);
char *pos = &buf[len];
uintmax_t val;
get_arg_unsigned(&val, sequence, ap);
const char *digit_table;
if (sequence->uppercase)
digit_table = digit_table_big;
else
digit_table = digit_table_smol;
stringify_uint(&pos, val, digit_table, 16, -1);
while (sequence->min_width > len - (pos - buf))
*--pos = '0';
return write_bytes(pos, len - (pos - buf));
}
/**
 * @brief Render the `%%` sequence, i.e. a literal percent sign.
 *
 * @param sequence Parsed format sequence (unused).
 * @param ap Argument list (unused, nothing is consumed).
 * @return Return value of write_asciz().
 */
static isize render_percent(const struct fmt_sequence *sequence, va_list *ap)
{
	static const char percent[] = "%";

	(void)sequence;
	(void)ap;

	return write_asciz(percent);
}
static inline isize write_asciz(const char *s)
{
return printer->write(printer, s, strlen(s));
}
/*
 * Hand `len` bytes starting at `buf` to the current `printer` and pass its
 * return value back to the caller unchanged.
 */
static inline isize write_bytes(const void *buf, usize len)
{
	isize written = printer->write(printer, buf, len);

	return written;
}
/*
* This file is part of GayBSD.
* Copyright (c) 2021 fef <owo@fef.moe>.