use bytes::Bytes; use reqwest::header::HeaderValue; use std::ops::RangeBounds; use crate::core::*; use crate::util::slice::SliceCursor; use crate::util::transcode; /// Parse an HTTP Structured Field Value according to /// [RFC 8941](https://www.rfc-editor.org/info/rfc8941). /// /// Note: This parser is only compliant with RFC 8941 in strict mode; /// see [`ParseOptions::strict`] for details. /// /// Note: This only parses one "line" although the RFC says conforming /// software MUST support values split over several headers. /// If you wish to comply with the RFC, you MUST call this for every header /// line individually. pub trait ParseHeader<'a>: Sized { fn parse_from_ascii(header: &'a [u8], options: ParseOptions) -> Result; fn parse_from_header(header: &'a HeaderValue, options: ParseOptions) -> Result { Self::parse_from_ascii(header.as_bytes(), options) } } /// Options for the header parser. The default is strict mode, i.e. conforming to /// [RFC 8941](https://www.rfc-editor.org/info/rfc8941) except for multiline headers. pub struct ParseOptions { strict: bool, allow_utf8: bool, allow_url: bool, allow_param_bws: bool, max_dict_members: usize, max_list_members: usize, max_inner_list_members: usize, max_params: usize, } impl ParseOptions { /// Return the default options; see [`ParseOptions::default()`]. pub fn new() -> Self { Self::default() } /// Return options for parsing an HTTP header defined on top of RFC 8941. /// This is currently the default. pub fn rfc8941() -> Self { ParseOptions { strict: true, allow_utf8: false, allow_url: false, allow_param_bws: false, max_dict_members: 1024, max_list_members: 1024, max_inner_list_members: 256, max_params: 256, } } /// Return parser options suitable for parsing the HTTP `Link` header as /// defined in section 3 of [RFC 8288](https://www.rfc-editor.org/info/rfc8288). pub fn link_header() -> Self { Self::default() .strict(false) .allow_utf8(true) .allow_url(true) .allow_param_bws(true) } /// Enable strict mode, i.e. fully comply with RFC 8941 (except for the /// multiline header thing; consumers of this utility MUST call the parser /// on every header value with the same name manually). /// /// This option exists because the parser is also useful for headers that /// *almost* conform to the RFC with only some minor deviations (e.g. the /// `Link` header, which allows URLs enclosed in angle brackets). /// When parsing a header that is defined based on RFC 8941, this option /// MUST be set to `true`. /// /// This option takes precedence over all other ones and defaults to `true`. pub fn strict(mut self, strict: bool) -> Self { self.strict = strict; self } /// Accept the entire UTF-8 alphabet instead of just ASCII. /// Strict mode implies this is `false`. pub fn allow_utf8(mut self, allow_utf8: bool) -> Self { self.allow_utf8 = allow_utf8; self } /// Enable the non-standard URL Item type for values enclosed in angle /// brackets (`<>`). Strict mode implies this is `false`. pub fn allow_url(mut self, allow_url: bool) -> Self { self.allow_url = allow_url; self } /// Allow "bad" whitespace (as per the BWS rule in section 3.2.3 of /// [RFC 7230](https://www.rfc-editor.org/info/rfc7230)) before and after /// the `=` token in parameters, as well as before the semicolon. /// Strict mode implies this is `false`. pub fn allow_param_bws(mut self, allow_param_bws: bool) -> Self { self.allow_param_bws = allow_param_bws; self } /// Maximum number of members to allow in a Dictionary (minimum 1). /// Strict mode implies this is no less than 1024. pub fn max_dict_members(mut self, max_dict_members: usize) -> Self { self.max_dict_members = max_dict_members.max(1); self } /// Maximum number of members to allow in a List (minimum 1). /// Strict mode implies this is no less than 1024. pub fn max_list_members(mut self, max_list_members: usize) -> Self { self.max_list_members = max_list_members.max(1); self } /// Maximum number of members to allow in an Inner List (minimum 1). /// Strict mode implies this is no less than 256. pub fn max_inner_list_members(mut self, max_inner_list_members: usize) -> Self { self.max_inner_list_members = max_inner_list_members.max(1); self } /// Maximum number of parameters to allow on Items (minimum 1). /// Strict mode implies this is no less than 256. pub fn max_params(mut self, max_params: usize) -> Self { self.max_params = max_params.max(1); self } /// In strict mode, override all options to comply with RFC 8941. fn normalize(mut self) -> Self { if self.strict { if self.allow_utf8 { debug!("Strict mode enabled, overriding allow_utf8 to false"); self.allow_utf8 = false; } if self.allow_url { debug!("Strict mode enabled, overriding allow_url to false"); self.allow_url = false; } if self.allow_param_bws { debug!("Strict mode enabled, overriding allow_param_bws to false"); self.allow_param_bws = false; } if self.max_dict_members < 1024 { debug!("Strict mode enabled, overriding max_dict_members to 1024"); self.max_dict_members = 1024; } if self.max_list_members < 1024 { debug!("Strict mode enabled, overriding max_list_members to 1024"); self.max_list_members = 1024; } if self.max_inner_list_members < 256 { debug!("Strict mode enabled, overriding max_inner_list_members to 256"); self.max_inner_list_members = 256; } if self.max_params < 256 { debug!("Strict mode enabled, overriding max_params to 256"); self.max_params = 256; } } self } } impl Default for ParseOptions { fn default() -> Self { Self::rfc8941() } } /// A Dictionary (section 3.2). #[derive(Debug, PartialEq)] pub struct Dictionary<'a>(Vec<(&'a str, Member<'a>)>); /// A List (section 3.1). #[derive(Debug, PartialEq)] pub struct List<'a>(Vec>); /// A Member of a List or Dictionary. #[derive(Debug, PartialEq)] pub enum Member<'a> { Item(Item<'a>), InnerList(InnerList<'a>), } /// An Inner List (section 3.1.1). #[derive(Debug, PartialEq)] pub struct InnerList<'a> { items: Vec>, params: Vec<(&'a str, BareItem<'a>)>, } /// An Item (section 3.3). #[derive(Debug, PartialEq)] pub struct Item<'a> { bare_item: BareItem<'a>, params: Vec<(&'a str, BareItem<'a>)>, } /// An Item without Parameters. #[derive(Debug, PartialEq)] pub enum BareItem<'a> { Integer(i64), Decimal(f32), String(StringItem<'a>), Token(TokenItem<'a>), Url(UrlItem<'a>), ByteSequence(ByteSequenceItem<'a>), Boolean(bool), } #[derive(Debug, PartialEq)] pub struct StringItem<'a>(&'a str); #[derive(Debug, PartialEq)] pub struct TokenItem<'a>(&'a str); #[derive(Debug, PartialEq)] pub struct UrlItem<'a>(&'a str); #[derive(Debug, PartialEq)] pub struct ByteSequenceItem<'a>(&'a str); impl<'a> ParseHeader<'a> for Dictionary<'a> { fn parse_from_ascii(header: &'a [u8], options: ParseOptions) -> Result { Parser::new(header, options)?.parse_dictionary() } } impl<'a> Dictionary<'a> { pub fn get<'k, K>(&self, key: K) -> Option<&Member<'a>> where K: Into<&'k str>, { let key = key.into(); self.0.iter().find_map(|(k, v)| key.eq(*k).then_some(v)) } pub fn nth(&self, index: usize) -> Option<&(&'a str, Member<'a>)> { self.0.get(index) } } impl<'a> ParseHeader<'a> for List<'a> { fn parse_from_ascii(header: &'a [u8], options: ParseOptions) -> Result { Parser::new(header, options)?.parse_list() } } impl<'a> List<'a> { pub fn nth(&self, index: usize) -> Option<&Member<'a>> { self.0.get(index) } pub fn iter(&self) -> impl Iterator> { self.0.iter() } pub fn len(&self) -> usize { self.0.len() } } impl<'a> ParseHeader<'a> for Item<'a> { fn parse_from_ascii(header: &'a [u8], options: ParseOptions) -> Result { Parser::new(header, options)?.parse_item() } } impl<'a> Item<'a> { pub fn get_params(&self) -> &[(&'a str, BareItem<'a>)] { self.params.as_slice() } pub fn param<'k, K>(&self, key: K) -> Option<&BareItem<'a>> where K: Into<&'k str>, { let key = key.into(); self.params .iter() .find_map(|(k, v)| key.eq(*k).then_some(v)) } pub fn param_nocase<'k, K>(&self, key: K) -> Option<&BareItem<'a>> where K: Into<&'k str>, { let key = key.into(); self.params .iter() .find_map(|(k, v)| key.eq_ignore_ascii_case(k).then_some(v)) } pub fn has_param<'k, K>(&self, key: K) -> bool where K: Into<&'k str>, { let key = key.into(); self.params.iter().any(|(k, _)| key.eq(*k)) } pub fn as_integer(&self) -> Option { self.bare_item.as_integer() } pub fn as_decimal(&self) -> Option { self.bare_item.as_decimal() } pub fn as_string(&self) -> Option { self.bare_item.as_string() } pub fn as_token(&self) -> Option<&'a str> { self.bare_item.as_token() } pub fn as_string_or_token(&self) -> Option { self.bare_item.as_string_or_token() } pub fn as_url(&self) -> Option<&'a str> { self.bare_item.as_url() } pub fn as_byte_sequence(&self) -> Option { self.bare_item.as_byte_sequence() } pub fn as_boolean(&self) -> Option { self.bare_item.as_boolean() } } impl<'a> BareItem<'a> { pub fn as_integer(&self) -> Option { match self { BareItem::Integer(i) => Some(*i), _ => None, } } pub fn as_decimal(&self) -> Option { match self { BareItem::Decimal(d) => Some(*d), _ => None, } } pub fn as_string(&self) -> Option { match self { BareItem::String(s) => Some(remove_escapes_stupid(s.0)), _ => None, } } pub fn as_token(&self) -> Option<&'a str> { match self { BareItem::Token(t) => Some(t.0), _ => None, } } pub fn as_string_or_token(&self) -> Option { match self { BareItem::String(s) => Some(remove_escapes_stupid(s.0)), BareItem::Token(t) => Some(String::from(t)), _ => None, } } pub fn as_url(&self) -> Option<&'a str> { match self { BareItem::Url(u) => Some(u.0), _ => None, } } pub fn as_byte_sequence(&self) -> Option { match self { BareItem::ByteSequence(bs) => Some(bs.into()), _ => None, } } pub fn as_boolean(&self) -> Option { match self { BareItem::Boolean(b) => Some(*b), _ => None, } } } impl<'a> Member<'a> { pub fn get_params(&self) -> &[(&'a str, BareItem<'a>)] { match self { Member::Item(i) => i.get_params(), Member::InnerList(l) => l.get_params(), } } pub fn param(&self, key: K) -> Option<&BareItem<'a>> where K: Into<&'a str>, { match self { Member::Item(i) => i.param(key), Member::InnerList(l) => l.param(key), } } pub fn has_param(&self, key: K) -> bool where K: Into<&'a str>, { match self { Member::Item(i) => i.has_param(key), Member::InnerList(l) => l.has_param(key), } } pub fn as_integer(&self) -> Option { match self { Member::Item(i) => i.as_integer(), _ => None, } } pub fn as_decimal(&self) -> Option { match self { Member::Item(i) => i.as_decimal(), _ => None, } } pub fn as_string(&self) -> Option { match self { Member::Item(i) => i.as_string(), _ => None, } } pub fn as_token(&self) -> Option<&'a str> { match self { Member::Item(i) => i.as_token(), _ => None, } } pub fn as_byte_sequence(&self) -> Option { match self { Member::Item(i) => i.as_byte_sequence(), _ => None, } } pub fn as_boolean(&self) -> Option { match self { Member::Item(i) => i.as_boolean(), _ => None, } } pub fn as_list(&self) -> Option<&InnerList<'a>> { match self { Member::InnerList(l) => Some(l), _ => None, } } } impl<'a> InnerList<'a> { pub fn nth(&self, index: usize) -> Option<&Item<'a>> { self.items.get(index) } pub fn iter(&self) -> impl Iterator> { self.items.iter() } pub fn len(&self) -> usize { self.items.len() } pub fn get_params(&self) -> &[(&'a str, BareItem<'a>)] { self.params.as_slice() } pub fn param(&self, key: K) -> Option<&BareItem<'a>> where K: Into<&'a str>, { let key = key.into(); self.params .iter() .find_map(|(k, v)| key.eq(*k).then_some(v)) } pub fn has_param(&self, key: K) -> bool where K: Into<&'a str>, { let key = key.into(); self.params.iter().any(|(k, _)| key.eq(*k)) } } /// Internal implementation of Structured Field Values. /// Parsing methods have their respective production rules in the doc comment, /// which was extracted from the RFC. See section 1.2 for details. struct Parser<'a> { cursor: SliceCursor<'a, u8>, options: ParseOptions, } impl<'a> Parser<'a> { fn new(data: &'a [u8], options: ParseOptions) -> Result { let options = options.normalize(); if options.allow_utf8 { std::str::from_utf8(data).map_err(|e| Error::BadHeader(e.to_string()))?; } else if !data.is_ascii() { return Err(Error::BadHeader(String::from("Not an ASCII string"))); } Ok(Parser { cursor: SliceCursor::new(data), options, }) } /// Parse a full List (section 3.1). /// /// ```notrust /// sf-list = list-member *( OWS "," OWS list-member ) /// ``` fn parse_list(&mut self) -> Result> { let mut members = Vec::with_capacity(1); members.push(self.parse_list_member()?); self.skip_ows(); while self.skip_if(|c| c == b',') { self.skip_ows(); members.push(self.parse_list_member()?); if members.len() > self.options.max_list_members { return Err(self.make_error(format!( "List exceeds configured member limit of {}", self.options.max_list_members ))); } self.skip_ows(); } Ok(List(members)) } /// Parse a single list member. /// /// ```notrust /// list-member = sf-item / inner-list /// ``` fn parse_list_member(&mut self) -> Result> { if self.cursor.peek().copied() == Some(b'(') { self.parse_inner_list().map(Member::InnerList) } else { self.parse_item().map(Member::Item) } } /// Parse a Dictionary (section 3.2). /// /// ```notrust /// sf-dictionary = dict-member *( OWS "," OWS dict-member ) /// ``` fn parse_dictionary(&mut self) -> Result> { let mut members = Vec::with_capacity(1); members.push(self.parse_dict_member()?); self.skip_ows(); while self.skip_if(|c| c == b',') { self.skip_ows(); members.push(self.parse_dict_member()?); if members.len() > self.options.max_dict_members { return Err(self.make_error(format!( "Dictionary exceeds configured member limit of {}", self.options.max_dict_members ))); } self.skip_ows(); } Ok(Dictionary(members)) } /// Parse a Dictionary member. /// /// ```notrust /// dict-member = member-key ( parameters / ( "=" member-value )) /// member-key = key /// member-value = sf-item / inner-list /// ``` fn parse_dict_member(&mut self) -> Result<(&'a str, Member<'a>)> { // member-key let key = self.parse_key()?; if self.options.allow_param_bws { self.skip_bws_if_next_matches(|c| c == b'='); } let val = if self.skip_if(|c| c == b'=') { if self.options.allow_param_bws { self.skip_bws(); } // member-value if self.cursor.peek().copied() == Some(b'(') { Member::InnerList(self.parse_inner_list()?) } else { Member::Item(self.parse_item()?) } } else { // parameters Member::Item(Item { bare_item: BareItem::Boolean(true), params: self.parse_parameters()?, }) }; Ok((key, val)) } /// Parse an Inner List (section 3.1.1). /// /// ```notrust /// inner-list = "(" *SP [ sf-item *( 1*SP sf-item ) *SP ] ")" parameters /// ``` fn parse_inner_list(&mut self) -> Result> { self.assert_next(|c| c == b'(')?; self.skip_sp(); let mut items = Vec::new(); loop { if self.skip_if(|c| c == b')') { break; } items.push(self.parse_item()?); // > Parsers MUST support Inner Lists containing at least 256 members. if items.len() == 256 { break; } if self.skip_sp() != 1 { self.assert_next(|c| c == b')')?; break; } } let params = self.parse_parameters()?; Ok(InnerList { items, params }) } /// Parse a full Item including parameters (section 3.3). /// /// ```notrust /// sf-item = bare-item parameters /// ``` fn parse_item(&mut self) -> Result> { let bare_item = self.parse_bare_item()?; let params = self.parse_parameters()?; Ok(Item { bare_item, params }) } /// Parse a list of parameters (section 3.1.2). /// /// ```notrust /// parameters = *( ";" *SP parameter ) /// /// ; deviations in non-strict mode: /// parameters = *( ";" OWS parameter ) /// /// ; deviations if allow_param_bws: /// parameters = *( BWS ";" OWS parameter ) /// ``` fn parse_parameters(&mut self) -> Result)>> { let mut params = Vec::new(); if self.options.allow_param_bws { self.skip_bws_if_next_matches(|c| c == b';'); } while self.skip_if(|c| c == b';') { if self.options.strict { self.skip_sp(); } else { self.skip_ows(); } params.push(self.parse_parameter()?); if params.len() > self.options.max_params { return Err(self.make_error(format!( "Parameter count exceeds configured limit of {}", self.options.max_params ))); } if self.options.allow_param_bws { self.skip_bws_if_next_matches(|c| c == b';'); } } Ok(params) } /// Parse a single Parameter (section 3.1.2). /// /// ```notrust /// parameter = param-key [ "=" param-value ] /// param-key = key /// param-value = bare-item /// /// ; deviations if allow_param_bws: /// parameter = token [ BWS "=" BWS bare-item ] /// ``` fn parse_parameter(&mut self) -> Result<(&'a str, BareItem<'a>)> { let key = self.parse_key()?; if self.options.allow_param_bws { self.skip_bws_if_next_matches(|c| c == b'='); } let value = if self.skip_if(|c| c == b'=') { if self.options.allow_param_bws { self.skip_bws(); } self.parse_bare_item()? } else { BareItem::Boolean(true) }; Ok((key, value)) } /// Parse a key for a Parameter or Dictionary. /// /// ```notrust /// key = ( lcalpha / "*" ) /// *( lcalpha / DIGIT / "_" / "-" / "." / "*" ) /// lcalpha = %x61-7A ; a-z /// ``` fn parse_key(&mut self) -> Result<&'a str> { self.chop(); self.assert_next(is_key_start)?; self.skip_while(is_key_part); Ok(self.chop()) } /// Parse a bare item (section 3.3). /// /// ```notrust /// bare-item = sf-integer / sf-decimal / sf-string /// / sf-token / sf-binary / sf-boolean /// ``` fn parse_bare_item(&mut self) -> Result> { match self .cursor .peek() .copied() .ok_or_else(|| self.make_error("Unexpected end of header"))? { c if is_numeric_start(c) => self.parse_numeric(), b'"' => self.parse_string(), b'<' => self.parse_url(), c if is_token_start(c) => self.parse_token(), b':' => self.parse_byte_sequence(), b'?' => self.parse_boolean(), c => Err(self.make_error(format!("Unexpected character {:?}", c as char))), } } /// Parse an Integer (section 3.3.1) or Decimal (section 3.3.2) item. /// /// ```notrust /// sf-integer = ["-"] 1*15DIGIT /// sf-decimal = ["-"] 1*12DIGIT "." 1*3DIGIT /// ``` fn parse_numeric(&mut self) -> Result> { self.chop(); self.cursor.next_if(|&c| c == b'-'); let int_digits = self.parse_digits(1..=15)?.len(); if self.cursor.next_if(|&c| c == b'.').is_some() { if int_digits <= 12 { self.parse_digits(1..=3)?; Ok(BareItem::Decimal(self.chop().parse().unwrap())) } else { Err(self.make_error("Decimals may contain at most 12 integer digits")) } } else { Ok(BareItem::Integer(self.chop().parse().unwrap())) } } /// Parse a String item (section 3.3.3). /// /// ```notrust /// sf-string = DQUOTE *chr DQUOTE /// chr = unescaped / escaped /// unescaped = %x20-21 / %x23-5B / %x5D-7E /// escaped = "\" ( DQUOTE / "\" ) /// /// ; deviations if allow_utf8: /// unescaped = %x20-21 / %x23-5B / %x5D-7E / %x80-FF /// ``` fn parse_string(&mut self) -> Result> { self.assert_next(|c| c == b'"')?; let is_allowed_char = if self.options.allow_utf8 { is_string_part_utf8 } else { is_string_part }; self.chop(); loop { match self.require_next()? { b'"' => break, b'\\' => { self.assert_next(|c| c == b'\\' || c == b'"')?; } c if is_allowed_char(c) => continue, c => return Err(self.make_error(format!("Unexpected character {:?} in string", c))), } } let slice = self.chop(); let slice = &slice[..slice.len() - 1]; // discard the trailing " Ok(BareItem::String(StringItem(slice))) } /// Parse a non-standard URL item if `allow_url` is enabled in the options. fn parse_url(&mut self) -> Result> { if self.options.allow_url { self.assert_next(|c| c == b'<')?; self.chop(); if self.options.allow_utf8 { self.skip_while(is_url_part_utf8); } else { self.skip_while(is_url_part); } let slice = self.chop(); self.assert_next(|c| c == b'>')?; Ok(BareItem::Url(UrlItem(slice))) } else { Err(self.make_error( "allow_url is disabled, refusing to parse URL enclosed in ", )) } } /// Parse a Token item (section 3.3.4). /// /// ```notrust /// sf-token = ( ALPHA / "*" ) *( tchar / ":" / "/" ) /// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" /// / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" /// / DIGIT / ALPHA /// ; any VCHAR, except delimiters /// ``` fn parse_token(&mut self) -> Result> { self.chop(); self.assert_next(|c| c.is_ascii_alphabetic() || c == b'*')?; self.skip_while(|c| is_tchar(c) || c == b':' || c == b'/'); Ok(BareItem::Token(TokenItem(self.chop()))) } /// Parse a Byte Sequence item (section 3.3.5). /// /// ```notrust /// sf-binary = ":" *(base64) ":" /// base64 = ALPHA / DIGIT / "+" / "/" / "=" /// ``` fn parse_byte_sequence(&mut self) -> Result> { self.assert_next(|c| c == b':')?; self.chop(); self.skip_while(is_base64); let slice = self.chop(); self.assert_next(|c| c == b':')?; Ok(BareItem::ByteSequence(ByteSequenceItem(slice))) } /// Parse a Boolean item (section 3.3.6). /// /// ```notrust /// sf-boolean = "?" boolean /// boolean = "0" / "1" /// ``` fn parse_boolean(&mut self) -> Result> { self.assert_next(|c| c == b'?')?; self.chop(); let b = self.assert_next(|c| b"01".contains(&c))? == b'1'; self.chop(); Ok(BareItem::Boolean(b)) } fn parse_digits(&mut self, amount: impl RangeBounds) -> Result<&[u8]> { let slice = self.cursor.next_while(|c| c.is_ascii_digit()); if amount.contains(&slice.len()) { Ok(slice) } else { Err(self.make_error("Number out of permissible range")) } } fn skip_sp(&mut self) -> usize { self.cursor.next_while(|&c| c == b' ').len() } /// Skip optional whitespace as per section 3.2.3 of /// [RFC 7230](https://www.rfc-editor.org/info/rfc7230). /// /// ```notrust /// OWS = *( SP / HTAB ) /// ; optional whitespace /// ``` fn skip_ows(&mut self) -> usize { self.cursor.next_while(|&c| c == b' ' || c == b'\t').len() } /// Skip "bad" whitespace as per section 3.2.3 of /// [RFC 7230](https://www.rfc-editor.org/info/rfc7230). /// /// ```notrust /// BWS = OWS /// ; "bad" whitespace /// ``` fn skip_bws(&mut self) -> usize { self.skip_ows() } /// Skip "bad" whitespace (see [`Self::skip_bws`]) if the first character /// after the whitespace matches `predicate`. /// The cursor will point to the last whitespace character. fn skip_bws_if_next_matches(&mut self, predicate: F) -> Option where F: FnOnce(u8) -> bool, { self.cursor.attempt(|cursor| { let bws_count = cursor.next_while(|&c| c == b' ' || c == b'\t').len(); cursor.peek().filter(|&&c| predicate(c)).map(|_| bws_count) }) } fn assert_next(&mut self, predicate: F) -> Result where F: FnOnce(u8) -> bool, { let c = self.require_next()?; if predicate(c) { Ok(c) } else { Err(self.make_error(format!("Unexpected token {:?}", c as char))) } } fn skip_if(&mut self, predicate: F) -> bool where F: FnOnce(u8) -> bool, { self.cursor.next_if(|&c| predicate(c)).is_some() } fn skip_while(&mut self, mut predicate: F) -> usize where F: FnMut(u8) -> bool, { self.cursor.next_while(|&c| predicate(c)).len() } fn require_next(&mut self) -> Result { self.cursor .next() .copied() .ok_or_else(|| self.make_error("Unexpected end of header")) } fn chop(&mut self) -> &'a str { let bytes = self.cursor.chop(); // SAFETY: The parser validates the string when constructed unsafe { std::str::from_utf8_unchecked(bytes) } } fn make_error(&self, msg: S) -> Error where S: Into, { Error::BadHeader(msg.into()) } } const fn is_numeric_start(c: u8) -> bool { c.is_ascii_digit() || c == b'-' } const fn is_string_part(c: u8) -> bool { matches!(c, b'\x20'..=b'\x21' | b'\x23'..=b'\x5b' | b'\x5d'..=b'\x7e') } const fn is_string_part_utf8(c: u8) -> bool { !c.is_ascii() || is_string_part(c) } const fn is_url_part(c: u8) -> bool { c != b'>' && is_string_part(c) } const fn is_url_part_utf8(c: u8) -> bool { c != b'>' && is_string_part_utf8(c) } const fn is_token_start(c: u8) -> bool { c.is_ascii_alphabetic() || c == b'*' } const fn is_tchar(c: u8) -> bool { c.is_ascii_alphanumeric() || matches!( c, b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'-' | b'.' | b'^' | b'_' | b'`' | b'|' | b'~' ) } const fn is_byte_sequence_start(c: u8) -> bool { c == b':' } const fn is_base64(c: u8) -> bool { c.is_ascii_alphanumeric() || matches!(c, b'+' | b'/' | b'=') } const fn is_key_start(c: u8) -> bool { c.is_ascii_lowercase() || c == b'*' } const fn is_key_part(c: u8) -> bool { c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, b'_' | b'-' | b'.' | b'*') } fn remove_escapes_stupid(s: &str) -> String { let mut had_escape = false; let utf8 = s .as_bytes() .iter() .filter_map(|&c| { let should_take = c != b'\\' || had_escape; had_escape = !had_escape && (c == b'\\'); should_take.then_some(c) }) .collect(); // SAFETY: the data originally came from a &str slice unsafe { String::from_utf8_unchecked(utf8) } } impl<'a> From<&StringItem<'a>> for String { fn from(val: &StringItem<'a>) -> String { remove_escapes_stupid(val.0) } } impl<'a> From<&TokenItem<'a>> for String { fn from(val: &TokenItem<'a>) -> String { val.0.into() } } impl<'a> From<&ByteSequenceItem<'a>> for Bytes { fn from(val: &ByteSequenceItem<'a>) -> Bytes { transcode::base64_decode(val.0).unwrap() } } #[cfg(test)] mod tests { use super::*; use crate::util::transcode::base64_decode; fn mklist(header: &'static str) -> Result> { List::parse_from_ascii(header.as_bytes(), Default::default()) } fn mkdict(header: &'static str) -> Result> { Dictionary::parse_from_ascii(header.as_bytes(), Default::default()) } fn mkitem(header: &'static str) -> Result> { Item::parse_from_ascii(header.as_bytes(), Default::default()) } #[test] fn parse_list() { let list = mklist("sugar,tea ,\t rum").unwrap(); assert_eq!(list.nth(0).unwrap().as_token().unwrap(), "sugar"); assert_eq!(list.nth(1).unwrap().as_token().unwrap(), "tea"); assert_eq!(list.nth(2).unwrap().as_token().unwrap(), "rum"); assert!(list.nth(3).is_none()); assert!(mklist("sugar, ").is_err()); } #[test] fn parse_inner_list() { let list = mklist(r#"("foo" "bar"), ("baz"), ("bat" "one")"#).unwrap(); assert_eq!(list.len(), 3); let inner = list.nth(0).unwrap().as_list().unwrap(); assert_eq!(inner.len(), 2); assert_eq!(inner.nth(0).unwrap().as_string(), Some("foo".into())); assert_eq!(inner.nth(1).unwrap().as_string(), Some("bar".into())); assert!(inner.nth(2).is_none()); let inner = list.nth(1).unwrap().as_list().unwrap(); assert_eq!(inner.len(), 1); assert_eq!(inner.nth(0).unwrap().as_string(), Some("baz".into())); assert!(inner.nth(1).is_none()); let inner = list.nth(2).unwrap().as_list().unwrap(); assert_eq!(inner.len(), 2); assert_eq!(inner.nth(0).unwrap().as_string(), Some("bat".into())); assert_eq!(inner.nth(1).unwrap().as_string(), Some("one".into())); assert!(inner.nth(2).is_none()); assert!(list.nth(3).is_none()); let list = mklist(r#"( "a" )"#).unwrap(); assert_eq!(list.len(), 1); let inner = list.nth(0).unwrap().as_list().unwrap(); assert_eq!(inner.len(), 1); assert_eq!(inner.nth(0).unwrap().as_string(), Some("a".into())); assert!(inner.nth(1).is_none()); assert!(mklist(r#"("a" "b")"#).is_err()); assert!(mklist("(\t\"a\"\t)").is_err()); } #[test] fn parse_dictionary() { let dict = mkdict(r#"en="Applepie", da=:w4ZibGV0w6ZydGU=:"#).unwrap(); assert_eq!(dict.get("en"), dict.nth(0).map(|(_, v)| v)); assert_eq!(dict.get("en").unwrap().as_string(), Some("Applepie".into())); assert_eq!(dict.get("da"), dict.nth(1).map(|(_, v)| v)); assert_eq!( dict.get("da").unwrap().as_byte_sequence(), Some(base64_decode("w4ZibGV0w6ZydGU=").unwrap()) ); let dict = mkdict(r#"a=?0, b, c; foo=bar"#).unwrap(); assert_eq!(dict.get("a").unwrap().as_boolean(), Some(false)); assert_eq!(dict.get("b").unwrap().as_boolean(), Some(true)); let c = dict.get("c").unwrap(); assert_eq!(c.as_boolean(), Some(true)); assert_eq!(c.param("foo").unwrap().as_token(), Some("bar")); let dict = mkdict("rating=1.5, feelings=(joy sadness)").unwrap(); assert!(dict.get("rating").unwrap().as_decimal().unwrap() - 1.5 < 0.001); let feelings = dict.get("feelings").unwrap().as_list().unwrap(); assert_eq!(feelings.len(), 2); assert_eq!(feelings.nth(0).unwrap().as_token(), Some("joy")); assert_eq!(feelings.nth(1).unwrap().as_token(), Some("sadness")); assert!(feelings.nth(2).is_none()); } #[test] fn parse_item_integer() { let item = mkitem("-0").unwrap(); assert_eq!(item.as_integer(), Some(0)); let item = mkitem("999999999999999").unwrap(); assert_eq!(item.as_integer(), Some(999999999999999)); let item = mkitem("-999999999999999").unwrap(); assert_eq!(item.as_integer(), Some(-999999999999999)); assert!(mkitem("0999999999999999").is_err()); assert!(mkitem("-0999999999999999").is_err()); assert!(mkitem("1000000000000000").is_err()); assert!(mkitem("-1000000000000000").is_err()); } #[test] fn parse_item_decimal() { let item = mkitem("4.20").unwrap(); assert!((item.as_decimal().unwrap() - 4.20).abs() < 0.001); let item = mkitem("999999999999.999").unwrap(); assert!((item.as_decimal().unwrap() - 999999999999.999).abs() < 0.001); let item = mkitem("-999999999999.999").unwrap(); assert!((item.as_decimal().unwrap() + 999999999999.999).abs() < 0.001); assert!(mkitem("0999999999999.999").is_err()); assert!(mkitem("-0999999999999.999").is_err()); assert!(mkitem("999999999999.9990").is_err()); assert!(mkitem("-999999999999.9990").is_err()); } #[test] fn parse_item_string() { let item = mkitem("\"\"").unwrap(); assert_eq!(item.as_string(), Some("".into())); let item = mkitem(r#""\\\"""#).unwrap(); assert_eq!(item.as_string(), Some("\\\"".into())); assert!(mkitem("\"").is_err()); assert!(mkitem("\\").is_err()); } #[test] fn parse_item_token() { let item = mkitem(r#"token-val;param="param \"value\"""#).unwrap(); assert_eq!(item.as_token(), Some("token-val")); assert_eq!( item.param("param").and_then(|p| p.as_string()), Some("param \"value\"".into()) ); } #[test] fn parse_item_url() { let header = r#"; type="text/html""#; assert!(mkitem(header).is_err()); let item = Item::parse_from_ascii( header.as_bytes(), ParseOptions::default().strict(false).allow_url(true), ) .unwrap(); assert_eq!(item.as_url(), Some("https://example.com/a")); assert_eq!( item.param("type").unwrap().as_string(), Some("text/html".into()) ); } #[test] fn parse_item_byte_sequence() { let base64_str = "aWYgdXIgcmVhZGluZyB0aGlzIHlvdSBzaG91bGQgcHJvYmFibHkgZ28gb3V0c2lkZSBhbmQgdG91Y2ggc29tZSBmdWNraW5nIGdyYXNzCg=="; let item = mkitem(":aWYgdXIgcmVhZGluZyB0aGlzIHlvdSBzaG91bGQgcHJvYmFibHkgZ28gb3V0c2lkZSBhbmQgdG91Y2ggc29tZSBmdWNraW5nIGdyYXNzCg==:").unwrap(); assert_eq!( item.as_byte_sequence(), Some(base64_decode(base64_str).unwrap()) ) } #[test] fn parse_item_boolean() { let item = mkitem("?0").unwrap(); assert_eq!(item.as_boolean(), Some(false)); let item = mkitem("?1").unwrap(); assert_eq!(item.as_boolean(), Some(true)); } #[test] fn reject_non_ascii_item() { assert!(mkitem("þis is not ASCII").is_err()); } #[test] fn reject_empty_item() { assert!(mkitem("").is_err()); } }