|
/// Options to configure the behavior of [`lowercase`].
///
/// Which letters exactly are replaced, and by which other letters, depends on
/// the given options.
///
/// See individual variants for a description of the available behaviors.
///
/// If you're not sure which mode to choose, [`LowercaseMode::Full`] is a good
/// default.
///
/// [`lowercase`]: crate::lowercase()
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum LowercaseMode {
    /// Full Unicode case mapping, suitable for most languages.
    ///
    /// See the [Turkic] and [Lithuanian] variants for exceptions.
    ///
    /// Context-dependent case mapping as described in Table 3-14 of the Unicode
    /// standard is currently not supported.
    ///
    /// [Turkic]: Self::Turkic
    /// [Lithuanian]: Self::Lithuanian
    Full,
    /// Only the ASCII region, i.e. the characters `'A'..='Z'` and `'a'..='z'`,
    /// are affected.
    ///
    /// This option cannot be combined with any other option.
    Ascii,
    /// Full Unicode case mapping, adapted for Turkic languages (Turkish,
    /// Azerbaijani, …).
    ///
    /// This means that upper case I is mapped to lower case dotless i, and so
    /// on.
    Turkic,
    /// Currently, just [full Unicode case mapping].
    ///
    /// In the future, full Unicode case mapping adapted for Lithuanian (keeping
    /// the dot on the lower case i even if there is an accent on top).
    ///
    /// [full Unicode case mapping]: Self::Full
    Lithuanian,
    /// Unicode case **folding**, which is more far-reaching than Unicode case
    /// mapping.
    ///
    /// This option currently cannot be combined with any other option (i.e.
    /// there is currently no variant for Turkic languages).
    Fold,
}
|
|
|
impl Default for LowercaseMode { |
|
fn default() -> Self { |
|
Self::Full |
|
} |
|
} |
|
|
|
impl TryFrom<&str> for LowercaseMode { |
|
type Error = InvalidCaseMappingMode; |
|
|
|
#[inline] |
|
fn try_from(value: &str) -> Result<Self, Self::Error> { |
|
value.as_bytes().try_into() |
|
} |
|
} |
|
|
|
impl TryFrom<Option<&str>> for LowercaseMode { |
|
type Error = InvalidCaseMappingMode; |
|
|
|
#[inline] |
|
fn try_from(value: Option<&str>) -> Result<Self, Self::Error> { |
|
value.map(str::as_bytes).try_into() |
|
} |
|
} |
|
|
|
impl TryFrom<&[u8]> for LowercaseMode { |
|
type Error = InvalidCaseMappingMode; |
|
|
|
#[inline] |
|
fn try_from(value: &[u8]) -> Result<Self, Self::Error> { |
|
match value { |
|
b"ascii" => Ok(Self::Ascii), |
|
b"turkic" => Ok(Self::Turkic), |
|
b"lithuanian" => Ok(Self::Lithuanian), |
|
b"fold" => Ok(Self::Fold), |
|
_ => Err(InvalidCaseMappingMode::new()), |
|
} |
|
} |
|
} |
|
|
|
impl TryFrom<Option<&[u8]>> for LowercaseMode { |
|
type Error = InvalidCaseMappingMode; |
|
|
|
#[inline] |
|
fn try_from(value: Option<&[u8]>) -> Result<Self, Self::Error> { |
|
match value { |
|
None => Ok(Self::Full), |
|
Some(b"ascii") => Ok(Self::Ascii), |
|
Some(b"turkic") => Ok(Self::Turkic), |
|
Some(b"lithuanian") => Ok(Self::Lithuanian), |
|
Some(b"fold") => Ok(Self::Fold), |
|
Some(_) => Err(InvalidCaseMappingMode::new()), |
|
} |
|
} |
|
} |
|
|
|
impl FromStr for LowercaseMode { |
|
type Err = InvalidCaseMappingMode; |
|
|
|
#[inline] |
|
fn from_str(s: &str) -> Result<Self, Self::Err> { |
|
s.try_into() |
|
} |
|
} |
|
|
|
/// Returns an iterator that yields a copy of the bytes in the given slice with |
|
/// all uppercase letters replaced with their lowercase counterparts. |
|
/// |
|
/// This function treats the given slice as a [conventionally UTF-8 string]. |
|
/// UTF-8 byte sequences are converted to their Unicode lowercase equivalents. |
|
/// Invalid UTF-8 byte sequences are yielded as is. |
|
/// |
|
/// The case mapping mode is determined by the given [`LowercaseMode`]. See its |
|
/// documentation for details on the available case mapping modes. |
|
/// |
|
/// # Panics |
|
/// |
|
/// Not all [`LowercaseMode`]s are currently implemented. This function will |
|
/// panic if the caller supplies [Turkic] or [case folding] lowercasing mode. |
|
/// |
|
/// [conventionally UTF-8 string]: https://docs.rs/bstr/0.2.*/bstr/#when-should-i-use-byte-strings |
|
/// [Turkic]: LowercaseMode::Turkic |
|
/// [case folding]: LowercaseMode::Fold |
|
// TODO: make this const once we're no longer panicking. |
|
pub fn lowercase(slice: &[u8], options: LowercaseMode) -> Lowercase<'_> { |
|
match options { |
|
LowercaseMode::Full | LowercaseMode::Lithuanian => Lowercase::with_slice(slice), |
|
LowercaseMode::Ascii => Lowercase::with_ascii_slice(slice), |
|
// TODO: implement `turkic` and `fold` modes. |
|
LowercaseMode::Turkic => panic!("lowercase Turkic mode is not yet implemented"), |
|
LowercaseMode::Fold => panic!("lowercase case folding mode is not yet implemented"), |
|
} |
|
} |