Skip to content

Commit

Permalink
Make Error a struct instead of an enum to hide implementation (#40)
Browse files Browse the repository at this point in the history
* Make Error a struct instead of an enum

* include index in display impl

* make index its own variable, rename char variable

* Modify changelog additions

* Correct error text in zalgofy! docstring

* Add tests to index
  • Loading branch information
JSorngard authored Feb 1, 2024
1 parent 8875b54 commit df34250
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 146 deletions.
4 changes: 2 additions & 2 deletions common/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ This document contains all changes to the crate since version 0.9.4.

### Breaking changes

- The `Error::NotAscii` variant now stores the unencodable character instead of just the first byte of it.
- `Error` is now a struct.
- Removed the `byte` function from `Error`.

### Minor changes

- Add `is_not_ascii` and `is_unencodable_ascii` variant checking functions to `Error`.
- Add `char` function to `Error`.
- Add `index` function to `Error`.

## 0.10.4

Expand Down
134 changes: 58 additions & 76 deletions common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,41 @@
use core::fmt;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
/// The error returned by [`zalgo_encode`](crate::zalgo_encode), [`ZalgoString::new`](crate::ZalgoString::new), and [`zalgo_wrap_python`](crate::zalgo_wrap_python)
/// if they encounter a byte they can not encode.
///
/// Only implements the [`Error`](std::error::Error) trait if the `std` feature is enabled.
pub enum Error {
/// Represents a valid ASCII character that is outside of the encodable set.
/// The first `u8` in the variant is the byte value of the character, the first `usize`
/// is the 1-indexed line number where the character occurred, the second `usize` is
/// the 1-indexed column in which the character occurred and the `&str` is a description
/// of the character.
UnencodableAscii(u8, usize, usize, &'static str),
/// Represents some other character.
/// The two `usize`s represent the same thing as in the `UnencodableAscii` variant,
NotAscii(char, usize, usize),
pub struct Error {
unencodable_character: char,
line: usize,
column: usize,
index: usize,
}

impl Error {
/// Creates a new `Error`.
///
/// # Note
///
/// This associated method does not check the validity of its inputs,
/// and just constructs a new `Error` instance.
#[inline]
#[must_use = "this associated method does not modify its inputs and just returns a new value"]
pub(crate) const fn new(
unencodable_character: char,
line: usize,
column: usize,
index: usize,
) -> Self {
Self {
unencodable_character,
line,
column,
index,
}
}

/// Returns the 1-indexed line number of the line on which the unencodable byte occurred.
///
/// # Examples
Expand All @@ -32,9 +49,7 @@ impl Error {
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn line(&self) -> usize {
match self {
Self::UnencodableAscii(_, line, _, _) | Self::NotAscii(_, line, _) => *line,
}
self.line
}

/// Returns the 1-indexed column where the unencodable byte occurred.
Expand All @@ -50,9 +65,7 @@ impl Error {
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn column(&self) -> usize {
match self {
Self::UnencodableAscii(_, _, column, _) | Self::NotAscii(_, _, column) => *column,
}
self.column
}

/// Returns the unencodable character that caused the error.
Expand Down Expand Up @@ -82,73 +95,34 @@ impl Error {
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn char(&self) -> char {
match self {
Self::UnencodableAscii(byte, _, _, _) => *byte as char,
Self::NotAscii(char, _, _) => *char,
}
self.unencodable_character
}

/// If the unencodable character is an ASCII character
/// this function returns a representation of it.
///
/// # Examples
///
/// ```
/// # use zalgo_codec_common::zalgo_encode;
/// assert_eq!(zalgo_encode("\r").map_err(|e| e.representation()), Err(Some("Carriage Return")));
/// assert_eq!(zalgo_encode("❤️").map_err(|e| e.representation()), Err(None));
/// ```
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn representation(&self) -> Option<&'static str> {
match self {
Self::UnencodableAscii(_, _, _, repr) => Some(*repr),
Self::NotAscii(_, _, _) => None,
}
}

/// Returns whether the error is the [`NotAscii`](Error::NotAscii) variant.
///
/// # Example
///
/// ```
/// # use zalgo_codec_common::{Error, zalgo_encode};
/// assert_eq!(zalgo_encode("Blå").map_err(|e| e.is_not_ascii()), Err(true));
/// ```
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn is_not_ascii(&self) -> bool {
matches!(self, Self::NotAscii(_, _, _))
}

/// Returns whether the error is the [`UnencodableAscii`](Error::UnencodableAscii) variant.
/// Returns the byte index into the input string at which the unencodable character occurred.
///
/// # Example
///
/// ```
/// # use zalgo_codec_common::{Error, zalgo_encode};
/// assert_eq!(zalgo_encode("true\rfalse").map_err(|e| e.is_unencodable_ascii()), Err(true));
/// # use zalgo_codec_common::zalgo_encode;
/// assert_eq!(zalgo_encode("ab\ncdë").map_err(|e| e.index()), Err(5));
/// ```
#[inline]
#[must_use = "the method returns a new value and does not modify `self`"]
pub const fn is_unencodable_ascii(&self) -> bool {
matches!(self, Self::UnencodableAscii(_, _, _, _))
pub const fn index(&self) -> usize {
self.index
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::UnencodableAscii(byte, line, column, repr) => write!(
f,
"line {line} at column {column}: can not encode ascii '{repr}' character with byte value {byte}"
),
Self::NotAscii(char, line, column) => write!(
f,
"line {line} at column {column}: can not encode non-ascii character '{char}' (U+{:X})",
u32::from(*char)
),
}
// Report the offending character together with every location detail the
// error carries. The character is formatted with `{:?}` so that it is shown
// quoted and escaped (e.g. '\r') instead of being emitted raw.
// The message deliberately ends after the column number: nothing follows it,
// so there must be no trailing ": " separator.
write!(
    f,
    "can not encode {:?} character at string index {}, on line {} at column {}",
    self.char(),
    self.index(),
    self.line(),
    self.column(),
)
}
}

Expand All @@ -161,19 +135,27 @@ mod test {

#[test]
fn test_error() {
let err = Error::NotAscii('å', 1, 7);
assert!(err.is_not_ascii());
let err = Error {
line: 1,
column: 7,
unencodable_character: 'å',
index: 6,
};
assert_eq!(err.char(), 'å');
assert_eq!(err.line(), 1);
assert_eq!(err.column(), 7);
assert_eq!(err.representation(), None);
assert_eq!(err.index(), 6);

let err2 = Error::UnencodableAscii(13, 1, 2, "Carriage Return");
assert!(err2.is_unencodable_ascii());
let err2 = Error {
line: 1,
column: 2,
unencodable_character: '\r',
index: 1,
};
assert_eq!(err2.char(), '\r');
assert_eq!(err2.line(), 1);
assert_eq!(err2.column(), 2);
assert_eq!(err2.representation(), Some("Carriage Return"));
assert_eq!(err2.index(), 1);

assert_ne!(err, err2);
let err3 = err;
Expand Down
79 changes: 12 additions & 67 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,19 +257,15 @@ pub fn zalgo_encode(string: &str) -> Result<String, Error> {
encoded += 2;
column += 1;
} else {
match nonprintable_ascii_repr(*byte) {
Some(repr) => return Err(Error::UnencodableAscii(*byte, line, column, repr)),
None => {
// The panic should never trigger since we know that string[i*BATCH_SIZE + j]
// has some value which is stored in `byte`, and that this value is the first
// byte of a non-ascii character and that Strings in Rust are valid utf-8.
// All of this means that the value that starts at this index is a utf-8 encoded
// character, which `chars.next()` will extract.
let char = string[i*BATCH_SIZE + j..].chars().next()
.expect("i*BATCH_SIZE + j is within the string and on a char boundary, so string.chars().next() should find a char");
return Err(Error::NotAscii(char, line, column));
}
}
let index = i * BATCH_SIZE + j;
// The panic should never trigger since we know that string[i*BATCH_SIZE + j]
// has some value which is stored in `byte`, and that this value is the first
// byte of a non-ascii character and that Strings in Rust are valid utf-8.
// All of this means that the value that starts at this index is a utf-8 encoded
// character, which `chars.next()` will extract.
let unencodable_character = string[index..].chars().next()
.expect("i*BATCH_SIZE + j is within the string and on a char boundary, so string.chars().next() should find a char");
return Err(Error::new(unencodable_character, line, column, index));
}
}
result.extend_from_slice(&buffer[..encoded]);
Expand Down Expand Up @@ -370,9 +366,10 @@ const fn decode_byte_pair(odd: u8, even: u8) -> u8 {
/// ASCII character or newline.
/// ```
/// # use zalgo_codec_common::{Error, zalgo_wrap_python};
/// let res = zalgo_wrap_python(r#"print("That will be 5€ please")"#);
/// assert_eq!(
/// zalgo_wrap_python(r#"print("That will be 5€ please")"#),
/// Err(Error::NotAscii('€', 1, 22))
/// res.map_err(|e| (e.char(), e.line(), e.column())),
/// Err(('€', 1, 22)),
/// );
/// ```
#[must_use = "the function returns a new value and does not modify the input"]
Expand All @@ -381,58 +378,6 @@ pub fn zalgo_wrap_python(python: &str) -> Result<String, Error> {
Ok(format!("b='{encoded_string}'.encode();exec(''.join(chr(((h<<6&64|c&63)+22)%133+10)for h,c in zip(b[1::2],b[2::2])))"))
}

/// Returns the conventional name of the given ASCII byte if it is a
/// non-printable control character, and `None` otherwise.
///
/// Named bytes are `0..=9`, `11..=31` and `127` (Delete). Byte `10`
/// (line feed) has no entry and yields `None`, as do the printable range
/// `32..=126` and every non-ASCII byte (`128..=255`).
#[inline]
#[must_use = "the function returns a new value and does not modify the input"]
const fn nonprintable_ascii_repr(byte: u8) -> Option<&'static str> {
    // Names for bytes 0 through 9, indexed directly by the byte value.
    const BELOW_LINE_FEED: [&str; 10] = [
        "Null",
        "Start Of Heading",
        "Start Of Text",
        "End Of Text",
        "End Of Transmission",
        "Enquiry",
        "Acknowledge",
        "Bell",
        "Backspace",
        "Horizontal Tab",
    ];
    // Names for bytes 11 through 31, indexed by `byte - 11`.
    // Bytes 17..=20 are DC1..DC4, whose ASCII name is "Device Control".
    const ABOVE_LINE_FEED: [&str; 21] = [
        "Vertical Tab",
        "Form Feed",
        "Carriage Return",
        "Shift Out",
        "Shift In",
        "Data Link Escape",
        "Device Control 1",
        "Device Control 2",
        "Device Control 3",
        "Device Control 4",
        "Negative Acknowledge",
        "Synchronous Idle",
        "End Of Transmission Block",
        "Cancel",
        "End Of Medium",
        "Substitute",
        "Escape",
        "File Separator",
        "Group Separator",
        "Record Separator",
        "Unit Separator",
    ];

    match byte {
        0..=9 => Some(BELOW_LINE_FEED[byte as usize]),
        // Byte 10 is skipped on purpose: it falls through to `None` below.
        11..=31 => Some(ABOVE_LINE_FEED[byte as usize - 11]),
        127 => Some("Delete"),
        _ => None,
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion macro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ pub fn zalgo_embed(encoded: TokenStream) -> TokenStream {
///
/// ```compile_fail
/// # use zalgo_codec_macro::zalgofy;
/// // compile error: "line 2 at column 3: byte value 226 does not correspond to an ASCII character"
/// // compile error: "can not encode '€' character at string index 4, on line 2 at column 3"
/// const ZS: &str = zalgofy!(
/// r"a
/// ae€"
Expand Down

0 comments on commit df34250

Please sign in to comment.