askama/
html.rs

1// The file is shared across many crates, not all have this feature.
2// If they don't then the tests won't be compiled in, but that's OK, because they are executed at
3// least in the crate `askama`. There's no need to run the test multiple times.
4#![allow(unexpected_cfgs)]
5
6use core::{fmt, str};
7
8use crate::ascii_str::{AsciiChar, AsciiStr};
9
10#[allow(unused)]
11pub(crate) fn write_escaped_str(mut dest: impl fmt::Write, src: &str) -> fmt::Result {
12    // This implementation reads one byte after another.
13    // It's not very fast, but should work well enough until portable SIMD gets stabilized.
14
15    let mut escaped_buf = ESCAPED_BUF_INIT;
16    let mut last = 0;
17
18    for (index, byte) in src.bytes().enumerate() {
19        if let Some(escaped) = get_escaped(byte) {
20            [escaped_buf[2], escaped_buf[3]] = escaped;
21            write_str_if_nonempty(&mut dest, &src[last..index])?;
22            dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))?;
23            last = index + 1;
24        }
25    }
26    write_str_if_nonempty(&mut dest, &src[last..])
27}
28
29#[allow(unused)]
30pub(crate) fn write_escaped_char(mut dest: impl fmt::Write, c: char) -> fmt::Result {
31    if !c.is_ascii() {
32        dest.write_char(c)
33    } else if let Some(escaped) = get_escaped(c as u8) {
34        let mut escaped_buf = ESCAPED_BUF_INIT;
35        [escaped_buf[2], escaped_buf[3]] = escaped;
36        dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))
37    } else {
38        // RATIONALE: `write_char(c)` gets optimized if it is known that `c.is_ascii()`
39        dest.write_char(c)
40    }
41}
42
43/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
44#[inline]
45fn get_escaped(byte: u8) -> Option<[AsciiChar; 2]> {
46    if let MIN_CHAR..=MAX_CHAR = byte {
47        let entry = TABLE.0[(byte - MIN_CHAR) as usize];
48        (entry != UNESCAPED).then_some(entry)
49    } else {
50        None
51    }
52}
53
/// Forwards `input` to `output.write_str()`, unless `input` is empty.
///
/// For an empty `input` the writer is not called at all, and `Ok(())` is returned.
#[inline(always)]
fn write_str_if_nonempty(output: &mut impl fmt::Write, input: &str) -> fmt::Result {
    if input.is_empty() {
        return Ok(());
    }
    output.write_str(input)
}
62
/// The bytes that need HTML escaping. The order of the entries is irrelevant, because
/// [`MIN_CHAR`] and [`MAX_CHAR`] are computed by scanning the whole list.
const CHARS: &[u8] = br#""&'<>"#;

/// The smallest byte value found in [`CHARS`].
const MIN_CHAR: u8 = {
    // Iterators are not usable in a const context, so the list is walked by index, back to front.
    let mut lowest = u8::MAX;
    let mut remaining = CHARS.len();
    while remaining > 0 {
        remaining -= 1;
        let candidate = CHARS[remaining];
        if candidate < lowest {
            lowest = candidate;
        }
    }
    lowest
};

/// The largest byte value found in [`CHARS`].
const MAX_CHAR: u8 = {
    let mut highest = u8::MIN;
    let mut pos = 0;
    loop {
        if pos >= CHARS.len() {
            break highest;
        }
        let candidate = CHARS[pos];
        if candidate > highest {
            highest = candidate;
        }
        pos += 1;
    }
};

/// Number of byte values in the inclusive range `MIN_CHAR..=MAX_CHAR`.
const CHAR_RANGE: usize = {
    let span = MAX_CHAR - MIN_CHAR + 1;
    span as usize
};
94
95#[repr(align(64))]
96struct Table([[AsciiChar; 2]; CHAR_RANGE]);
97
98/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
99/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
100const TABLE: &Table = &{
101    let mut table = Table([UNESCAPED; CHAR_RANGE]);
102    let mut i = 0;
103    while i < CHARS.len() {
104        let c = CHARS[i];
105        table.0[c as u32 as usize - MIN_CHAR as usize] = AsciiChar::two_digits(c as u32);
106        i += 1;
107    }
108    table
109};
110
111const UNESCAPED: [AsciiChar; 2] = AsciiStr::new_sized("");
112
113const ESCAPED_BUF_INIT_UNPADDED: &str = "&#__;";
114// RATIONALE: llvm generates better code if the buffer is register sized
115const ESCAPED_BUF_INIT: [AsciiChar; 8] = AsciiStr::new_sized(ESCAPED_BUF_INIT_UNPADDED);
116const ESCAPED_BUF_LEN: usize = ESCAPED_BUF_INIT_UNPADDED.len();
117
/// Checks that `<` and `>` are replaced by `&#60;` and `&#62;`, no matter if they are at the
/// start or end of the input, or separated by a short or a long unescaped run.
#[test]
#[cfg(feature = "alloc")]
fn test_simple_html_string_escaping() {
    extern crate alloc;

    // Pairs of (raw input, expected escaped output).
    const CASES: [(&str, &str); 3] = [
        ("<script>", "&#60;script&#62;"),
        ("s<crip>t", "s&#60;crip&#62;t"),
        (
            "s<cripcripcripcripcripcripcripcripcripcrip>t",
            "s&#60;cripcripcripcripcripcripcripcripcripcrip&#62;t",
        ),
    ];

    // A single buffer is reused for all cases, and emptied before each one.
    let mut rendered = alloc::string::String::new();
    for &(raw, expected) in CASES.iter() {
        rendered.clear();
        write_escaped_str(&mut rendered, raw).unwrap();
        assert_eq!(rendered, expected);
    }
}