askama/filters/
json.rs

1use std::convert::Infallible;
2use std::{fmt, io, str};
3
4use serde::Serialize;
5use serde_json::ser::{CompactFormatter, PrettyFormatter, Serializer};
6
7use super::AsIndent;
8use crate::ascii_str::{AsciiChar, AsciiStr};
9use crate::{FastWritable, NO_VALUES, Values};
10
11/// Serialize to JSON (requires `json` feature)
12///
13/// The generated string does not contain ampersands `&`, chevrons `< >`, or apostrophes `'`.
14/// To use it in a `<script>` you can combine it with the safe filter:
15///
16/// ``` html
17/// <script>
18/// var data = {{data|json|safe}};
19/// </script>
20/// ```
21///
22/// To use it in HTML attributes, you can either use it in quotation marks `"{{data|json}}"` as is,
23/// or in apostrophes with the (optional) safe filter `'{{data|json|safe}}'`.
24/// In HTML texts the output of e.g. `<pre>{{data|json|safe}}</pre>` is safe, too.
25///
26/// ```
27/// # #[cfg(feature = "code-in-doc")] {
28/// # use askama::Template;
29/// /// ```jinja
30/// /// <div><li data-extra='{{data|json|safe}}'>Example</li></div>
31/// /// ```
32///
33/// #[derive(Template)]
34/// #[template(ext = "html", in_doc = true)]
35/// struct Example<'a> {
36///     data: Vec<&'a str>,
37/// }
38///
39/// assert_eq!(
40///     Example { data: vec!["foo", "bar"] }.to_string(),
41///     "<div><li data-extra='[\"foo\",\"bar\"]'>Example</li></div>"
42/// );
43/// # }
44/// ```
45#[inline]
46pub fn json(value: impl Serialize) -> Result<impl fmt::Display, Infallible> {
47    Ok(ToJson { value })
48}
49
50/// Serialize to formatted/prettified JSON (requires `json` feature)
51///
52/// This filter works the same as [`json()`], but it formats the data for human readability.
53/// It has an additional "indent" argument, which can either be an integer how many spaces to use
54/// for indentation (capped to 16 characters), or a string (e.g. `"\u{A0}\u{A0}"` for two
55/// non-breaking spaces).
56///
57/// ### Note
58///
59/// In askama's template language, this filter is called `|json`, too. The right function is
60/// automatically selected depending on whether an `indent` argument was provided or not.
61///
62/// ```
63/// # #[cfg(feature = "code-in-doc")] {
64/// # use askama::Template;
65/// /// ```jinja
66/// /// <div>{{data|json(4)|safe}}</div>
67/// /// ```
68///
69/// #[derive(Template)]
70/// #[template(ext = "html", in_doc = true)]
71/// struct Example<'a> {
72///     data: Vec<&'a str>,
73/// }
74///
75/// assert_eq!(
76///     Example { data: vec!["foo", "bar"] }.to_string(),
77///     "<div>[
78///     \"foo\",
79///     \"bar\"
80/// ]</div>"
81/// );
82/// # }
83/// ```
84#[inline]
85pub fn json_pretty(
86    value: impl Serialize,
87    indent: impl AsIndent,
88) -> Result<impl fmt::Display, Infallible> {
89    Ok(ToJsonPretty { value, indent })
90}
91
/// Lazy wrapper returned by [`json()`]: the value is serialized as compact JSON only when the
/// wrapper is written or displayed.
#[derive(Clone, Debug)]
struct ToJson<S> {
    /// The value to serialize.
    value: S,
}
96
/// Lazy wrapper returned by [`json_pretty()`]: the value is serialized as prettified JSON only
/// when the wrapper is written or displayed.
#[derive(Clone, Debug)]
struct ToJsonPretty<S, I> {
    /// The value to serialize.
    value: S,
    /// The indentation to use for nested items.
    indent: I,
}
102
103impl<S: Serialize> FastWritable for ToJson<S> {
104    #[inline]
105    fn write_into(&self, f: &mut dyn fmt::Write, _: &dyn Values) -> crate::Result<()> {
106        serialize(f, &self.value, CompactFormatter)
107    }
108}
109
110impl<S: Serialize> fmt::Display for ToJson<S> {
111    #[inline]
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        Ok(self.write_into(f, NO_VALUES)?)
114    }
115}
116
117impl<S: Serialize, I: AsIndent> FastWritable for ToJsonPretty<S, I> {
118    #[inline]
119    fn write_into(&self, f: &mut dyn fmt::Write, _: &dyn Values) -> crate::Result<()> {
120        serialize(
121            f,
122            &self.value,
123            PrettyFormatter::with_indent(self.indent.as_indent().as_bytes()),
124        )
125    }
126}
127
128impl<S: Serialize, I: AsIndent> fmt::Display for ToJsonPretty<S, I> {
129    #[inline]
130    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
131        Ok(self.write_into(f, NO_VALUES)?)
132    }
133}
134
135fn serialize<S, W, F>(dest: &mut W, value: &S, formatter: F) -> Result<(), crate::Error>
136where
137    S: Serialize + ?Sized,
138    W: fmt::Write + ?Sized,
139    F: serde_json::ser::Formatter,
140{
141    /// The struct must only ever be used with the output of `serde_json`.
142    /// `serde_json` only produces UTF-8 strings in its `io::Write::write()` calls,
143    /// and `<JsonWriter as io::Write>` depends on this invariant.
144    struct JsonWriter<'a, W: fmt::Write + ?Sized>(&'a mut W);
145
146    impl<W: fmt::Write + ?Sized> io::Write for JsonWriter<'_, W> {
147        /// Invariant: must be passed valid UTF-8 slices
148        #[inline]
149        fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
150            self.write_all(bytes)?;
151            Ok(bytes.len())
152        }
153
154        /// Invariant: must be passed valid UTF-8 slices
155        fn write_all(&mut self, bytes: &[u8]) -> io::Result<()> {
156            // SAFETY: `serde_json` only writes valid strings
157            let string = unsafe { std::str::from_utf8_unchecked(bytes) };
158            write_escaped_str(&mut *self.0, string)
159                .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
160        }
161
162        #[inline]
163        fn flush(&mut self) -> io::Result<()> {
164            Ok(())
165        }
166    }
167
168    /// Invariant: no character that needs escaping is multi-byte character when encoded in UTF-8;
169    /// that is true for characters in ASCII range.
170    #[inline]
171    fn write_escaped_str(dest: &mut (impl fmt::Write + ?Sized), src: &str) -> fmt::Result {
172        // This implementation reads one byte after another.
173        // It's not very fast, but should work well enough until portable SIMD gets stabilized.
174
175        let mut escaped_buf = ESCAPED_BUF_INIT;
176        let mut last = 0;
177
178        for (index, byte) in src.bytes().enumerate() {
179            if let Some(escaped) = get_escaped(byte) {
180                [escaped_buf[4], escaped_buf[5]] = escaped;
181                write_str_if_nonempty(dest, &src[last..index])?;
182                dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))?;
183                last = index + 1;
184            }
185        }
186        write_str_if_nonempty(dest, &src[last..])
187    }
188
189    let mut serializer = Serializer::with_formatter(JsonWriter(dest), formatter);
190    Ok(value.serialize(&mut serializer)?)
191}
192
193/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
194#[inline]
195fn get_escaped(byte: u8) -> Option<[AsciiChar; 2]> {
196    const _: () = assert!(CHAR_RANGE < 32);
197
198    if let MIN_CHAR..=MAX_CHAR = byte
199        && (1u32 << (byte - MIN_CHAR)) & BITS != 0
200    {
201        return Some(TABLE.0[(byte - MIN_CHAR) as usize]);
202    }
203    None
204}
205
/// Forwards `input` to `output`, skipping the call entirely when `input` is empty.
#[inline(always)]
fn write_str_if_nonempty(output: &mut (impl fmt::Write + ?Sized), input: &str) -> fmt::Result {
    if input.is_empty() {
        return Ok(());
    }
    output.write_str(input)
}
214
/// The characters that need HTML escaping; the order of the list carries no meaning.
const CHARS: &[u8] = b"&'<>";
217
218/// The character with the lowest codepoint that needs HTML escaping.
219const MIN_CHAR: u8 = {
220    let mut v = u8::MAX;
221    let mut i = 0;
222    while i < CHARS.len() {
223        if v > CHARS[i] {
224            v = CHARS[i];
225        }
226        i += 1;
227    }
228    v
229};
230
231/// The character with the highest codepoint that needs HTML escaping.
232const MAX_CHAR: u8 = {
233    let mut v = u8::MIN;
234    let mut i = 0;
235    while i < CHARS.len() {
236        if v < CHARS[i] {
237            v = CHARS[i];
238        }
239        i += 1;
240    }
241    v
242};
243
244const BITS: u32 = {
245    let mut bits = 0;
246    let mut i = 0;
247    while i < CHARS.len() {
248        bits |= 1 << (CHARS[i] - MIN_CHAR);
249        i += 1;
250    }
251    bits
252};
253
254/// Number of codepoints between the lowest and highest character that needs escaping, incl.
255const CHAR_RANGE: usize = (MAX_CHAR - MIN_CHAR + 1) as usize;
256
257#[repr(align(64))]
258struct Table([[AsciiChar; 2]; CHAR_RANGE]);
259
260/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
261/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
262const TABLE: &Table = &{
263    let mut table = Table([UNESCAPED; CHAR_RANGE]);
264    let mut i = 0;
265    while i < CHARS.len() {
266        let c = CHARS[i];
267        table.0[c as u32 as usize - MIN_CHAR as usize] = AsciiChar::two_hex_digits(c as u32);
268        i += 1;
269    }
270    table
271};
272
273const UNESCAPED: [AsciiChar; 2] = AsciiStr::new_sized("");
274
275const ESCAPED_BUF_INIT_UNPADDED: &str = "\\u00__";
276// RATIONALE: llvm generates better code if the buffer is register sized
277const ESCAPED_BUF_INIT: [AsciiChar; 8] = AsciiStr::new_sized(ESCAPED_BUF_INIT_UNPADDED);
278const ESCAPED_BUF_LEN: usize = ESCAPED_BUF_INIT_UNPADDED.len();
279
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use alloc::string::ToString;
    use alloc::vec;

    use super::*;

    /// Compact output: scalars, strings, sequences, and the escaping of every character that
    /// needs HTML escaping.
    #[test]
    fn test_ugly() {
        assert_eq!(json(true).unwrap().to_string(), "true");
        assert_eq!(json("foo").unwrap().to_string(), r#""foo""#);
        assert_eq!(
            json("<script>").unwrap().to_string(),
            r#""\u003cscript\u003e""#
        );
        // Ampersands and apostrophes must be escaped just like the chevrons.
        assert_eq!(json("a&b'c").unwrap().to_string(), r#""a\u0026b\u0027c""#);
        assert_eq!(
            json(vec!["foo", "bar"]).unwrap().to_string(),
            r#"["foo","bar"]"#
        );
    }

    /// Prettified output with an empty, a numeric, and a multi-byte string indentation.
    #[test]
    fn test_pretty() {
        assert_eq!(json_pretty(true, "").unwrap().to_string(), "true");
        assert_eq!(
            json_pretty("<script>", "").unwrap().to_string(),
            r#""\u003cscript\u003e""#
        );
        assert_eq!(
            json_pretty(vec!["foo", "bar"], "").unwrap().to_string(),
            r#"[
"foo",
"bar"
]"#
        );
        assert_eq!(
            json_pretty(vec!["foo", "bar"], 2).unwrap().to_string(),
            r#"[
  "foo",
  "bar"
]"#
        );
        assert_eq!(
            json_pretty(vec!["foo", "bar"], "————").unwrap().to_string(),
            r#"[
————"foo",
————"bar"
]"#
        );
    }
}
330        );
331    }
332}