Ferrit Explore
中文·繁體·EN·日本語 Sign in Register
cielxl / veld / src / handler / rewrite.rs
//! URL rewrite / redirect handler.
//!
//! Applies a chain of rewrite rules to the request URI, producing either an
//! internal rewrite (transparent path substitution), an HTTP redirect (301 or
//! 302), or a forced return with an arbitrary status code.
//!
//! Pattern matching is intentionally simple -- no regex crate dependency --
//! and supports:
//!
//! - Literal prefix match; the text following the prefix is captured
//! - A single `*` wildcard that captures the text between the literal parts
//!   of the pattern
//! - Back-references (`$1`) in the replacement string that expand to the
//!   captured text (`$$` yields a literal `$`)
//!
//! This mirrors the most common nginx `rewrite` directive patterns while
//! keeping the implementation small, fast, and dependency-free.

use tracing::debug;

// ---------------------------------------------------------------------------
// RewriteRule
// ---------------------------------------------------------------------------

/// A single rewrite rule.
///
/// When the request URI matches [`pattern`](Self::pattern), a new URI is
/// built by expanding [`replacement`](Self::replacement).  The expanded
/// replacement becomes the whole result (it is not spliced into the original
/// URI), and is delivered either as an internal rewrite or as an HTTP
/// redirect depending on [`redirect`](Self::redirect).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RewriteRule {
    /// Pattern to match against the request URI.
    ///
    /// A pattern without `*` matches any URI that starts with it; the rest
    /// of the URI after that prefix is the captured text.
    ///
    /// The first `*` in the pattern is a wildcard: the URI must start with
    /// the text before it and end with the text after it, and the wildcard
    /// captures whatever lies in between.  Any further `*` characters are
    /// matched literally.
    pub pattern: String,

    /// Replacement string.
    ///
    /// May contain `$1` to refer to the captured text: the wildcard capture,
    /// or -- when the pattern has no wildcard -- the part of the URI that
    /// follows the matched prefix (empty when the URI equals the pattern).
    /// `$$` produces a literal `$`.
    pub replacement: String,

    /// When `true`, issue an HTTP redirect (301 or 302) instead of an
    /// internal rewrite.
    ///
    /// The status code is chosen by [`permanent`](Self::permanent).
    pub redirect: bool,

    /// When `true` (together with `redirect`), emit a **301 Moved
    /// Permanently**.  When `false`, emit a **302 Found** (temporary
    /// redirect).  Ignored when `redirect` is `false`.
    pub permanent: bool,

    /// When `true`, stop processing subsequent rules after this one matches.
    ///
    /// Note: [`RewriteHandler::apply`] currently returns the result of the
    /// first matching rule whether or not this flag is set, so the flag does
    /// not change the outcome there; it is only reported in the debug log.
    pub last: bool,
}

// ---------------------------------------------------------------------------
// RewriteResult
// ---------------------------------------------------------------------------

/// The outcome of applying a rewrite rule (or a return directive).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RewriteResult {
    /// Perform an HTTP redirect to the given URL with the specified status
    /// code.  Fields are `(target, status_code)`; rules produce 301 when
    /// marked permanent and 302 otherwise.
    Redirect(String, u16),

    /// Internally rewrite the URI (transparent to the client).  The payload
    /// is the new URI.
    Rewrite(String),

    /// Force a response with the given status code and body text, bypassing
    /// all further location processing.  Fields are `(status_code, body)`;
    /// produced only when the handler has a return directive configured.
    Return(u16, String),
}

// ---------------------------------------------------------------------------
// RewriteHandler
// ---------------------------------------------------------------------------

/// Processes an ordered list of [`RewriteRule`]s against a request URI.
///
/// Rules are evaluated in order and the first matching rule wins: its
/// result is returned immediately and no later rule is consulted, whether
/// or not the rule has its `last` flag set.
///
/// An optional *return directive* (`return_code`) short-circuits all rewrite
/// logic and forces an immediate response with the configured status code
/// and body.
#[derive(Debug, Clone)]
pub struct RewriteHandler {
    /// Ordered list of rewrite rules.
    rules: Vec<RewriteRule>,

    /// Optional forced return: `(status_code, body)`.
    ///
    /// When set (via [`with_return`](Self::with_return) or
    /// [`set_return`](Self::set_return)), [`apply`](Self::apply) always
    /// returns [`RewriteResult::Return`] without evaluating any rules.
    return_code: Option<(u16, String)>,
}

impl RewriteHandler {
    // ------------------------------------------------------------------
    // Construction
    // ------------------------------------------------------------------

    /// Create a new rewrite handler with the given rules.
    pub fn new(rules: Vec<RewriteRule>) -> Self {
        Self {
            rules,
            return_code: None,
        }
    }

    /// Create a new rewrite handler with an explicit return directive.
    ///
    /// When [`apply`](Self::apply) is called it will always return
    /// `RewriteResult::Return(code, body)` without evaluating rules.
    pub fn with_return(code: u16, body: impl Into<String>) -> Self {
        Self {
            rules: Vec::new(),
            return_code: Some((code, body.into())),
        }
    }

    /// Set a return directive on an existing handler.
    pub fn set_return(&mut self, code: u16, body: impl Into<String>) {
        self.return_code = Some((code, body.into()));
    }

    /// Return a reference to the configured rules.
    pub fn rules(&self) -> &[RewriteRule] {
        &self.rules
    }

    // ------------------------------------------------------------------
    // Rule application
    // ------------------------------------------------------------------

    /// Apply the rewrite chain to `uri`.
    ///
    /// Returns `Some(RewriteResult)` when a rule matched (or a return
    /// directive is configured), or `None` when no rule matched -- meaning
    /// the URI should be processed as-is.
    pub fn apply(&self, uri: &str) -> Option<RewriteResult> {
        // Short-circuit: forced return directive.
        if let Some((code, ref body)) = self.return_code {
            debug!(uri = %uri, code = code, "rewrite: forced return");
            return Some(RewriteResult::Return(code, body.clone()));
        }

        for rule in &self.rules {
            if let Some(result) = Self::apply_rule(rule, uri) {
                debug!(
                    uri = %uri,
                    pattern = %rule.pattern,
                    redirect = rule.redirect,
                    last = rule.last,
                    "rewrite: rule matched"
                );

                // If `last` is set, no further rules are evaluated.  The
                // result is returned as-is.
                //
                // If `last` is NOT set and the result is a Rewrite, we
                // would ideally re-evaluate the remaining rules against the
                // new URI.  For simplicity (and because this mirrors common
                // nginx usage where `last` is almost always set alongside
                // `rewrite`) we currently apply only the first matching
                // rule.  A more complete implementation could loop.
                if rule.last {
                    return Some(result);
                }

                // Even without `last`, return the first match.  Callers
                // that need chained rewrites should set `last: true` on
                // intermediate rules and call `apply` in a loop.
                return Some(result);
            }
        }

        None
    }

    /// Attempt to match and apply a single rule against `uri`.
    ///
    /// Returns `Some(RewriteResult)` when the rule's pattern matches.
    fn apply_rule(rule: &RewriteRule, uri: &str) -> Option<RewriteResult> {
        let captured = match_pattern(&rule.pattern, uri)?;

        // Expand capture references ($1) in the replacement string.
        let new_uri = expand_replacement(&rule.replacement, &captured);

        if rule.redirect {
            let code: u16 = if rule.permanent { 301 } else { 302 };
            Some(RewriteResult::Redirect(new_uri, code))
        } else {
            Some(RewriteResult::Rewrite(new_uri))
        }
    }
}

// ---------------------------------------------------------------------------
// Pattern matching (no regex crate)
// ---------------------------------------------------------------------------

/// Test `uri` against `pattern`, yielding the captured text on success.
///
/// The pattern is split at its first `*` (if any) into a literal head and a
/// literal tail:
///
/// - Without a `*`, the whole pattern is the head and the tail is empty, so
///   the URI only has to **start with** the pattern.  Everything after that
///   prefix is the capture.
///
/// - With a `*`, the URI must start with the head and what remains must end
///   with the tail.  The capture is the text between the two.  Any later
///   `*` is part of the tail and is matched literally.
///
/// Returns `None` when the URI does not fit the pattern.
fn match_pattern<'a>(pattern: &str, uri: &'a str) -> Option<CapturedText<'a>> {
    // No wildcard behaves like `pattern*`: literal head, empty tail.
    let (head, tail) = pattern.split_once('*').unwrap_or((pattern, ""));

    // The tail is stripped from what is left *after* the head, so the head
    // and tail can never overlap inside the URI.  An empty tail strips
    // nothing and always succeeds.
    let capture = uri.strip_prefix(head)?.strip_suffix(tail)?;

    Some(CapturedText {
        capture,
        full_match_len: uri.len(),
    })
}

/// The text captured by a wildcard (or the remainder after an exact prefix).
///
/// Borrows from the URI that was matched, so it cannot outlive that URI.
struct CapturedText<'a> {
    /// The captured substring; this is what `$1` expands to.
    capture: &'a str,
    /// Total length of the matched portion of the URI (used for potential
    /// future extensions; currently informational).  `match_pattern` always
    /// sets this to the byte length of the whole URI.
    #[allow(dead_code)]
    full_match_len: usize,
}

/// Build the output string by substituting capture references found in
/// `replacement`.
///
/// Recognised `$` sequences:
///
/// - `$1` expands to the captured text (there is only one capture group).
/// - `$$` produces a single literal `$`.
/// - `$` followed by any other character is copied through unchanged.
/// - A `$` at the very end of the string is copied through unchanged.
fn expand_replacement(replacement: &str, captured: &CapturedText<'_>) -> String {
    let cap = captured.capture;
    let mut out = String::with_capacity(replacement.len() + cap.len());

    // Walk from one `$` to the next, copying the literal text in between.
    let mut rest = replacement;
    while let Some(dollar) = rest.find('$') {
        out.push_str(&rest[..dollar]);

        // `$` is a single byte, so `dollar + 1` is always a char boundary.
        let mut tail = rest[dollar + 1..].chars();
        match tail.next() {
            Some('1') => out.push_str(cap),
            Some('$') => out.push('$'),
            Some(other) => {
                out.push('$');
                out.push(other);
            }
            None => out.push('$'),
        }

        // Resume after the character that followed the `$`.
        rest = tail.as_str();
    }
    out.push_str(rest);

    out
}

// ===========================================================================
// Tests
// ===========================================================================

#[cfg(test)]
mod tests {
    use super::*;

    // -- Fixtures -----------------------------------------------------------

    /// Build an internal-rewrite rule (no redirect) with the given `last` flag.
    fn rewrite_rule(pattern: &str, replacement: &str, last: bool) -> RewriteRule {
        RewriteRule {
            pattern: pattern.into(),
            replacement: replacement.into(),
            redirect: false,
            permanent: false,
            last,
        }
    }

    /// Build a redirect rule (`last` set) that is permanent or temporary.
    fn redirect_rule(pattern: &str, replacement: &str, permanent: bool) -> RewriteRule {
        RewriteRule {
            pattern: pattern.into(),
            replacement: replacement.into(),
            redirect: true,
            permanent,
            last: true,
        }
    }

    /// Run `match_pattern` and keep only the captured text.
    fn capture_of<'a>(pattern: &str, uri: &'a str) -> Option<&'a str> {
        match_pattern(pattern, uri).map(|m| m.capture)
    }

    /// Expand `replacement` against a capture holding `text`.
    fn expand(replacement: &str, text: &str) -> String {
        let captured = CapturedText {
            capture: text,
            full_match_len: text.len(),
        };
        expand_replacement(replacement, &captured)
    }

    /// Apply a handler built from a single rule to `uri`.
    fn apply_one(rule: RewriteRule, uri: &str) -> Option<RewriteResult> {
        RewriteHandler::new(vec![rule]).apply(uri)
    }

    // -- RewriteRule construction -------------------------------------------

    #[test]
    fn rule_debug_and_clone() {
        let rule = rewrite_rule("/old*", "/new$1", true);
        assert_eq!(rule, rule.clone());
        assert!(format!("{:?}", rule).contains("/old*"));
    }

    // -- match_pattern: exact prefix ----------------------------------------

    #[test]
    fn exact_prefix_match() {
        assert_eq!(capture_of("/blog", "/blog/post/1"), Some("/post/1"));
    }

    #[test]
    fn exact_prefix_no_match() {
        assert_eq!(capture_of("/blog", "/news"), None);
    }

    #[test]
    fn exact_prefix_full_match() {
        assert_eq!(capture_of("/index", "/index"), Some(""));
    }

    // -- match_pattern: wildcard at end -------------------------------------

    #[test]
    fn wildcard_suffix_captures_remainder() {
        assert_eq!(capture_of("/images/*", "/images/logo.png"), Some("logo.png"));
    }

    #[test]
    fn wildcard_suffix_no_match() {
        assert_eq!(capture_of("/images/*", "/assets/logo.png"), None);
    }

    #[test]
    fn wildcard_suffix_empty_capture() {
        assert_eq!(capture_of("/img/*", "/img/"), Some(""));
    }

    // -- match_pattern: wildcard in the middle ------------------------------

    #[test]
    fn wildcard_middle_captures_between() {
        assert_eq!(capture_of("/user/*/profile", "/user/42/profile"), Some("42"));
    }

    #[test]
    fn wildcard_middle_suffix_mismatch() {
        assert_eq!(capture_of("/user/*/profile", "/user/42/settings"), None);
    }

    #[test]
    fn wildcard_middle_longer_capture() {
        assert_eq!(capture_of("/a/*/z", "/a/bcdef/z"), Some("bcdef"));
    }

    // -- expand_replacement -------------------------------------------------

    #[test]
    fn expand_with_capture() {
        assert_eq!(expand("/hello/$1", "world"), "/hello/world");
    }

    #[test]
    fn expand_no_reference() {
        assert_eq!(expand("/static/page", "ignored"), "/static/page");
    }

    #[test]
    fn expand_multiple_references() {
        assert_eq!(expand("$1-$1", "foo"), "foo-foo");
    }

    #[test]
    fn expand_literal_dollar() {
        assert_eq!(expand("price: $$100$1", "x"), "price: $100x");
    }

    #[test]
    fn expand_trailing_dollar() {
        assert_eq!(expand("end$", "v"), "end$");
    }

    // -- RewriteHandler::apply ---------------------------------------------

    #[test]
    fn apply_return_directive() {
        let handler = RewriteHandler::with_return(503, "Service Unavailable");
        assert_eq!(
            handler.apply("/anything"),
            Some(RewriteResult::Return(503, "Service Unavailable".into()))
        );
    }

    #[test]
    fn apply_no_rules_returns_none() {
        assert_eq!(RewriteHandler::new(vec![]).apply("/any"), None);
    }

    #[test]
    fn apply_internal_rewrite() {
        assert_eq!(
            apply_one(rewrite_rule("/old-path", "/new-path", true), "/old-path"),
            Some(RewriteResult::Rewrite("/new-path".into()))
        );
    }

    #[test]
    fn apply_permanent_redirect() {
        assert_eq!(
            apply_one(redirect_rule("/legacy", "/modern", true), "/legacy"),
            Some(RewriteResult::Redirect("/modern".into(), 301))
        );
    }

    #[test]
    fn apply_temporary_redirect() {
        assert_eq!(
            apply_one(redirect_rule("/temp", "/other", false), "/temp"),
            Some(RewriteResult::Redirect("/other".into(), 302))
        );
    }

    #[test]
    fn apply_wildcard_rewrite_with_capture() {
        assert_eq!(
            apply_one(rewrite_rule("/blog/*", "/articles/$1", true), "/blog/my-post"),
            Some(RewriteResult::Rewrite("/articles/my-post".into()))
        );
    }

    #[test]
    fn apply_wildcard_redirect() {
        assert_eq!(
            apply_one(redirect_rule("/old/*", "/new/$1", true), "/old/page.html"),
            Some(RewriteResult::Redirect("/new/page.html".into(), 301))
        );
    }

    #[test]
    fn apply_first_matching_rule_wins() {
        let handler = RewriteHandler::new(vec![
            rewrite_rule("/a", "/first", false),
            rewrite_rule("/a", "/second", true),
        ]);
        assert_eq!(
            handler.apply("/a"),
            Some(RewriteResult::Rewrite("/first".into()))
        );
    }

    #[test]
    fn apply_skips_non_matching_rules() {
        let handler = RewriteHandler::new(vec![
            rewrite_rule("/no-match", "/ignored", true),
            rewrite_rule("/target", "/found", true),
        ]);
        assert_eq!(
            handler.apply("/target"),
            Some(RewriteResult::Rewrite("/found".into()))
        );
    }

    #[test]
    fn apply_none_when_no_match() {
        assert_eq!(
            apply_one(rewrite_rule("/specific", "/other", true), "/completely-different"),
            None
        );
    }

    // -- RewriteHandler::with_return + set_return ---------------------------

    #[test]
    fn set_return_overrides() {
        let mut handler = RewriteHandler::new(vec![]);
        assert_eq!(handler.apply("/any"), None);

        handler.set_return(403, "Forbidden");
        assert_eq!(
            handler.apply("/any"),
            Some(RewriteResult::Return(403, "Forbidden".into()))
        );
    }

    // -- RewriteHandler::rules() -------------------------------------------

    #[test]
    fn rules_accessor() {
        let handler = RewriteHandler::new(vec![rewrite_rule("/x", "/y", false)]);
        let rules = handler.rules();
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].pattern, "/x");
    }

    // -- Edge cases ---------------------------------------------------------

    #[test]
    fn empty_uri_matches_empty_prefix() {
        // An empty pattern is a prefix of every URI, including the empty one.
        assert_eq!(
            apply_one(rewrite_rule("", "/index", true), ""),
            Some(RewriteResult::Rewrite("/index".into()))
        );
    }

    #[test]
    fn wildcard_middle_path_rewrite() {
        assert_eq!(
            apply_one(rewrite_rule("/user/*/profile", "/u/$1", true), "/user/42/profile"),
            Some(RewriteResult::Rewrite("/u/42".into()))
        );
    }

    #[test]
    fn multiple_wildcards_not_supported_second_star_literal() {
        // Only the first `*` is a wildcard.  The text after it is "b*", so
        // the URI has to end with the literal characters "b*".
        let handler = RewriteHandler::new(vec![rewrite_rule("/a/*/b*", "/replaced", true)]);
        assert!(handler.apply("/a/X/b*").is_some());
        assert!(handler.apply("/a/X/bZ").is_none());
    }
}