// cielxl/veld · Ferrit

// cielxl / veld / src / config / parser.rs
//! Nginx-style configuration parser.
//!
//! Parses configuration text into an AST of [`Block`]s and [`Directive`]s.
//!
//! # Grammar (simplified)
//!
//! ```text
//! config     = directive*
//! directive  = name arg* (';' | '{' directive* '}')
//! name       = word
//! arg        = word | string | number | size | time
//! ```
//!
//! # Suffix conventions
//!
//! | Suffix | Category | Meaning              |
//! |--------|----------|----------------------|
//! | `k`    | Size     | Kilobytes (x1024)    |
//! | `m`    | Size     | Megabytes (x1024^2)  |
//! | `g`    | Size     | Gigabytes (x1024^3)  |
//! | `ms`   | Time     | Milliseconds         |
//! | `s`    | Time     | Seconds (x1000)      |
//! | `h`    | Time     | Hours (x3600000)     |
//! | `d`    | Time     | Days (x86400000)     |
//!
//! The `m` suffix is ambiguous in nginx configs -- it can mean either "minutes"
//! (time) or "megabytes" (size). This parser resolves `m` as **megabytes**
//! because that is the overwhelmingly more common usage (e.g. `client_max_body_size 10m`).
//! For time values in minutes, express the value in seconds instead (e.g. `1800s`
//! rather than `30m`).

use super::ast::{Block, Directive, Value};

// ---------------------------------------------------------------------------
// Error type
// ---------------------------------------------------------------------------

/// Error raised while lexing or parsing configuration text.
///
/// Pairs a human-readable description with the source line it relates to so
/// that callers can point users at the offending part of the input.
#[derive(Clone, Debug)]
pub struct ConfigError {
    /// Source line number the error is attributed to.
    pub line: usize,
    /// Description of what went wrong.
    pub message: String,
}

impl ConfigError {
    /// Build an error for `line`; `message` may be anything convertible into
    /// a `String` (string literals and owned strings alike).
    fn new(line: usize, message: impl Into<String>) -> Self {
        let message = message.into();
        ConfigError { line, message }
    }
}

impl std::fmt::Display for ConfigError {
    /// Renders as `config error at line <line>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let ConfigError { line, message } = self;
        write!(f, "config error at line {}: {}", line, message)
    }
}

// No underlying source error is carried, so the default trait methods suffice.
impl std::error::Error for ConfigError {}

// ---------------------------------------------------------------------------
// Token (internal to the lexer / parser)
// ---------------------------------------------------------------------------

/// One lexical unit of configuration text, as emitted by the [`Lexer`].
///
/// The trailing `usize` of every variant is the source line the token was
/// read from; later stages use it to attach line numbers to diagnostics.
#[derive(Clone, Debug)]
enum Token {
    /// Bare (unquoted) word such as a directive name or identifier.
    Word(String, usize),
    /// Contents of a double-quoted string, with escape sequences already resolved.
    String(String, usize),
    /// Integer literal without any suffix.
    Integer(i64, usize),
    /// Floating-point literal without any suffix.
    Float(f64, usize),
    /// Number carrying a size suffix (`k`, `m`, `g`), stored in **bytes**.
    Size(u64, usize),
    /// Number carrying a time suffix (`ms`, `s`, `h`, `d`), stored in **milliseconds**.
    Time(u64, usize),
    /// Opening brace `{`.
    LBrace(usize),
    /// Closing brace `}`.
    RBrace(usize),
    /// Statement terminator `;`.
    Semicolon(usize),
}

impl Token {
    /// Source line number recorded for this token.
    fn line(&self) -> usize {
        // Grouped by variant shape: punctuation carries only the line, every
        // other variant carries a payload followed by the line.
        match *self {
            Token::LBrace(line) | Token::RBrace(line) | Token::Semicolon(line) => line,
            Token::Integer(_, line)
            | Token::Float(_, line)
            | Token::Size(_, line)
            | Token::Time(_, line) => line,
            Token::Word(_, line) | Token::String(_, line) => line,
        }
    }
}

// ---------------------------------------------------------------------------
// Lexer
// ---------------------------------------------------------------------------

/// Character-level lexer that turns raw configuration text into a flat stream
/// of [`Token`]s.
///
/// The lexer handles:
/// - `#` line comments (everything from `#` to end-of-line is discarded)
/// - Double-quoted strings with `\n`, `\r`, `\t`, `\\`, `\"` escapes
/// - Bare words (any contiguous run of non-special, non-whitespace characters)
/// - Numeric literals with optional size/time suffixes
/// - Single-character punctuation: `{`, `}`, `;`
struct Lexer {
    /// The input text, split into characters up front so it can be indexed.
    chars: Vec<char>,
    /// Index into `chars` of the next unread character.
    pos: usize,
    /// Line number of the next unread character; starts at 1.
    line: usize,
}

impl Lexer {
    /// Create a lexer positioned at the start of `input`, on line 1.
    fn new(input: &str) -> Self {
        Self {
            chars: input.chars().collect(),
            pos: 0,
            line: 1,
        }
    }

    // -- character-level helpers -------------------------------------------

    /// Peek at the current character without consuming it.
    ///
    /// Returns `None` once the input is exhausted.
    fn peek(&self) -> Option<char> {
        self.chars.get(self.pos).copied()
    }

    /// Advance past the current character and return it.
    ///
    /// Increments the line counter when a newline is consumed. At end of
    /// input this returns `None` and leaves the position untouched.
    fn advance(&mut self) -> Option<char> {
        let ch = self.peek()?;
        if ch == '\n' {
            self.line += 1;
        }
        self.pos += 1;
        Some(ch)
    }

    /// Skip all contiguous whitespace characters.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek() {
            if !ch.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// Skip a `#` line comment. The `#` has already been consumed.
    ///
    /// Stops in front of the terminating newline (or at end of input); the
    /// newline itself is left for the whitespace skipper.
    fn skip_comment(&mut self) {
        while let Some(ch) = self.peek() {
            if ch == '\n' {
                break;
            }
            self.advance();
        }
    }

    // -- token readers -----------------------------------------------------

    /// Read a double-quoted string literal. The opening `"` has already been
    /// consumed. Supports the following escape sequences:
    ///
    /// | Sequence | Replacement |
    /// |----------|-------------|
    /// | `\n`     | newline     |
    /// | `\r`     | carriage return |
    /// | `\t`     | tab         |
    /// | `\\`     | backslash   |
    /// | `\"`     | double quote |
    ///
    /// Any other escape is kept verbatim (backslash plus the character).
    ///
    /// # Errors
    ///
    /// - "unterminated string literal", attributed to the line the literal
    ///   **started** on, when the input ends before the closing quote.
    /// - "unterminated escape sequence in string literal" when the input ends
    ///   right after a backslash.
    fn read_string(&mut self) -> Result<String, ConfigError> {
        // The opening quote is not a newline, so `self.line` still names the
        // line the literal began on. Reporting that line (rather than the
        // line reached at end of input) points the user at the actual culprit
        // instead of the last line of the file.
        let start_line = self.line;
        let mut s = String::new();
        loop {
            match self.advance() {
                None => {
                    return Err(ConfigError::new(
                        start_line,
                        "unterminated string literal",
                    ))
                }
                Some('"') => return Ok(s),
                Some('\\') => {
                    // Escape sequence
                    match self.advance() {
                        Some('n') => s.push('\n'),
                        Some('r') => s.push('\r'),
                        Some('t') => s.push('\t'),
                        Some('\\') => s.push('\\'),
                        Some('"') => s.push('"'),
                        Some(other) => {
                            // Unknown escape -- pass through literally so that
                            // the user sees the raw backslash + character.
                            s.push('\\');
                            s.push(other);
                        }
                        None => {
                            // The dangling backslash is the last character of
                            // the input, so the current line is its location.
                            return Err(ConfigError::new(
                                self.line,
                                "unterminated escape sequence in string literal",
                            ));
                        }
                    }
                }
                Some(ch) => s.push(ch),
            }
        }
    }

    /// Read a bare word or number (anything not whitespace, not a comment
    /// character, not a brace, not a semicolon, not a quote).
    ///
    /// After reading the raw text, the lexer attempts to interpret it as a
    /// numeric literal with an optional size/time suffix; if that does not
    /// apply the text becomes a [`Token::Word`].
    ///
    /// # Errors
    ///
    /// Propagates errors from `try_parse_number`, and reports "unexpected end
    /// of input" if no character could be read at all.
    fn read_word_or_number(&mut self) -> Result<Token, ConfigError> {
        let start_line = self.line;
        let mut buf = String::new();

        while let Some(ch) = self.peek() {
            // A word terminates at whitespace or any structural character.
            if ch.is_whitespace() || matches!(ch, '{' | '}' | ';' | '#' | '"') {
                break;
            }
            buf.push(ch);
            self.advance();
        }

        if buf.is_empty() {
            return Err(ConfigError::new(start_line, "unexpected end of input"));
        }

        // Try to interpret the raw text as a number (possibly with a suffix).
        if let Some(token) = try_parse_number(&buf, start_line)? {
            return Ok(token);
        }

        // Fall back to a plain word.
        Ok(Token::Word(buf, start_line))
    }

    // -- public entry point ------------------------------------------------

    /// Produce a flat vector of tokens from the input text.
    ///
    /// Whitespace and `#` comments are discarded; every other character
    /// sequence becomes exactly one token tagged with the line it started on.
    ///
    /// # Errors
    ///
    /// Returns the first [`ConfigError`] raised by a token reader.
    fn tokenize(&mut self) -> Result<Vec<Token>, ConfigError> {
        let mut tokens = Vec::new();

        loop {
            self.skip_whitespace();

            match self.peek() {
                // End of input
                None => break,

                // Line comment -- skip to end of line
                Some('#') => {
                    self.advance();
                    self.skip_comment();
                }

                // Punctuation
                Some('{') => {
                    tokens.push(Token::LBrace(self.line));
                    self.advance();
                }
                Some('}') => {
                    tokens.push(Token::RBrace(self.line));
                    self.advance();
                }
                Some(';') => {
                    tokens.push(Token::Semicolon(self.line));
                    self.advance();
                }

                // Quoted string
                Some('"') => {
                    // Record the line before reading: a string may span lines.
                    let line = self.line;
                    self.advance(); // consume opening "
                    let value = self.read_string()?;
                    tokens.push(Token::String(value, line));
                }

                // Everything else: word or number (possibly with suffix).
                Some(_) => {
                    tokens.push(self.read_word_or_number()?);
                }
            }
        }

        Ok(tokens)
    }
}

// ---------------------------------------------------------------------------
// Numeric / suffix helpers
// ---------------------------------------------------------------------------

/// Bytes per kilobyte (`k` suffix).
const SIZE_K: u64 = 1 << 10;
/// Bytes per megabyte (`m` suffix).
const SIZE_M: u64 = 1 << 20;
/// Bytes per gigabyte (`g` suffix).
const SIZE_G: u64 = 1 << 30;

/// Milliseconds per millisecond (`ms` suffix); the base unit for time values.
const TIME_MS: u64 = 1;
/// Milliseconds per second (`s` suffix).
const TIME_S: u64 = 1_000 * TIME_MS;
/// Milliseconds per hour (`h` suffix).
const TIME_H: u64 = 60 * 60 * TIME_S;
/// Milliseconds per day (`d` suffix).
const TIME_D: u64 = 24 * TIME_H;

/// Try to parse a raw word as a number with an optional suffix.
///
/// Returns `Ok(None)` when the word does not look like a number at all (the
/// caller should then treat it as a plain `Word` token). In particular, a
/// word that merely *ends* in a suffix letter but whose remainder is not a
/// run of ASCII digits (`404.htm`, `3rd`, `-s`, `2001:db8::d`) is **not** a
/// number and yields `Ok(None)`.
///
/// Resolution order (longest suffix match first):
///
/// 1. `ms` -- time (milliseconds)
/// 2. `k`  -- size (kilobytes)
/// 3. `m`  -- size (megabytes)
/// 4. `g`  -- size (gigabytes)
/// 5. `s`  -- time (seconds)
/// 6. `h`  -- time (hours)
/// 7. `d`  -- time (days)
/// 8. (no suffix) -- plain integer or float
///
/// The `m` suffix is assigned to **size** rather than time because that is its
/// dominant usage in real nginx configs (`client_max_body_size`, `proxy_buffer_size`,
/// etc.).
///
/// # Errors
///
/// - `invalid number: '<word>'` when the digits of a suffixed number do not
///   fit in a `u64`, when applying the suffix multiplier would overflow a
///   `u64`, or when a plain integer literal does not fit in an `i64`.
/// - `non-finite float literal: '<word>'` when the word parses as a float
///   that is infinite or NaN (e.g. `1e999`, `-inf`).
fn try_parse_number(word: &str, line: usize) -> Result<Option<Token>, ConfigError> {
    /// Which token category a suffix produces.
    #[derive(Clone, Copy)]
    enum Unit {
        Size,
        Time,
    }

    // Ordered so that the two-character "ms" is tried before "m" and "s".
    const SUFFIXES: [(&str, u64, Unit); 7] = [
        ("ms", TIME_MS, Unit::Time),
        ("k", SIZE_K, Unit::Size),
        ("m", SIZE_M, Unit::Size),
        ("g", SIZE_G, Unit::Size),
        ("s", TIME_S, Unit::Time),
        ("h", TIME_H, Unit::Time),
        ("d", TIME_D, Unit::Time),
    ];

    // Quick check: if the word doesn't start with a digit or '-', it's not a number.
    if !word.starts_with(|c: char| c.is_ascii_digit() || c == '-') {
        return Ok(None);
    }

    let invalid = || ConfigError::new(line, format!("invalid number: '{}'", word));
    let is_digits = |s: &str| !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit());

    // 1. Suffixed sizes and times. Only an all-digit prefix counts as a
    //    number; anything else falls through so ordinary words that happen to
    //    end in a suffix letter are not rejected.
    for &(suffix, multiplier, unit) in SUFFIXES.iter() {
        let digits = match word.strip_suffix(suffix) {
            Some(prefix) if is_digits(prefix) => prefix,
            _ => continue,
        };

        // Both the digit run and the scaled value must fit in a u64;
        // `checked_mul` prevents a debug-build panic / release-build wraparound.
        let scaled = digits
            .parse::<u64>()
            .ok()
            .and_then(|n| n.checked_mul(multiplier))
            .ok_or_else(&invalid)?;

        return Ok(Some(match unit {
            Unit::Size => Token::Size(scaled, line),
            Unit::Time => Token::Time(scaled, line),
        }));
    }

    // 2. Plain integer: optional '-' followed by digits only. An integer that
    //    does not fit in an i64 is reported instead of silently degrading to
    //    a lossy float.
    let magnitude = word.strip_prefix('-').unwrap_or(word);
    if is_digits(magnitude) {
        return word
            .parse::<i64>()
            .map(|n| Some(Token::Integer(n, line)))
            .map_err(|_| invalid());
    }

    // 3. Float (decimal point and/or exponent). Non-finite results are rejected.
    match word.parse::<f64>() {
        Ok(f) if f.is_finite() => Ok(Some(Token::Float(f, line))),
        Ok(_) => Err(ConfigError::new(
            line,
            format!("non-finite float literal: '{}'", word),
        )),
        // Not a number at all -- the caller will treat it as a Word.
        Err(_) => Ok(None),
    }
}

/// Interpret a bare word as a [`Value`].
///
/// Resolution order:
///
/// 1. Boolean keywords: `on` / `true` / `yes` and `off` / `false` / `no`.
/// 2. Numeric forms recognised by `try_parse_number` (integer, float, size,
///    time).
/// 3. Anything else becomes a [`Value::String`] holding the word unchanged.
///
/// # Errors
///
/// Propagates any [`ConfigError`] raised by `try_parse_number`.
fn word_to_value(word: &str, line: usize) -> Result<Value, ConfigError> {
    // Boolean keywords take precedence over every other interpretation.
    if matches!(word, "on" | "true" | "yes") {
        return Ok(Value::Bool(true));
    }
    if matches!(word, "off" | "false" | "no") {
        return Ok(Value::Bool(false));
    }

    // Reuse the lexer's numeric classification, then map the token variant
    // onto the corresponding value variant.
    let value = match try_parse_number(word, line)? {
        Some(Token::Integer(n, _)) => Value::Number(n),
        Some(Token::Float(f, _)) => Value::Float(f),
        Some(Token::Size(bytes, _)) => Value::Size(bytes),
        Some(Token::Time(ms, _)) => Value::Time(ms),
        Some(_) => unreachable!("try_parse_number only returns numeric token variants"),
        None => Value::String(word.to_owned()),
    };
    Ok(value)
}

/// Turn an argument [`Token`] into the [`Value`] it denotes.
///
/// Quoted strings and pre-classified numeric tokens translate one-to-one;
/// bare words are resolved by [`word_to_value`].
///
/// # Errors
///
/// Structural tokens (`{`, `}`, `;`) are not values and produce an
/// "unexpected ..." error at the token's line. Errors from [`word_to_value`]
/// are passed through.
fn token_to_value(token: &Token) -> Result<Value, ConfigError> {
    let value = match token {
        // Punctuation can never be a value.
        Token::LBrace(line) => return Err(ConfigError::new(*line, "unexpected '{'")),
        Token::RBrace(line) => return Err(ConfigError::new(*line, "unexpected '}'")),
        Token::Semicolon(line) => return Err(ConfigError::new(*line, "unexpected ';'")),

        // Bare words need keyword / numeric resolution.
        Token::Word(text, line) => return word_to_value(text, *line),

        // Everything else maps directly.
        Token::String(text, _) => Value::String(text.clone()),
        Token::Integer(number, _) => Value::Number(*number),
        Token::Float(number, _) => Value::Float(*number),
        Token::Size(bytes, _) => Value::Size(*bytes),
        Token::Time(millis, _) => Value::Time(*millis),
    };
    Ok(value)
}

// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------

/// Recursive-descent parser that consumes a flat token stream and produces an
/// AST rooted at a [`Block`].
struct Parser {
    /// Tokens produced by the lexer, in source order.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to consume.
    pos: usize,
}

impl Parser {
    /// Create a parser positioned at the first token.
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens, pos: 0 }
    }

    // -- helpers -----------------------------------------------------------

    /// Peek at the current token without consuming it.
    fn peek(&self) -> Option<&Token> {
        self.tokens.get(self.pos)
    }

    /// Advance past the current token and return a reference to it.
    ///
    /// Returns `None` (and does not move) once every token has been consumed.
    fn advance(&mut self) -> Option<&Token> {
        let tok = self.tokens.get(self.pos);
        if tok.is_some() {
            self.pos += 1;
        }
        tok
    }

    /// Return `true` when every token has been consumed.
    fn is_eof(&self) -> bool {
        self.pos >= self.tokens.len()
    }

    /// Consume and return the current token, or produce an error with the
    /// given description using the line number of the last token in the
    /// stream (or 1 if the stream is empty).
    fn expect_advance(&mut self, what: &str) -> Result<&Token, ConfigError> {
        if self.pos < self.tokens.len() {
            let tok = &self.tokens[self.pos];
            self.pos += 1;
            Ok(tok)
        } else {
            let line = self.tokens.last().map(|t| t.line()).unwrap_or(1);
            Err(ConfigError::new(
                line,
                format!("expected {}, got end of input", what),
            ))
        }
    }

    // -- grammar rules -----------------------------------------------------

    /// Parse the entire token stream into a top-level [`Block`].
    ///
    /// ```text
    /// config = directive*
    /// ```
    fn parse_config(&mut self) -> Result<Block, ConfigError> {
        let mut block = Block::new();

        while !self.is_eof() {
            block.directives.push(self.parse_directive(false)?);
        }

        Ok(block)
    }

    /// Parse a single directive, which may optionally be followed by a block.
    ///
    /// ```text
    /// directive = name arg* (';' | '{' directive* '}')
    /// ```
    ///
    /// `in_block` tells the parser whether the directive sits inside a
    /// `{ ... }` block. It only affects diagnostics: a `}` met while reading
    /// arguments means "missing `;`" inside a block but "unmatched `}`" at the
    /// top level.
    ///
    /// End of input while reading arguments is accepted as an implicit `;`.
    ///
    /// # Errors
    ///
    /// Returns a [`ConfigError`] when the name is not a word or string, when
    /// an argument cannot be converted to a value, when a `}` appears in
    /// argument position, or when a block is not closed.
    fn parse_directive(&mut self, in_block: bool) -> Result<Directive, ConfigError> {
        // -- name ----------------------------------------------------------
        let name_token = self.expect_advance("directive name")?;
        let (name, line) = match name_token {
            Token::Word(w, l) => (w.clone(), *l),
            Token::String(s, l) => (s.clone(), *l),
            // A stray closing brace has no matching block to terminate.
            Token::RBrace(l) => return Err(ConfigError::new(*l, "unexpected '}'")),
            other => {
                return Err(ConfigError::new(
                    other.line(),
                    format!(
                        "expected directive name, found '{}'",
                        token_debug_str(other)
                    ),
                ))
            }
        };

        // -- arguments (everything up to ';' or '{') -----------------------
        let mut args: Vec<Value> = Vec::new();

        loop {
            match self.peek() {
                // End of input -- implicit semicolon at EOF.
                None => return Ok(Directive::new(name, args, line)),

                // Semicolon terminates a simple directive (no block).
                Some(Token::Semicolon(_)) => {
                    self.pos += 1;
                    return Ok(Directive::new(name, args, line));
                }

                // Opening brace starts a block directive.
                Some(Token::LBrace(_)) => {
                    self.pos += 1; // consume '{'
                    break;
                }

                // A closing brace can never appear among the arguments. Inside
                // a block it does have a matching '{' -- the actual mistake is
                // the missing ';' -- so say that instead of blaming the brace.
                Some(Token::RBrace(brace_line)) => {
                    let message = if in_block {
                        format!(
                            "unexpected '}}': directive '{}' is missing its terminating ';'",
                            name
                        )
                    } else {
                        "unexpected '}' without matching '{'".to_owned()
                    };
                    return Err(ConfigError::new(*brace_line, message));
                }

                // Anything else is an argument. Convert straight from the
                // borrowed token; no clone is needed because the borrow ends
                // before the position is bumped.
                Some(tok) => {
                    let value = token_to_value(tok)?;
                    self.pos += 1;
                    args.push(value);
                }
            }
        }

        // -- block ---------------------------------------------------------
        // Only reached after a '{' was consumed above.
        let block = self.parse_block_body()?;

        // Expect closing '}'
        match self.advance() {
            Some(Token::RBrace(_)) => {}
            Some(other) => {
                return Err(ConfigError::new(
                    other.line(),
                    format!(
                        "expected '}}' to close block opened on line {}, found '{}'",
                        line,
                        token_debug_str(other)
                    ),
                ))
            }
            None => {
                return Err(ConfigError::new(
                    line,
                    "unterminated block: expected '}' before end of input",
                ))
            }
        }

        Ok(Directive::new(name, args, line).with_block(block))
    }

    /// Parse the interior of a `{ ... }` block (exclusive of the braces
    /// themselves, which are consumed by the caller).
    ///
    /// Stops in front of the closing `}` or at end of input; the caller
    /// decides which of the two is an error.
    fn parse_block_body(&mut self) -> Result<Block, ConfigError> {
        let mut block = Block::new();

        while let Some(tok) = self.peek() {
            // A closing brace ends the block; the caller will consume it.
            if matches!(tok, Token::RBrace(_)) {
                break;
            }
            block.directives.push(self.parse_directive(true)?);
        }

        Ok(block)
    }
}

/// Render a token as a short piece of text suitable for embedding in an
/// error message.
///
/// Punctuation renders as itself, words verbatim, strings wrapped in double
/// quotes, and sizes / times as their stored value followed by `bytes` / `ms`.
fn token_debug_str(token: &Token) -> String {
    match token {
        // Punctuation
        Token::LBrace(_) => String::from("{"),
        Token::RBrace(_) => String::from("}"),
        Token::Semicolon(_) => String::from(";"),

        // Textual tokens
        Token::Word(text, _) => String::from(text.as_str()),
        Token::String(text, _) => format!("\"{}\"", text),

        // Numeric tokens
        Token::Integer(value, _) => format!("{}", value),
        Token::Float(value, _) => value.to_string(),
        Token::Size(bytes, _) => format!("{}bytes", bytes),
        Token::Time(millis, _) => format!("{}ms", millis),
    }
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/// Parse nginx-style configuration text into its AST, returned as the
/// top-level [`Block`].
///
/// The input is first tokenized and the resulting token stream is then
/// parsed into directives and nested blocks.
///
/// # Errors
///
/// Returns a [`ConfigError`] if the input contains syntax errors such as
/// unterminated strings, unmatched braces, or invalid number literals.
///
/// # Examples
///
/// ```
/// use veld::config::parser::parse;
///
/// let config = parse(r#"
///     worker_processes 4;
///     http {
///         server {
///             listen 80;
///             location / {
///                 root /var/www/html;
///             }
///         }
///     }
/// "#).expect("valid config");
///
/// // Numeric values are parsed as integers; read them with `get_i64`.
/// assert_eq!(config.get_i64("worker_processes"), Some(4));
/// ```
pub fn parse(input: &str) -> Result<Block, ConfigError> {
    // Stage 1: characters -> tokens.
    let mut lexer = Lexer::new(input);
    let tokens = lexer.tokenize()?;

    // Stage 2: tokens -> AST.
    let mut parser = Parser::new(tokens);
    parser.parse_config()
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes per kilobyte, used to spell out expected sizes.
    const KIB: u64 = 1024;

    /// Parse `input`, failing the test if the parser reports an error.
    fn parse_ok(input: &str) -> Block {
        parse(input).unwrap()
    }

    /// Parse `input`, failing the test if it is accepted; returns the error
    /// message for further inspection.
    fn parse_err_message(input: &str) -> String {
        let result = parse(input);
        assert!(result.is_err());
        result.unwrap_err().message
    }

    // -- lexer / tokenizer tests -------------------------------------------

    /// Empty input yields an empty top-level block.
    #[test]
    fn test_empty_input() {
        assert!(parse_ok("").is_empty());
    }

    /// A single `name arg;` directive with an integer argument.
    #[test]
    fn test_simple_directive() {
        let config = parse_ok("worker_processes 4;");
        assert_eq!(config.directives.len(), 1);

        let directive = &config.directives[0];
        assert_eq!(directive.name, "worker_processes");
        assert_eq!(directive.args, vec![Value::Number(4)]);
    }

    /// Several directives on one line keep their order.
    #[test]
    fn test_multiple_directives() {
        let config = parse_ok("a 1; b 2; c 3;");
        assert_eq!(config.directives.len(), 3);

        for (index, expected) in ["a", "b", "c"].iter().enumerate() {
            assert_eq!(config.directives[index].name, *expected);
        }
    }

    /// Double-quoted arguments become string values without the quotes.
    #[test]
    fn test_quoted_string() {
        let config = parse_ok(r#"server_name "example.com";"#);
        let directive = &config.directives[0];
        assert_eq!(directive.name, "server_name");
        assert_eq!(directive.args, vec![Value::String("example.com".into())]);
    }

    /// `\n`, `\t`, `\"` and `\\` are resolved inside quoted strings.
    #[test]
    fn test_string_escapes() {
        let config = parse_ok(r#"msg "hello\nworld\t\"end\\" ;"#);
        let text = config.directives[0].args[0].as_str();
        assert_eq!(text, "hello\nworld\t\"end\\");
    }

    /// Blocks nest, and inner directives are reachable through the AST.
    #[test]
    fn test_nested_blocks() {
        let config = parse_ok(
            r#"
            http {
                server {
                    listen 80;
                    location / {
                        root /var/www;
                    }
                }
            }
        "#,
        );

        let http = config.get("http").unwrap();
        let server = http.block.as_ref().unwrap().get("server").unwrap();
        let server_body = server.block.as_ref().unwrap();
        // `listen 80` is parsed as a numeric value; read it as an integer.
        assert_eq!(server_body.get_i64("listen"), Some(80));

        let location = server_body.get("location").unwrap();
        assert_eq!(location.args[0].as_str(), "/");

        let location_body = location.block.as_ref().unwrap();
        assert_eq!(location_body.get_str("root"), Some("/var/www"));
    }

    /// Full-line and trailing `#` comments are ignored.
    #[test]
    fn test_comments() {
        let config = parse_ok(
            r#"
            # This is a comment
            worker_processes 4; # inline comment
            # Another comment
        "#,
        );
        assert_eq!(config.directives.len(), 1);
        assert_eq!(config.directives[0].name, "worker_processes");
    }

    /// `k`, `m` and `g` scale to bytes.
    #[test]
    fn test_size_suffixes() {
        let config = parse_ok("buf 1k; big 2m; huge 3g;");
        let sizes = &config.directives;
        assert_eq!(sizes[0].args, vec![Value::Size(KIB)]);
        assert_eq!(sizes[1].args, vec![Value::Size(2 * KIB * KIB)]);
        assert_eq!(sizes[2].args, vec![Value::Size(3 * KIB * KIB * KIB)]);
    }

    /// `ms`, `s`, `h` and `d` scale to milliseconds.
    #[test]
    fn test_time_suffixes() {
        let config = parse_ok("t1 500ms; t2 30s; t3 2h; t4 1d;");
        let times = &config.directives;
        assert_eq!(times[0].args, vec![Value::Time(500)]);
        assert_eq!(times[1].args, vec![Value::Time(30_000)]);
        assert_eq!(times[2].args, vec![Value::Time(7_200_000)]);
        assert_eq!(times[3].args, vec![Value::Time(86_400_000)]);
    }

    /// The ambiguous `m` suffix resolves to megabytes, not minutes.
    #[test]
    fn test_size_m_means_megabytes() {
        let config = parse_ok("limit 10m;");
        assert_eq!(
            config.directives[0].args,
            vec![Value::Size(10 * KIB * KIB)]
        );
    }

    /// A literal with a decimal point becomes a float value.
    #[test]
    fn test_float_literal() {
        let config = parse_ok("ratio 2.5;");
        assert_eq!(config.directives[0].args, vec![Value::Float(2.5)]);
    }

    /// Arguments of mixed kinds are collected in order.
    #[test]
    fn test_multiple_args() {
        let config = parse_ok("proxy_pass http localhost 8080;");
        let directive = &config.directives[0];
        assert_eq!(directive.name, "proxy_pass");
        assert_eq!(directive.args.len(), 3);
        assert_eq!(directive.args[0].as_str(), "http");
        assert_eq!(directive.args[1].as_str(), "localhost");
        assert_eq!(directive.args[2], Value::Number(8080));
    }

    /// `on` / `off` are recognised as booleans.
    #[test]
    fn test_bool_keywords() {
        let config = parse_ok("gzip on; ssl off;");
        assert_eq!(config.directives[0].args, vec![Value::Bool(true)]);
        assert_eq!(config.directives[1].args, vec![Value::Bool(false)]);
    }

    /// Directives remember the line they started on.
    #[test]
    fn test_line_tracking() {
        let config = parse_ok(
            r#"
            foo 1;
            bar 2;
            baz 3;
        "#,
        );
        // Line 1 is the empty first line; "foo" is on line 2, etc.
        let lines: Vec<usize> = config.directives.iter().map(|d| d.line).collect();
        assert_eq!(lines[0], 2);
        assert_eq!(lines[1], 3);
        assert_eq!(lines[2], 4);
    }

    // -- error cases -------------------------------------------------------

    /// A string without a closing quote is rejected.
    #[test]
    fn test_unterminated_string() {
        let message = parse_err_message(r#"name "unclosed;"#);
        assert!(message.contains("unterminated"));
    }

    /// A lone `}` is rejected.
    #[test]
    fn test_unmatched_brace_close() {
        let message = parse_err_message("}");
        assert!(message.contains("unexpected"));
    }

    /// A block that is never closed is rejected.
    #[test]
    fn test_unterminated_block() {
        let message = parse_err_message("server { listen 80;");
        assert!(message.contains("unterminated"));
    }

    /// A suffixed number whose digits do not fit the integer type is rejected.
    #[test]
    fn test_invalid_number() {
        let message = parse_err_message("val 99999999999999999999k;");
        assert!(message.contains("invalid number"));
    }

    /// A directive may carry both arguments and a block.
    #[test]
    fn test_directive_with_block_and_args() {
        let config = parse_ok("location /api {\n  proxy_pass http://backend;\n}");
        let location = &config.directives[0];
        assert_eq!(location.name, "location");
        assert_eq!(location.args, vec![Value::String("/api".into())]);

        let body = location.block.as_ref().unwrap();
        assert_eq!(body.directives.len(), 1);
        assert_eq!(body.directives[0].name, "proxy_pass");
    }

    /// End-to-end check against a config resembling a stock nginx.conf.
    #[test]
    fn test_realistic_config() {
        let input = r#"
            worker_processes auto;
            error_log /var/log/nginx/error.log warn;
            pid /run/nginx.pid;

            events {
                worker_connections 1024;
            }

            http {
                include       mime.types;
                default_type  application/octet-stream;

                log_format main '$remote_addr - $remote_user [$time_local] '
                                '"$request" $status $body_bytes_sent '
                                '"$http_referer" "$http_user_agent"';

                sendfile on;
                keepalive_timeout 65;

                server {
                    listen 80;
                    server_name localhost;

                    location / {
                        root   /usr/share/nginx/html;
                        index  index.html index.htm;
                    }

                    error_page 500 502 503 504 /50x.html;
                    location = /50x.html {
                        root /usr/share/nginx/html;
                    }
                }
            }
        "#;

        let config = parse_ok(input);

        // Top-level directives.
        assert_eq!(config.get_str("worker_processes"), Some("auto"));
        assert_eq!(config.get_str("error_log"), Some("/var/log/nginx/error.log"));

        // `events` block.
        let events_body = config.get("events").unwrap().block.as_ref().unwrap();
        assert_eq!(events_body.get_i64("worker_connections"), Some(1024));

        // `http` block.
        let http_body = config.get("http").unwrap().block.as_ref().unwrap();
        assert_eq!(http_body.get_bool("sendfile"), Some(true));
        assert_eq!(http_body.get_i64("keepalive_timeout"), Some(65));

        // Exactly one `server` inside `http`.
        let servers = http_body.get_all("server");
        assert_eq!(servers.len(), 1);

        let server_body = servers[0].block.as_ref().unwrap();
        assert_eq!(server_body.get_i64("listen"), Some(80));
    }
}