Ferrit Explore
中文·繁體·EN·日本語 Sign in Register
cielxl / veld / src / log / access.rs
//! Access log following the nginx combined log format.
//!
//! Provides [`AccessLog`] which buffers log entries in memory and flushes
//! them to disk asynchronously via [`tokio::io::BufWriter`].  Log
//! rotation is supported by calling [`AccessLog::reopen`], which flushes
//! the current buffer and closes the file handle so the next write
//! re-opens the (now-rotated) path.
//!
//! # Formats
//!
//! | Variant   | Description                                              |
//! |-----------|----------------------------------------------------------|
//! | `Combined`| nginx combined log format (with request duration)        |
//! | `Common`  | Common Log Format (CLF)                                  |
//! | `Custom`  | User-supplied format string with `$variable` placeholders|
//!
//! # Custom format variables
//!
//! `$remote_addr`, `$remote_user`, `$time_local`, `$request`,
//! `$status`, `$body_bytes_sent`, `$http_referer`,
//! `$http_user_agent`, `$request_time`

use std::io;
use std::path::PathBuf;
use std::time::Duration;

use chrono::Local;
use tokio::fs::OpenOptions;
use tokio::io::{AsyncWriteExt, BufWriter};

use crate::http::request::Request;
use crate::http::response::Response;

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/// Default capacity for the in-memory log buffer (bytes).
const DEFAULT_BUFFER_CAPACITY: usize = 8192;

/// Flush the buffer to the underlying writer once it exceeds this size.
const FLUSH_THRESHOLD: usize = 8192;

// ---------------------------------------------------------------------------
// LogBuffer
// ---------------------------------------------------------------------------

/// An in-memory byte buffer that accumulates formatted log lines and
/// flushes them in bulk for fewer I/O syscalls.
struct LogBuffer {
    buf: Vec<u8>,
    threshold: usize,
}

impl LogBuffer {
    fn with_capacity(capacity: usize) -> Self {
        Self {
            buf: Vec::with_capacity(capacity),
            threshold: FLUSH_THRESHOLD,
        }
    }

    /// Append `data` to the buffer.
    #[inline]
    fn extend_from_slice(&mut self, data: &[u8]) {
        self.buf.extend_from_slice(data);
    }

    /// Append a single byte.
    #[inline]
    fn push(&mut self, byte: u8) {
        self.buf.push(byte);
    }

    /// Return `true` if the buffer has reached the flush threshold.
    #[inline]
    fn should_flush(&self) -> bool {
        self.buf.len() >= self.threshold
    }

    /// Return `true` if the buffer is empty.
    #[inline]
    fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }

    /// Drain the buffer contents, leaving it empty.
    fn drain(&mut self) -> Vec<u8> {
        std::mem::take(&mut self.buf)
    }

    /// Clear the buffer without returning its contents.
    fn clear(&mut self) {
        self.buf.clear();
    }
}

// ---------------------------------------------------------------------------
// LogFormat
// ---------------------------------------------------------------------------

/// The log format used when writing access entries.
#[derive(Debug, Clone)]
pub enum LogFormat {
    /// nginx combined log format (with request duration appended).
    ///
    /// ```text
    /// $remote_addr - $remote_user [$time_local] "$request"
    ///   $status $body_bytes_sent "$http_referer" "$http_user_agent"
    /// ```
    Combined,

    /// Common Log Format (CLF).
    ///
    /// ```text
    /// $remote_addr - $remote_user [$time_local] "$request"
    ///   $status $body_bytes_sent
    /// ```
    Common,

    /// A custom format string.  Placeholders use the `$variable` syntax
    /// (e.g. `$remote_addr`).  Unrecognised placeholders are left as-is.
    Custom(String),
}

// ---------------------------------------------------------------------------
// AccessLog
// ---------------------------------------------------------------------------

/// Production access logger with buffered async I/O.
///
/// # Usage
///
/// ```ignore
/// let mut log = AccessLog::new("/var/log/nginx/access.log".into(),
///                              LogFormat::Combined);
/// log.open().await?;
///
/// // Inside the request handler:
/// log.log(&request, &response, elapsed).await;
///
/// // Periodically (e.g. via SIGUSR1):
/// log.reopen().await?;
/// ```
pub struct AccessLog {
    /// Path to the log file on disk.
    path: PathBuf,

    /// Active log format.
    format: LogFormat,

    /// In-memory buffer that batches log lines.
    buffer: LogBuffer,

    /// Buffered async file writer.  `None` until [`open`](Self::open) is
    /// called, and reset to `None` after [`reopen`](Self::reopen).
    writer: Option<BufWriter<tokio::fs::File>>,
}

impl AccessLog {
    /// Create a new access logger.  The log file is **not** opened until
    /// [`open`](Self::open) is called.
    pub fn new(path: PathBuf, format: LogFormat) -> Self {
        Self {
            path,
            format,
            buffer: LogBuffer::with_capacity(DEFAULT_BUFFER_CAPACITY),
            writer: None,
        }
    }

    /// Open (or re-open) the log file for writing.
    ///
    /// The file is opened in append mode so multiple workers can share
    /// the same path.  If the file does not exist it is created with
    /// mode `0o644`.
    pub async fn open(&mut self) -> io::Result<()> {
        let file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.path)
            .await?;

        self.writer = Some(BufWriter::new(file));
        Ok(())
    }

    /// Log a completed request/response cycle.
    ///
    /// The entry is formatted according to the configured [`LogFormat`]
    /// and appended to the in-memory buffer.  When the buffer exceeds
    /// [`FLUSH_THRESHOLD`] bytes it is automatically flushed to disk.
    pub async fn log(&mut self, request: &Request, response: &Response, duration: Duration) {
        let line = match &self.format {
            LogFormat::Combined => Self::format_combined(request, response, duration),
            LogFormat::Common => Self::format_common(request, response),
            LogFormat::Custom(fmt) => Self::format_custom(fmt, request, response, duration),
        };

        self.buffer.extend_from_slice(line.as_bytes());
        self.buffer.push(b'\n');

        if self.buffer.should_flush() {
            // Best-effort flush -- a transient I/O error should not
            // crash the request handler.
            let _ = self.flush().await;
        }
    }

    /// Flush all buffered log entries to disk.
    ///
    /// If no file has been opened yet (or it was closed by [`reopen`]),
    /// the buffer contents are silently discarded.
    pub async fn flush(&mut self) -> io::Result<()> {
        if self.buffer.is_empty() {
            return Ok(());
        }

        let writer = match &mut self.writer {
            Some(w) => w,
            None => {
                // No file open -- drop the buffered data.  This can
                // happen if log() is called before open().
                self.buffer.clear();
                return Ok(());
            }
        };

        // Take the buffer contents out so we can write without holding
        // a borrow on self.buffer.
        let data = self.buffer.drain();
        writer.write_all(&data).await?;
        writer.flush().await?;

        Ok(())
    }

    /// Reopen the log file, supporting external log rotation.
    ///
    /// Typical usage: an external tool moves the current log file aside
    /// and sends `SIGUSR1` to the server, which calls this method.
    /// The buffered data is flushed to the **old** file first, then the
    /// handle is dropped so the next write creates / opens the new file.
    pub async fn reopen(&mut self) -> io::Result<()> {
        // Flush remaining data to the old file.
        self.flush().await?;

        // Drop the writer.  The next call to flush() will need
        // open() to be called again.
        self.writer = None;
        Ok(())
    }

    // -- Format helpers -----------------------------------------------------

    /// Format a log line in nginx *combined* format.
    ///
    /// ```text
    /// 192.168.1.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /index.html HTTP/1.1" 200 2326 "http://www.example.com" "Mozilla/5.0"
    /// ```
    fn format_combined(request: &Request, response: &Response, duration: Duration) -> String {
        let remote_addr = Self::remote_addr(request);
        let time_local = Self::time_local();
        let request_line = Self::request_line(request);
        let status = response.status.as_u16();
        let body_bytes = response.body.len();
        let referer = Self::header_value(request, "referer");
        let user_agent = Self::header_value(request, "user-agent");
        let duration_ms = duration.as_millis();

        format!(
            r#"{} - - [{}] "{}" {} {} "{}" "{}" {}ms"#,
            remote_addr,
            time_local,
            request_line,
            status,
            body_bytes,
            referer,
            user_agent,
            duration_ms,
        )
    }

    /// Format a log line in Common Log Format (CLF).
    fn format_common(request: &Request, response: &Response) -> String {
        let remote_addr = Self::remote_addr(request);
        let time_local = Self::time_local();
        let request_line = Self::request_line(request);
        let status = response.status.as_u16();
        let body_bytes = response.body.len();

        format!(
            r#"{} - - [{}] "{}" {} {}"#,
            remote_addr, time_local, request_line, status, body_bytes,
        )
    }

    /// Format a log line from a custom format string.
    ///
    /// Replaces `$variable` placeholders with their corresponding values.
    fn format_custom(
        fmt: &str,
        request: &Request,
        response: &Response,
        duration: Duration,
    ) -> String {
        // Pre-compute all values once so we can replace without
        // re-evaluating each variable multiple times.
        let remote_addr = Self::remote_addr(request);
        let time_local = Self::time_local();
        let request_line = Self::request_line(request);
        let status = response.status.as_u16().to_string();
        let body_bytes = response.body.len().to_string();
        let referer = Self::header_value(request, "referer");
        let user_agent = Self::header_value(request, "user-agent");
        let request_time = format!("{:.3}", duration.as_secs_f64());

        let mut result = String::with_capacity(fmt.len() + 256);

        // Walk the format string and substitute variables.
        let mut chars = fmt.chars().peekable();
        while let Some(ch) = chars.next() {
            if ch == '$' {
                // Read the variable name (letters, digits, underscore).
                let mut var = String::new();
                while let Some(&c) = chars.peek() {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        var.push(c);
                        chars.next();
                    } else {
                        break;
                    }
                }

                match var.as_str() {
                    "remote_addr" => result.push_str(&remote_addr),
                    "remote_user" => result.push('-'),
                    "time_local" => result.push_str(&time_local),
                    "request" => result.push_str(&request_line),
                    "status" => result.push_str(&status),
                    "body_bytes_sent" => result.push_str(&body_bytes),
                    "http_referer" => result.push_str(&referer),
                    "http_user_agent" => result.push_str(&user_agent),
                    "request_time" => result.push_str(&request_time),
                    _ => {
                        // Unknown variable -- emit the literal text.
                        result.push('$');
                        result.push_str(&var);
                    }
                }
            } else {
                result.push(ch);
            }
        }

        result
    }

    // -- Shared formatting helpers ------------------------------------------

    /// Extract the client IP address from the request, defaulting to `"-"`.
    fn remote_addr(request: &Request) -> String {
        request
            .remote_addr
            .map(|a| a.ip().to_string())
            .unwrap_or_else(|| "-".to_string())
    }

    /// Format the current local time in the nginx `$time_local` style:
    /// `10/Oct/2000:13:55:36 -0700`.
    fn time_local() -> String {
        Local::now().format("%d/%b/%Y:%H:%M:%S %z").to_string()
    }

    /// Reconstruct the HTTP request line: `METHOD URI VERSION`.
    fn request_line(request: &Request) -> String {
        format!("{} {} {}", request.method, request.uri, request.version)
    }

    /// Return the first value of a request header, or `"-"` if absent.
    ///
    /// Quotes and backslashes in the value are escaped to prevent log
    /// injection.
    fn header_value(request: &Request, name: &str) -> String {
        match request.headers.get(name) {
            Some(val) => match val.to_str() {
                Ok(s) => Self::escape_log_value(s),
                Err(_) => "-".to_string(),
            },
            None => "-".to_string(),
        }
    }

    /// Escape characters that could break log parsers or allow injection.
    ///
    /// Replaces `"` with `\"` and `\` with `\\`.
    fn escape_log_value(s: &str) -> String {
        let mut out = String::with_capacity(s.len());
        for ch in s.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                _ => out.push(ch),
            }
        }
        out
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::http::request::{Method, Request, Version};
    use crate::http::response::Response;
    use std::net::SocketAddr;
    use std::str::FromStr;

    fn make_request(method: Method, uri: &str) -> Request {
        let mut req = Request::new(method, uri.to_owned(), Version::Http11);
        req.remote_addr = Some(SocketAddr::from_str("192.168.1.100:54321").unwrap());
        req.headers.insert("referer", "http://example.com");
        req.headers.insert("user-agent", "TestAgent/1.0");
        req
    }

    #[test]
    fn combined_format_basic() {
        let req = make_request(Method::GET, "/index.html");
        let resp = Response::ok();
        let dur = Duration::from_millis(42);

        let line = AccessLog::format_combined(&req, &resp, dur);

        assert!(line.starts_with("192.168.1.100 - - ["));
        assert!(line.contains(r#""GET /index.html HTTP/1.1""#));
        assert!(line.contains("200"));
        assert!(line.contains(r#""http://example.com""#));
        assert!(line.contains(r#""TestAgent/1.0""#));
        assert!(line.contains("42ms"));
    }

    #[test]
    fn common_format_basic() {
        let req = make_request(Method::POST, "/submit");
        let resp = Response::ok();
        let line = AccessLog::format_common(&req, &resp);

        assert!(line.starts_with("192.168.1.100 - - ["));
        assert!(line.contains(r#""POST /submit HTTP/1.1""#));
        assert!(line.contains("200"));
        // Common format does NOT include referer / user-agent.
        assert!(!line.contains("TestAgent"));
    }

    #[test]
    fn custom_format_substitutes_variables() {
        let req = make_request(Method::GET, "/page");
        let resp = Response::ok();
        let dur = Duration::from_millis(100);
        let fmt = "$remote_addr $status $request_time";

        let line = AccessLog::format_custom(fmt, &req, &resp, dur);
        assert_eq!(line, "192.168.1.100 200 0.100");
    }

    #[test]
    fn custom_format_unknown_variable_passthrough() {
        let req = make_request(Method::GET, "/x");
        let resp = Response::ok();
        let dur = Duration::from_millis(0);
        let fmt = "$unknown_var";

        let line = AccessLog::format_custom(fmt, &req, &resp, dur);
        assert_eq!(line, "$unknown_var");
    }

    #[test]
    fn missing_headers_default_to_dash() {
        let mut req = Request::new(Method::GET, "/".to_owned(), Version::Http11);
        req.remote_addr = None;
        let resp = Response::ok();
        let dur = Duration::from_millis(0);

        let line = AccessLog::format_combined(&req, &resp, dur);
        assert!(line.starts_with("- - - ["));
        assert!(line.contains(r#""-""#));
    }

    #[test]
    fn escape_log_value_quotes_and_backslashes() {
        assert_eq!(
            AccessLog::escape_log_value(r#"say "hello""#),
            r#"say \"hello\""#
        );
        assert_eq!(
            AccessLog::escape_log_value(r#"path\to\file"#),
            r#"path\\to\\file"#
        );
        assert_eq!(AccessLog::escape_log_value("line1\nline2"), "line1\\nline2");
    }

    #[test]
    fn log_buffer_flush_threshold() {
        let mut buf = LogBuffer::with_capacity(64);
        assert!(!buf.should_flush());
        assert!(buf.is_empty());

        buf.extend_from_slice(&[0u8; FLUSH_THRESHOLD]);
        assert!(buf.should_flush());
        assert!(!buf.is_empty());

        let data = buf.drain();
        assert_eq!(data.len(), FLUSH_THRESHOLD);
        assert!(buf.is_empty());
    }
}