//! Access log following the nginx combined log format.
//!
//! Provides [`AccessLog`] which buffers log entries in memory and flushes
//! them to disk asynchronously via [`tokio::io::BufWriter`]. Log
//! rotation is supported by calling [`AccessLog::reopen`], which flushes
//! the current buffer and closes the file handle so the next write
//! re-opens the (now-rotated) path.
//!
//! # Formats
//!
//! | Variant | Description |
//! |-----------|----------------------------------------------------------|
//! | `Combined`| nginx combined log format (with request duration) |
//! | `Common` | Common Log Format (CLF) |
//! | `Custom` | User-supplied format string with `$variable` placeholders|
//!
//! # Custom format variables
//!
//! `$remote_addr`, `$remote_user`, `$time_local`, `$request`,
//! `$status`, `$body_bytes_sent`, `$http_referer`,
//! `$http_user_agent`, `$request_time`
use std::io;
use std::path::PathBuf;
use std::time::Duration;
use chrono::Local;
use tokio::fs::OpenOptions;
use tokio::io::{AsyncWriteExt, BufWriter};
use crate::http::request::Request;
use crate::http::response::Response;
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// Default capacity for the in-memory log buffer (bytes).
const DEFAULT_BUFFER_CAPACITY: usize = 8192;
/// Flush the buffer to the underlying writer once it exceeds this size.
const FLUSH_THRESHOLD: usize = 8192;
// ---------------------------------------------------------------------------
// LogBuffer
// ---------------------------------------------------------------------------
/// An in-memory byte buffer that accumulates formatted log lines and
/// flushes them in bulk for fewer I/O syscalls.
struct LogBuffer {
buf: Vec<u8>,
threshold: usize,
}
impl LogBuffer {
fn with_capacity(capacity: usize) -> Self {
Self {
buf: Vec::with_capacity(capacity),
threshold: FLUSH_THRESHOLD,
}
}
/// Append `data` to the buffer.
#[inline]
fn extend_from_slice(&mut self, data: &[u8]) {
self.buf.extend_from_slice(data);
}
/// Append a single byte.
#[inline]
fn push(&mut self, byte: u8) {
self.buf.push(byte);
}
/// Return `true` if the buffer has reached the flush threshold.
#[inline]
fn should_flush(&self) -> bool {
self.buf.len() >= self.threshold
}
/// Return `true` if the buffer is empty.
#[inline]
fn is_empty(&self) -> bool {
self.buf.is_empty()
}
/// Drain the buffer contents, leaving it empty.
fn drain(&mut self) -> Vec<u8> {
std::mem::take(&mut self.buf)
}
/// Clear the buffer without returning its contents.
fn clear(&mut self) {
self.buf.clear();
}
}
// ---------------------------------------------------------------------------
// LogFormat
// ---------------------------------------------------------------------------
/// The log format used when writing access entries.
#[derive(Debug, Clone)]
pub enum LogFormat {
/// nginx combined log format (with request duration appended).
///
/// ```text
/// $remote_addr - $remote_user [$time_local] "$request"
/// $status $body_bytes_sent "$http_referer" "$http_user_agent"
/// ```
Combined,
/// Common Log Format (CLF).
///
/// ```text
/// $remote_addr - $remote_user [$time_local] "$request"
/// $status $body_bytes_sent
/// ```
Common,
/// A custom format string. Placeholders use the `$variable` syntax
/// (e.g. `$remote_addr`). Unrecognised placeholders are left as-is.
Custom(String),
}
// ---------------------------------------------------------------------------
// AccessLog
// ---------------------------------------------------------------------------
/// Production access logger with buffered async I/O.
///
/// # Usage
///
/// ```ignore
/// let mut log = AccessLog::new("/var/log/nginx/access.log".into(),
/// LogFormat::Combined);
/// log.open().await?;
///
/// // Inside the request handler:
/// log.log(&request, &response, elapsed).await;
///
/// // Periodically (e.g. via SIGUSR1):
/// log.reopen().await?;
/// ```
pub struct AccessLog {
/// Path to the log file on disk.
path: PathBuf,
/// Active log format.
format: LogFormat,
/// In-memory buffer that batches log lines.
buffer: LogBuffer,
/// Buffered async file writer. `None` until [`open`](Self::open) is
/// called, and reset to `None` after [`reopen`](Self::reopen).
writer: Option<BufWriter<tokio::fs::File>>,
}
impl AccessLog {
/// Create a new access logger. The log file is **not** opened until
/// [`open`](Self::open) is called.
pub fn new(path: PathBuf, format: LogFormat) -> Self {
Self {
path,
format,
buffer: LogBuffer::with_capacity(DEFAULT_BUFFER_CAPACITY),
writer: None,
}
}
/// Open (or re-open) the log file for writing.
///
/// The file is opened in append mode so multiple workers can share
/// the same path. If the file does not exist it is created with
/// mode `0o644`.
pub async fn open(&mut self) -> io::Result<()> {
let file = OpenOptions::new()
.create(true)
.append(true)
.open(&self.path)
.await?;
self.writer = Some(BufWriter::new(file));
Ok(())
}
/// Log a completed request/response cycle.
///
/// The entry is formatted according to the configured [`LogFormat`]
/// and appended to the in-memory buffer. When the buffer exceeds
/// [`FLUSH_THRESHOLD`] bytes it is automatically flushed to disk.
pub async fn log(&mut self, request: &Request, response: &Response, duration: Duration) {
let line = match &self.format {
LogFormat::Combined => Self::format_combined(request, response, duration),
LogFormat::Common => Self::format_common(request, response),
LogFormat::Custom(fmt) => Self::format_custom(fmt, request, response, duration),
};
self.buffer.extend_from_slice(line.as_bytes());
self.buffer.push(b'\n');
if self.buffer.should_flush() {
// Best-effort flush -- a transient I/O error should not
// crash the request handler.
let _ = self.flush().await;
}
}
/// Flush all buffered log entries to disk.
///
/// If no file has been opened yet (or it was closed by [`reopen`]),
/// the buffer contents are silently discarded.
pub async fn flush(&mut self) -> io::Result<()> {
if self.buffer.is_empty() {
return Ok(());
}
let writer = match &mut self.writer {
Some(w) => w,
None => {
// No file open -- drop the buffered data. This can
// happen if log() is called before open().
self.buffer.clear();
return Ok(());
}
};
// Take the buffer contents out so we can write without holding
// a borrow on self.buffer.
let data = self.buffer.drain();
writer.write_all(&data).await?;
writer.flush().await?;
Ok(())
}
/// Reopen the log file, supporting external log rotation.
///
/// Typical usage: an external tool moves the current log file aside
/// and sends `SIGUSR1` to the server, which calls this method.
/// The buffered data is flushed to the **old** file first, then the
/// handle is dropped so the next write creates / opens the new file.
pub async fn reopen(&mut self) -> io::Result<()> {
// Flush remaining data to the old file.
self.flush().await?;
// Drop the writer. The next call to flush() will need
// open() to be called again.
self.writer = None;
Ok(())
}
// -- Format helpers -----------------------------------------------------
/// Format a log line in nginx *combined* format.
///
/// ```text
/// 192.168.1.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /index.html HTTP/1.1" 200 2326 "http://www.example.com" "Mozilla/5.0"
/// ```
fn format_combined(request: &Request, response: &Response, duration: Duration) -> String {
let remote_addr = Self::remote_addr(request);
let time_local = Self::time_local();
let request_line = Self::request_line(request);
let status = response.status.as_u16();
let body_bytes = response.body.len();
let referer = Self::header_value(request, "referer");
let user_agent = Self::header_value(request, "user-agent");
let duration_ms = duration.as_millis();
format!(
r#"{} - - [{}] "{}" {} {} "{}" "{}" {}ms"#,
remote_addr,
time_local,
request_line,
status,
body_bytes,
referer,
user_agent,
duration_ms,
)
}
/// Format a log line in Common Log Format (CLF).
fn format_common(request: &Request, response: &Response) -> String {
let remote_addr = Self::remote_addr(request);
let time_local = Self::time_local();
let request_line = Self::request_line(request);
let status = response.status.as_u16();
let body_bytes = response.body.len();
format!(
r#"{} - - [{}] "{}" {} {}"#,
remote_addr, time_local, request_line, status, body_bytes,
)
}
/// Format a log line from a custom format string.
///
/// Replaces `$variable` placeholders with their corresponding values.
fn format_custom(
fmt: &str,
request: &Request,
response: &Response,
duration: Duration,
) -> String {
// Pre-compute all values once so we can replace without
// re-evaluating each variable multiple times.
let remote_addr = Self::remote_addr(request);
let time_local = Self::time_local();
let request_line = Self::request_line(request);
let status = response.status.as_u16().to_string();
let body_bytes = response.body.len().to_string();
let referer = Self::header_value(request, "referer");
let user_agent = Self::header_value(request, "user-agent");
let request_time = format!("{:.3}", duration.as_secs_f64());
let mut result = String::with_capacity(fmt.len() + 256);
// Walk the format string and substitute variables.
let mut chars = fmt.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '$' {
// Read the variable name (letters, digits, underscore).
let mut var = String::new();
while let Some(&c) = chars.peek() {
if c.is_ascii_alphanumeric() || c == '_' {
var.push(c);
chars.next();
} else {
break;
}
}
match var.as_str() {
"remote_addr" => result.push_str(&remote_addr),
"remote_user" => result.push('-'),
"time_local" => result.push_str(&time_local),
"request" => result.push_str(&request_line),
"status" => result.push_str(&status),
"body_bytes_sent" => result.push_str(&body_bytes),
"http_referer" => result.push_str(&referer),
"http_user_agent" => result.push_str(&user_agent),
"request_time" => result.push_str(&request_time),
_ => {
// Unknown variable -- emit the literal text.
result.push('$');
result.push_str(&var);
}
}
} else {
result.push(ch);
}
}
result
}
// -- Shared formatting helpers ------------------------------------------
/// Extract the client IP address from the request, defaulting to `"-"`.
fn remote_addr(request: &Request) -> String {
request
.remote_addr
.map(|a| a.ip().to_string())
.unwrap_or_else(|| "-".to_string())
}
/// Format the current local time in the nginx `$time_local` style:
/// `10/Oct/2000:13:55:36 -0700`.
fn time_local() -> String {
Local::now().format("%d/%b/%Y:%H:%M:%S %z").to_string()
}
/// Reconstruct the HTTP request line: `METHOD URI VERSION`.
fn request_line(request: &Request) -> String {
format!("{} {} {}", request.method, request.uri, request.version)
}
/// Return the first value of a request header, or `"-"` if absent.
///
/// Quotes and backslashes in the value are escaped to prevent log
/// injection.
fn header_value(request: &Request, name: &str) -> String {
match request.headers.get(name) {
Some(val) => match val.to_str() {
Ok(s) => Self::escape_log_value(s),
Err(_) => "-".to_string(),
},
None => "-".to_string(),
}
}
/// Escape characters that could break log parsers or allow injection.
///
/// Replaces `"` with `\"` and `\` with `\\`.
fn escape_log_value(s: &str) -> String {
let mut out = String::with_capacity(s.len());
for ch in s.chars() {
match ch {
'"' => out.push_str("\\\""),
'\\' => out.push_str("\\\\"),
'\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
_ => out.push(ch),
}
}
out
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::http::request::{Method, Request, Version};
use crate::http::response::Response;
use std::net::SocketAddr;
use std::str::FromStr;
fn make_request(method: Method, uri: &str) -> Request {
let mut req = Request::new(method, uri.to_owned(), Version::Http11);
req.remote_addr = Some(SocketAddr::from_str("192.168.1.100:54321").unwrap());
req.headers.insert("referer", "http://example.com");
req.headers.insert("user-agent", "TestAgent/1.0");
req
}
#[test]
fn combined_format_basic() {
let req = make_request(Method::GET, "/index.html");
let resp = Response::ok();
let dur = Duration::from_millis(42);
let line = AccessLog::format_combined(&req, &resp, dur);
assert!(line.starts_with("192.168.1.100 - - ["));
assert!(line.contains(r#""GET /index.html HTTP/1.1""#));
assert!(line.contains("200"));
assert!(line.contains(r#""http://example.com""#));
assert!(line.contains(r#""TestAgent/1.0""#));
assert!(line.contains("42ms"));
}
#[test]
fn common_format_basic() {
let req = make_request(Method::POST, "/submit");
let resp = Response::ok();
let line = AccessLog::format_common(&req, &resp);
assert!(line.starts_with("192.168.1.100 - - ["));
assert!(line.contains(r#""POST /submit HTTP/1.1""#));
assert!(line.contains("200"));
// Common format does NOT include referer / user-agent.
assert!(!line.contains("TestAgent"));
}
#[test]
fn custom_format_substitutes_variables() {
let req = make_request(Method::GET, "/page");
let resp = Response::ok();
let dur = Duration::from_millis(100);
let fmt = "$remote_addr $status $request_time";
let line = AccessLog::format_custom(fmt, &req, &resp, dur);
assert_eq!(line, "192.168.1.100 200 0.100");
}
#[test]
fn custom_format_unknown_variable_passthrough() {
let req = make_request(Method::GET, "/x");
let resp = Response::ok();
let dur = Duration::from_millis(0);
let fmt = "$unknown_var";
let line = AccessLog::format_custom(fmt, &req, &resp, dur);
assert_eq!(line, "$unknown_var");
}
#[test]
fn missing_headers_default_to_dash() {
let mut req = Request::new(Method::GET, "/".to_owned(), Version::Http11);
req.remote_addr = None;
let resp = Response::ok();
let dur = Duration::from_millis(0);
let line = AccessLog::format_combined(&req, &resp, dur);
assert!(line.starts_with("- - - ["));
assert!(line.contains(r#""-""#));
}
#[test]
fn escape_log_value_quotes_and_backslashes() {
assert_eq!(
AccessLog::escape_log_value(r#"say "hello""#),
r#"say \"hello\""#
);
assert_eq!(
AccessLog::escape_log_value(r#"path\to\file"#),
r#"path\\to\\file"#
);
assert_eq!(AccessLog::escape_log_value("line1\nline2"), "line1\\nline2");
}
#[test]
fn log_buffer_flush_threshold() {
let mut buf = LogBuffer::with_capacity(64);
assert!(!buf.should_flush());
assert!(buf.is_empty());
buf.extend_from_slice(&[0u8; FLUSH_THRESHOLD]);
assert!(buf.should_flush());
assert!(!buf.is_empty());
let data = buf.drain();
assert_eq!(data.len(), FLUSH_THRESHOLD);
assert!(buf.is_empty());
}
}