Parser Example
Parser Example - WAF Unified
Parser Example - WAF Unified
Overview
Parser xử lý đồng thời 3 loại log WAF:
- Access Log: Log truy cập thành công (có field `remote_addr`)
- CCLOUD_WAAP: Log phát hiện threat (field `message` chứa chuỗi `CCLOUD_WAAP`)
- Rate Limiting: Log giới hạn tốc độ (field `message` chứa chuỗi `limiting requests`)
Sample Logs
1. Access Log
{"@timestamp":"2026-03-16T07:16:57.278Z","body_bytes_sent":"183891","host_header":"careerviet.vn","http_referer":"https://careerviet.vn/vi/tim-viec-lam/ky-su-qs-du-an-xay-dung.35C65B52.html","http_user_agent":"Mozilla/5.0 (iPad; CPU OS 26_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/145.0.7632.108 Mobile/15E148 Safari/604.1","remote_addr":"171.224.178.18","request":"GET /vi/employers?_rsc=1vs0z HTTP/1.1","request_id":"cfd30872ab24f8fbaf4278d5d47690dc","request_time":"1.389","server_name":"careerviet.vn","status":"200","timestamp":"2026-03-16T07:17:00.124Z","upstream_addr":"222.255.236.231:443"}
2. CCLOUD_WAAP Threat Log
{"@timestamp":"2026-03-16T07:20:58.978Z","message":"2026/03/16 07:20:58 [error] 912648#912648: *855400855 CCLOUD_WAAP: ip=14.160.11.74&server=dichvucong.hanoi.gov.vn&uri=%2Fapi-tiepnhan%2Fv1%2Ftiep-nhan%2Fpost&config=learning&rid=9fcfde7c83dc7312ada4c01cf20b50c4&cscore0=$TRAVERSAL&score0=4&cscore1=$XSS&score1=104&cscore2=$SQL&score2=58, client: 14.160.11.74, server: dichvucong.hanoi.gov.vn, request: \"POST /api-tiepnhan/v1/tiep-nhan/post HTTP/1.1\", host: \"dichvucong.hanoi.gov.vn\", referrer: \"https://dichvucong.hanoi.gov.vn/tiepnhan/ho-so/tiep-nhan-ho-so\"","timestamp":"2026-03-16T07:21:00.927Z"}
3. Rate Limiting Log
{"@timestamp":"2026-03-13T02:31:31.182Z","message":"2026/03/13 02:31:31 [error] 899248#899248: *772879136 limiting requests, excess: 5.410 by zone \"LGiXtUdxIn\", client: 222.254.5.166, server: chungchidauthau.mpi.gov.vn, request: \"GET /cong-thi/_next/static/chunks/437ecaaf2fbf11e8.js HTTP/1.1\", host: \"chungchidauthau.mpi.gov.vn\", referrer: \"https://chungchidauthau.mpi.gov.vn/cong-thi\"","timestamp":"2026-03-13T02:31:32.851Z"}
Parser Configuration
#regex
#conditional
# Unified WAF parser for three JSON-wrapped log shapes:
#   * access log    - structured fields; recognised by a non-empty .remote_addr
#   * CCLOUD_WAAP   - threat-detection line carried inside .message
#   * rate limiting - "limiting requests" line carried inside .message
# Every output variable starts as "" so that a field the current log type does
# not carry is still a defined string when the normalize mapping reads it.
event_timestamp = .timestamp
src_ip = ""
http_method = ""
url_full = ""
http_protocol = ""
http_status = ""
response_bytes = ""
http_referer = ""
user_agent = ""
server_hostname = ""
host_header = ""
request_duration = ""
event_id = ""
upstream_server = ""
log_level = ""
event_action = ""
threat_score = ""
threat_type = ""
threat_rule = ""
threat_status = ""

# Log-type discriminator: only access logs carry a non-empty remote_addr.
remote_ip_str = ""
if (remote_ip, err = to_string(.remote_addr); err == null) { remote_ip_str = remote_ip }

if remote_ip_str != "" {
    # ---- Access log: copy the structured JSON fields ----
    src_ip = remote_ip_str
    log_level = "info"
    event_action = "access"

    if (req_val, err = to_string(.request); err == null) {
        # The request line is "<method> <uri> <protocol>". A missing or
        # truncated request yields fewer than three parts, and indexing past
        # the end of the array gives null, so the "" default is overwritten
        # only when the part is actually present.
        request_parts = split(req_val, " ")
        method_part = request_parts[0]
        uri_part = request_parts[1]
        protocol_part = request_parts[2]
        if method_part != null { http_method = method_part }
        if uri_part != null { url_full = uri_part }
        if protocol_part != null { http_protocol = protocol_part }
    }

    if (status_val, err = to_string(.status); err == null) { http_status = status_val }
    if (bytes_val, err = to_string(.body_bytes_sent); err == null) { response_bytes = bytes_val }
    if (ref_val, err = to_string(.http_referer); err == null) { http_referer = ref_val }
    if (ua_val, err = to_string(.http_user_agent); err == null) { user_agent = ua_val }
    if (hh_val, err = to_string(.host_header); err == null) { host_header = hh_val }
    if (sn_val, err = to_string(.server_name); err == null) { server_hostname = sn_val }
    if (rt_val, err = to_string(.request_time); err == null) { request_duration = rt_val }
    if (rid_val, err = to_string(.request_id); err == null) { event_id = rid_val }
    if (upstream_val, err = to_string(.upstream_addr); err == null) { upstream_server = upstream_val }
} else {
    # ---- Error log: everything is extracted from the free-text .message ----
    log_level = "error"

    if (msg_val, err = to_string(.message); err == null) {
        msg_str = msg_val

        # Context fields shared by both error-log variants. The client value
        # is taken up to the next comma/whitespace so that IPv6 clients are
        # captured as well as IPv4 ones.
        if (client_m, err = parse_regex(msg_str, r'client: (?P<ip>[^,\s]+)'); err == null) {
            src_ip = client_m.ip
        }
        if (server_m, err = parse_regex(msg_str, r'server: (?P<server>[^,]+)'); err == null) {
            server_hostname = server_m.server
        }
        if (req_m, err = parse_regex(msg_str, r'request: "(?P<method>\w+) (?P<uri>[^ ]+) (?P<protocol>[^"]+)"'); err == null) {
            http_method = req_m.method
            url_full = req_m.uri
            http_protocol = req_m.protocol
        }
        if (host_m, err = parse_regex(msg_str, r'host: "(?P<host>[^"]+)"'); err == null) {
            host_header = host_m.host
        }
        if (ref_m, err = parse_regex(msg_str, r'referrer: "(?P<ref>[^"]+)"'); err == null) {
            http_referer = ref_m.ref
        }

        # CCLOUD_WAAP threat line: request id plus a comma-separated list of
        # the threat categories named in the message.
        if contains(msg_str, "CCLOUD_WAAP") {
            event_action = "threat_detected"
            threat_status = "detected"
            if (rid_m, err = parse_regex(msg_str, r'rid=(?P<rid>[a-f0-9]+)'); err == null) {
                event_id = rid_m.rid
            }
            # threat_type is still "" here (initialised above); categories are
            # appended in the fixed order TRAVERSAL, XSS, SQL.
            if contains(msg_str, "$TRAVERSAL") { threat_type = "PATH_TRAVERSAL" }
            if contains(msg_str, "$XSS") {
                if threat_type == "" { threat_type = "XSS" } else { threat_type = threat_type + ",XSS" }
            }
            if contains(msg_str, "$SQL") {
                if threat_type == "" { threat_type = "SQL_INJECTION" } else { threat_type = threat_type + ",SQL_INJECTION" }
            }
        }

        # Rate-limiting line: the zone name becomes the rule, the excess value
        # becomes the score.
        if contains(msg_str, "limiting requests") {
            event_action = "rate_limited"
            threat_status = "blocked"
            if (zone_m, err = parse_regex(msg_str, r'by zone "(?P<zone>[^"]+)"'); err == null) {
                threat_rule = zone_m.zone
            }
            if (excess_m, err = parse_regex(msg_str, r'excess: (?P<excess>[\d.]+)'); err == null) {
                threat_score = excess_m.excess
            }
        }
    }
}
#normalize
timestamp: format_timestamp!(parse_timestamp!(event_timestamp, "%Y-%m-%dT%H:%M:%S.%3fZ"), "%Y-%m-%d %H:%M:%S")
source.ip: src_ip
http.request.method: http_method
url.full: url_full
http.version: http_protocol
http.response.status_code: http_status
destination.bytes: response_bytes
http.request.referrer: http_referer
user_agent.original: user_agent
url.domain: host_header
host.name: server_hostname
event.duration: request_duration
event.id: event_id
server.ip: upstream_server
log.level: log_level
event.action: event_action
threat.score: threat_score
threat.type: threat_type
threat.rule: threat_rule
threat.status: threat_status
Output (ECS Format)
1. Access Log Output
{
"timestamp": "2026-03-16 07:17:00",
"source.ip": "171.224.178.18",
"http.request.method": "GET",
"url.full": "/vi/employers?_rsc=1vs0z",
"http.version": "HTTP/1.1",
"http.response.status_code": "200",
"destination.bytes": "183891",
"http.request.referrer": "https://careerviet.vn/vi/tim-viec-lam/ky-su-qs-du-an-xay-dung.35C65B52.html",
"user_agent.original": "Mozilla/5.0 (iPad; CPU OS 26_3_1 like Mac OS X)...",
"url.domain": "careerviet.vn",
"host.name": "careerviet.vn",
"event.duration": "1.389",
"event.id": "cfd30872ab24f8fbaf4278d5d47690dc",
"server.ip": "222.255.236.231:443",
"log.level": "info",
"event.action": "access"
}
2. CCLOUD_WAAP Threat Output
{
"timestamp": "2026-03-16 07:21:00",
"source.ip": "14.160.11.74",
"http.request.method": "POST",
"url.full": "/api-tiepnhan/v1/tiep-nhan/post",
"http.version": "HTTP/1.1",
"http.request.referrer": "https://dichvucong.hanoi.gov.vn/tiepnhan/ho-so/tiep-nhan-ho-so",
"url.domain": "dichvucong.hanoi.gov.vn",
"host.name": "dichvucong.hanoi.gov.vn",
"event.id": "9fcfde7c83dc7312ada4c01cf20b50c4",
"log.level": "error",
"event.action": "threat_detected",
"threat.status": "detected",
"threat.type": "PATH_TRAVERSAL,XSS,SQL_INJECTION"
}
3. Rate Limiting Output
{
"timestamp": "2026-03-13 02:31:32",
"source.ip": "222.254.5.166",
"http.request.method": "GET",
"url.full": "/cong-thi/_next/static/chunks/437ecaaf2fbf11e8.js",
"http.version": "HTTP/1.1",
"http.request.referrer": "https://chungchidauthau.mpi.gov.vn/cong-thi",
"url.domain": "chungchidauthau.mpi.gov.vn",
"host.name": "chungchidauthau.mpi.gov.vn",
"log.level": "error",
"event.action": "rate_limited",
"threat.status": "blocked",
"threat.rule": "LGiXtUdxIn",
"threat.score": "5.410"
}
Explanation
#regex Block
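- Để trống vì log đã ở dạng JSON
- Không cần regex ở bước này để tách field: các field JSON được truy cập trực tiếp; riêng nội dung field `message` được tách bằng `parse_regex()` trong block #conditional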
#conditional Block
- Khởi tạo biến: Khai báo tất cả biến rỗng trước khi xử lý
- Phân loại log: Dùng `to_string(.remote_addr)` để kiểm tra loại log
  - Nếu `remote_addr` có giá trị → Access Log
  - Nếu `remote_addr` rỗng → Error Log (CCLOUD_WAAP hoặc Rate Limiting)
- Access Log: Truy cập trực tiếp các field JSON với `to_string()`
- Error Log: Parse từ field `message` với `parse_regex()`
- Threat detection: Dùng `contains()` để phân loại CCLOUD_WAAP vs Rate Limiting
#normalize Block
- Mapping các biến VRL sang ECS field names
- Format:
ecs_field: vrl_variable
VRL Functions Used (per VRL Functions.md)
| Function | Category | Usage |
|---|---|---|
| `to_string()` | Coerce functions | Coerce value to string, check field existence |
| `split()` | String Manipulation | Split request string |
| `contains()` | String Manipulation | Check log type |
| `parse_regex()` | Parse functions | Extract data from message |
Field Mapping (Field Standard compliant)
| ECS Field | Access Log | CCLOUD_WAAP | Rate Limiting |
|---|---|---|---|
| source.ip | remote_addr | client: | client: |
| http.request.method | request[0] | request | request |
| url.full | request[1] | request | request |
| http.version | request[2] | request | request |
| http.response.status_code | status | - | - |
| destination.bytes | body_bytes_sent | - | - |
| http.request.referrer | http_referer | referrer: | referrer: |
| user_agent.original | http_user_agent | - | - |
| url.domain | host_header | host: | host: |
| host.name | server_name | server: | server: |
| event.duration | request_time | - | - |
| event.id | request_id | rid= | - |
| server.ip | upstream_addr | - | - |
| log.level | "info" | "error" | "error" |
| event.action | "access" | "threat_detected" | "rate_limited" |
| threat.status | - | "detected" | "blocked" |
| threat.type | - | $XSS/$SQL/$TRAVERSAL | - |
| threat.rule | - | - | zone |
| threat.score | - | - | excess |
Test Commands
# Copy the parser program and the three sample events into the WSL workspace
for file in waf-parser.vrl test-access.json test-threat.json test-ratelimit.json; do
    cp "$file" ~/parser-workspace/
done
# Feed each sample event on stdin to the VRL CLI from the Vector Docker image
for sample in access threat ratelimit; do
    cat ~/parser-workspace/test-"$sample".json | docker run --rm -i -v ~/parser-workspace:/workspace timberio/vector:latest-alpine vrl --program /workspace/waf-parser.vrl
done