import re
from collections import defaultdict
from datetime import datetime
import os
class NginxStatsParser:
    """Aggregate simple visit statistics from an nginx access log.

    Each log line is scanned with three regular expressions (client IP,
    request date, requested URL of ``GET`` requests). Lines that look like
    bot/crawler traffic are counted separately and excluded from every other
    statistic.
    """

    # Case-insensitive substrings that mark a log line as automated traffic.
    BOT_MARKERS = ('bot', 'crawler')
    # URL substrings (static assets, admin pages) excluded from the page ranking.
    EXCLUDED_URL_PARTS = ('.css', '.js', '.jpg', '.png', 'wp-admin')

    def __init__(self, log_path='/var/log/nginx/access.log'):
        """Store the log location and the regex sources used by parse_logs().

        Args:
            log_path: Path of the access log to read.
        """
        self.log_path = log_path
        self.ip_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        self.date_pattern = r'\[(\d{2}\/\w+\/\d{4})'
        # Capture only the request path; stopping at whitespace keeps the
        # trailing " HTTP/1.1" out of the popular-pages keys.
        self.url_pattern = r'\"GET ([^\"\s]+)'

    def parse_logs(self):
        """Read the log file and return the raw counters.

        Returns:
            A dict with the keys ``total_visits`` (int), ``unique_visitors``
            (set of IP strings), ``daily_visits`` (``YYYY-MM-DD`` -> count),
            ``popular_pages`` (URL -> count) and ``bot_visits`` (int); or an
            error-message string when the log file does not exist.
        """
        stats = {
            'total_visits': 0,
            'unique_visitors': set(),
            'daily_visits': defaultdict(int),
            'popular_pages': defaultdict(int),
            'bot_visits': 0,
        }
        # Compile once instead of re-resolving the patterns for every line.
        ip_re = re.compile(self.ip_pattern)
        date_re = re.compile(self.date_pattern)
        url_re = re.compile(self.url_pattern)

        try:
            # errors='replace': request lines can contain arbitrary bytes, and
            # one undecodable line should not abort the whole run.
            with open(self.log_path, 'r', errors='replace') as f:
                for line in f:
                    # Skip bot requests
                    lowered = line.lower()
                    if any(marker in lowered for marker in self.BOT_MARKERS):
                        stats['bot_visits'] += 1
                        continue

                    # Extract IP; lines without one are not counted as visits.
                    ip_match = ip_re.search(line)
                    if not ip_match:
                        continue
                    stats['unique_visitors'].add(ip_match.group(1))

                    # Extract date
                    date_match = date_re.search(line)
                    if date_match:
                        try:
                            day = datetime.strptime(
                                date_match.group(1), '%d/%b/%Y'
                            ).strftime('%Y-%m-%d')
                        except ValueError:
                            # The regex accepts any word as the month; ignore
                            # dates that do not actually parse.
                            day = None
                        if day is not None:
                            stats['daily_visits'][day] += 1

                    # Extract URL
                    url_match = url_re.search(line)
                    if url_match:
                        url = url_match.group(1)
                        # Skip static files and admin pages
                        if not any(part in url for part in self.EXCLUDED_URL_PARTS):
                            stats['popular_pages'][url] += 1

                    stats['total_visits'] += 1
        except FileNotFoundError:
            return f"Error: Log file not found at {self.log_path}"
        return stats

    def generate_report(self):
        """Condense the raw counters into a report.

        Returns:
            A dict with ``summary`` (total/unique/bot counts), ``daily_visits``
            (the 7 most recent days, newest first) and ``popular_pages`` (the
            5 most requested URLs, most requested first); or the error-message
            string produced by parse_logs().
        """
        stats = self.parse_logs()
        if isinstance(stats, str):  # Error message
            return stats

        # ISO dates sort chronologically as strings, so a reverse sort puts the
        # newest day first.
        recent_days = sorted(stats['daily_visits'].items(), reverse=True)[:7]
        top_pages = sorted(
            stats['popular_pages'].items(), key=lambda item: item[1], reverse=True
        )[:5]
        return {
            'summary': {
                'total_visits': stats['total_visits'],
                'unique_visitors': len(stats['unique_visitors']),
                'bot_visits': stats['bot_visits'],
            },
            'daily_visits': dict(recent_days),  # Last 7 days
            'popular_pages': dict(top_pages),  # Top 5 pages
        }
def save_to_html(report, output_path='stats.html'):
    """Generate a simple HTML report and write it to ``output_path``.

    Args:
        report: Dict with a ``summary`` mapping (``total_visits``,
            ``unique_visitors``, ``bot_visits``) plus ``daily_visits`` and
            ``popular_pages`` mappings of label -> visit count.
        output_path: File to (over)write with the generated page.
    """
    from html import escape

    def table_rows(mapping):
        # Labels are escaped because page URLs come straight from the access
        # log, i.e. from whatever clients chose to request.
        return '\n'.join(
            f"<tr><td>{escape(str(label))}</td><td>{escape(str(visits))}</td></tr>"
            for label, visits in mapping.items()
        )

    summary = report['summary']
    daily_rows = table_rows(report['daily_visits'])
    page_rows = table_rows(report['popular_pages'])

    document = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Website Statistics</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
.stats-container {{ max-width: 800px; margin: 0 auto; }}
.stat-box {{ background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }}
</style>
</head>
<body>
<div class="stats-container">
<h1>Website Statistics</h1>
<div class="stat-box">
<p>Total Visits: {escape(str(summary['total_visits']))}</p>
<p>Unique Visitors: {escape(str(summary['unique_visitors']))}</p>
<p>Bot Visits: {escape(str(summary['bot_visits']))}</p>
</div>
<div class="stat-box">
<h2>Daily Visits (Last 7 days)</h2>
<table>
<tr><th>Date</th><th>Visits</th></tr>
{daily_rows}
</table>
</div>
<div class="stat-box">
<h2>Popular Pages</h2>
<table>
<tr><th>Page</th><th>Visits</th></tr>
{page_rows}
</table>
</div>
</div>
</body>
</html>
"""
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(document)
if __name__ == '__main__':
    # Usage example: parse the default access log and write the HTML report.
    parser = NginxStatsParser()
    report = parser.generate_report()
    if isinstance(report, str):
        # generate_report() returns an error-message string when the log file
        # is missing; SystemExit prints it to stderr and exits with status 1.
        raise SystemExit(report)
    save_to_html(report)
python3 nginx_stats.py
- 一种是建立一个网站访问统计页面模板
# stats_page_template.php
<?php
/*
Template Name: Statistics Page
*/

get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Location of the pre-generated HTML report (presumably the
                // file written by the Python script -- adjust the path).
                $stats_file = '/path/to/your/stats.html';
                if (file_exists($stats_file)) {
                    // Read the HTML content but exclude DOCTYPE, html, head, and body tags
                    $html = file_get_contents($stats_file);
                    // Greedy ".*" on purpose: a lazy "(.*?)" would stop at the first
                    // nested </div> inside the container and truncate the report.
                    // Assumes the container's closing tag is the last </div> in the file.
                    if ($html !== false
                        && preg_match('/<div class="stats-container">.*<\/div>/s', $html, $matches)) {
                        echo $matches[0];
                    } else {
                        // Unreadable or half-written file.
                        echo 'Statistics are currently being updated.';
                    }
                } else {
                    echo 'Statistics file not found.';
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
- 一种是使用短代码(shortcode)的方式
/**
 * Return the contents of the pre-generated visitor-count HTML file.
 *
 * @return string File contents, or '' when the file is missing or unreadable.
 */
function get_visitor_count() {
    $stats_file = '/path/to/your/visitor_count.html';
    if (file_exists($stats_file)) {
        $contents = file_get_contents($stats_file);
        // file_get_contents() returns false on failure; never leak that to the caller.
        return $contents === false ? '' : $contents;
    }
    return '';
}
然后可以在 header.php、footer.php 或 sidebar.php 等模板中引用:
header.php, footer.php, or sidebar.php
// Add this to functions.php
// Registers the [visitor_count] shortcode with get_visitor_count() as its
// callback, so the tag is replaced by whatever that function returns.
add_shortcode('visitor_count', 'get_visitor_count');
- 加入安全措施
- 限制登录后访问,在页面模板前加入:
# Add this at the top of page-statistics.php
// Stop rendering unless the current user has the 'manage_options' capability.
if (!current_user_can('manage_options')) {
    wp_die('Access denied');
}
- 为python脚本加入日志信息:
# Add to nginx_stats.py
import logging

# Timestamp, then the message text.
LOG_FORMAT = '%(asctime)s - %(message)s'

# The format string must be passed to basicConfig(); assigning it to a bare
# `format` variable only shadows the builtin and configures nothing.
# NOTE(review): level and destination were not visible in the original
# snippet -- INFO to stderr is assumed; add filename=... to log to a file.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
- 一个加入认证机制的页面模板
<?php
/*
Template Name: Statistics Page
*/

// Basic WordPress authentication
if (!is_user_logged_in()) {
    // Sends anonymous visitors to the login screen and stops execution.
    auth_redirect();
}

// Advanced role-based authentication
if (!current_user_can('manage_options') && !current_user_can('edit_posts')) {
    wp_die('You do not have sufficient permissions to access this page.');
}

// Optional: Add specific user check
$allowed_users = array('admin', 'editor', 'analyst');
$current_user = wp_get_current_user();
if (!in_array($current_user->user_login, $allowed_users, true)) {
    wp_die('Your user account does not have access to statistics.');
}

// Optional: Add IP-based restrictions
// NOTE(review): the original address list was lost; 127.0.0.1 is a placeholder.
// Replace it with the addresses that may view the statistics.
$allowed_ips = array(
    '127.0.0.1',
);
$current_ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
if (!in_array($current_ip, $allowed_ips, true)) {
    // Log failed attempt
    error_log("Stats page access attempted from unauthorized IP: {$current_ip}");
    wp_die('Access not allowed from your location.');
}

get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Add nonce for additional security
                // isset() guard: the parameter is absent on the first visit.
                $request_nonce = isset($_GET['stats_nonce']) ? $_GET['stats_nonce'] : '';
                if (!wp_verify_nonce($request_nonce, 'view_stats')) {
                    // NOTE(review): the original handling of a missing/invalid nonce
                    // was lost. Output has already started here, so a redirect is not
                    // possible; offer a link carrying a fresh nonce instead.
                    $stats_nonce = wp_create_nonce('view_stats');
                    echo '<p><a href="' . esc_url(add_query_arg('stats_nonce', $stats_nonce)) . '">View statistics</a></p>';
                } else {
                    $stats_file = '/path/to/your/stats.html';
                    if (file_exists($stats_file)) {
                        // Read the HTML content
                        $html = file_get_contents($stats_file);
                        // Greedy ".*" on purpose: a lazy "(.*?)" would stop at the first
                        // nested </div> inside the container and truncate the report.
                        // Assumes the container's closing tag is the last </div> in the file.
                        if ($html !== false
                            && preg_match('/<div class="stats-container">.*<\/div>/s', $html, $matches)) {
                            // Add last updated timestamp
                            echo '<p class="stats-updated">Last updated: ' . date('Y-m-d H:i:s', filemtime($stats_file)) . '</p>';
                            echo $matches[0];
                        } else {
                            echo 'Statistics are currently being updated.';
                        }
                    } else {
                        echo 'Statistics file not found.';
                    }
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
You can choose which authentication methods you want to use:
- Basic login requirement (keep the is_user_logged_in() check)
- Role-based access (keep the current_user_can() check)
- Specific users (customize the $allowed_users array)
- IP restrictions (customize the $allowed_ips array)
- 一个简单的加入限制刷新频率的页面模板
<?php
/*
Template Name: Simple Visitor Count
*/

// Start session for rate limiting
// session_status() is the documented way to detect an already-active session.
if (session_status() === PHP_SESSION_NONE) {
    session_start();
}

// Simple rate limiting
$rate_limit = 300; // 5 minutes
if (isset($_SESSION['last_stats_view']) &&
    time() - $_SESSION['last_stats_view'] < $rate_limit) {
    wp_die('Please wait a few minutes before refreshing the statistics.');
}
// Record this view only after the check passed, so blocked requests do not
// extend the waiting period.
$_SESSION['last_stats_view'] = time();

get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <style>
                    .visitor-count-container {
                        text-align: center;
                        padding: 2em;
                        margin: 2em 0;
                        background: #f8f9fa;
                        border-radius: 8px;
                    }
                    .visitor-count {
                        font-size: 2em;
                        color: #2c3e50;
                        margin-bottom: 0.5em;
                    }
                    .last-updated {
                        color: #7f8c8d;
                        font-size: 0.9em;
                    }
                </style>
                <?php
                $stats_file = '/path/to/your/visitor_count.html';
                $count_html = file_exists($stats_file) ? file_get_contents($stats_file) : false;
                if ($count_html !== false) {
                    // NOTE(review): the body of this branch was lost from the original
                    // snippet; the markup below is reconstructed from the CSS classes
                    // declared above -- confirm against the intended layout.
                    echo '<div class="visitor-count-container">';
                    echo '<div class="visitor-count">' . $count_html . '</div>';
                    echo '<div class="last-updated">Last updated: ' . date('Y-m-d H:i:s', filemtime($stats_file)) . '</div>';
                    echo '</div>';
                } else {
                    echo '<p>Visitor statistics are currently unavailable.</p>';
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
- 使用cron自动化运行脚本
# Open the current user's crontab in an editor
crontab -e
# Add this line to run the script at minute 0 of every hour
0 * * * * /usr/bin/python3 /path/to/nginx_stats.py
# Or, instead, run it at minute 0 of every 6th hour (00:00, 06:00, 12:00, 18:00)
0 */6 * * * /usr/bin/python3 /path/to/nginx_stats.py