网站访问统计

使用Claude制作了一个统计网站访问量的脚本:

import re
from collections import defaultdict
from datetime import datetime
import os

class NginxStatsParser:
    """Aggregate simple visit statistics from an nginx access log.

    Each line is scanned with three regular expressions (client IP, request
    date, GET target).  Lines that mention "bot" or "crawler" are counted
    separately and otherwise ignored.
    """

    def __init__(self, log_path='/var/log/nginx/access.log'):
        """Remember the log location and the regex sources used per line.

        Args:
            log_path: Path of the access log to read.
        """
        self.log_path = log_path
        self.ip_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        self.date_pattern = r'\[(\d{2}\/\w+\/\d{4})'
        # Stop at the first whitespace as well as at the closing quote, so the
        # captured target is "/page" rather than "/page HTTP/1.1".  Otherwise
        # the same page is counted once per protocol version.
        self.url_pattern = r'\"GET ([^\"\s]+)'

    def parse_logs(self):
        """Read the log file and collect raw counters.

        Returns:
            A dict with the keys ``total_visits`` (int), ``unique_visitors``
            (set of IP strings), ``daily_visits`` (date string -> count),
            ``popular_pages`` (URL -> count) and ``bot_visits`` (int); or an
            error message string when the log file does not exist.
        """
        stats = {
            'total_visits': 0,
            'unique_visitors': set(),
            'daily_visits': defaultdict(int),
            'popular_pages': defaultdict(int),
            'bot_visits': 0
        }

        # Compile once instead of looking the patterns up for every line.
        ip_re = re.compile(self.ip_pattern)
        date_re = re.compile(self.date_pattern)
        url_re = re.compile(self.url_pattern)
        # Substrings that mark a request as a static file or an admin page.
        skipped_markers = ('.css', '.js', '.jpg', '.png', 'wp-admin')

        try:
            # Access logs can contain arbitrary bytes sent by clients; replace
            # undecodable ones instead of aborting the whole run.
            with open(self.log_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    # Skip bot requests
                    lowered = line.lower()
                    if 'bot' in lowered or 'crawler' in lowered:
                        stats['bot_visits'] += 1
                        continue

                    # Extract IP
                    ip_match = ip_re.search(line)
                    if ip_match:
                        stats['unique_visitors'].add(ip_match.group(1))

                    # Extract date
                    date_match = date_re.search(line)
                    if date_match:
                        try:
                            day = datetime.strptime(date_match.group(1), '%d/%b/%Y')
                        except ValueError:
                            # Looks like a date but is not one (e.g. unknown
                            # month name): leave it out of the daily counts.
                            day = None
                        if day is not None:
                            stats['daily_visits'][day.strftime('%Y-%m-%d')] += 1

                    # Extract URL
                    url_match = url_re.search(line)
                    if url_match:
                        url = url_match.group(1)
                        # Skip static files and admin pages
                        if not any(marker in url for marker in skipped_markers):
                            stats['popular_pages'][url] += 1
                            stats['total_visits'] += 1

        except FileNotFoundError:
            return f"Error: Log file not found at {self.log_path}"

        return stats

    def generate_report(self):
        """Condense the raw counters into a report.

        Returns:
            The error message string from ``parse_logs`` unchanged, or a dict
            with ``summary`` (total/unique/bot counts), ``daily_visits`` (the
            7 most recent dates found in the log, newest first) and
            ``popular_pages`` (the 5 most requested URLs, most visited first).
        """
        stats = self.parse_logs()
        if isinstance(stats, str):  # Error message
            return stats

        report = {
            'summary': {
                'total_visits': stats['total_visits'],
                'unique_visitors': len(stats['unique_visitors']),
                'bot_visits': stats['bot_visits']
            },
            # Keys are ISO dates, so reverse lexical order is newest first.
            'daily_visits': dict(sorted(stats['daily_visits'].items(), reverse=True)[:7]),  # Last 7 days
            'popular_pages': dict(sorted(stats['popular_pages'].items(),
                                key=lambda x: x[1], reverse=True)[:5])  # Top 5 pages
        }
        return report

def save_to_html(report, output_path='stats.html'):
    """Write the statistics report to *output_path* as a standalone HTML page.

    Args:
        report: Either a report dict with the keys ``summary``,
            ``daily_visits`` and ``popular_pages``, or an error message
            string.  An error string is rendered as a short notice instead
            of the tables.
        output_path: File to (over)write with the generated page.
    """
    # Imported under a different name because ``html`` is used as a local
    # variable below.
    from html import escape

    page_head = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Website Statistics</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; }
            .stats-container { max-width: 800px; margin: 0 auto; }
            .stat-box { background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }
            table { width: 100%; border-collapse: collapse; }
            th, td { padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }
        </style>
    </head>
    <body>
        <div class="stats-container">
            <h1>Website Statistics</h1>
    """

    if isinstance(report, str):
        # An error message rather than statistics: show it, do not index it.
        sections = f"""
            <div class="stat-box">
                <p>{escape(report)}</p>
            </div>
    """
    else:
        # Dates and URLs come from the access log, i.e. from whatever clients
        # sent, so they must be escaped before being placed into markup.
        daily_rows = ''.join(
            f'<tr><td>{escape(str(date))}</td><td>{visits}</td></tr>'
            for date, visits in report['daily_visits'].items()
        )
        page_rows = ''.join(
            f'<tr><td>{escape(str(page))}</td><td>{visits}</td></tr>'
            for page, visits in report['popular_pages'].items()
        )
        sections = f"""
            <div class="stat-box">
                <h2>Summary</h2>
                <p>Total Visits: {report['summary']['total_visits']}</p>
                <p>Unique Visitors: {report['summary']['unique_visitors']}</p>
                <p>Bot Visits: {report['summary']['bot_visits']}</p>
            </div>
            
            <div class="stat-box">
                <h2>Daily Visits (Last 7 days)</h2>
                <table>
                    <tr><th>Date</th><th>Visits</th></tr>
                    {daily_rows}
                </table>
            </div>
            
            <div class="stat-box">
                <h2>Popular Pages</h2>
                <table>
                    <tr><th>Page</th><th>Visits</th></tr>
                    {page_rows}
                </table>
            </div>
    """

    page_tail = """
        </div>
    </body>
    </html>
    """

    html = page_head + sections + page_tail

    # Pin the encoding so the file matches the declared charset regardless of
    # the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html)

if __name__ == '__main__':
    # Usage example
    parser = NginxStatsParser()
    report = parser.generate_report()
    if isinstance(report, str):
        # generate_report() returns an error message instead of a dict when
        # the log file is missing.  Print it to stderr and exit non-zero
        # rather than handing a string to the HTML writer.
        raise SystemExit(report)
    save_to_html(report)

这个脚本将统计网站的总访问量、日志中最近七天的每日访问量、访问次数最多的页面等。

将其保存为 nginx_stats.py 后,运行脚本:
python3 nginx_stats.py

统计结果将输出为一个html文件。

如果要在网站使用这个html文件,有两种方法。

  1. 一种是建立一个网站访问统计页面模板
# stats_page_template.php
<?php
/*
Template Name: Statistics Page
*/

// Page template that embeds the pre-generated statistics report inside the
// theme's normal header/footer.

get_header(); ?>

<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Absolute path of the HTML report to embed.
                $stats_file = '/path/to/your/stats.html';
                if (file_exists($stats_file)) {
                    // Read the HTML content but exclude DOCTYPE, html, head, and body tags
                    $html = file_get_contents($stats_file);
                    // The quantifier is greedy on purpose: the container holds
                    // nested <div> boxes, so the match has to run to the LAST
                    // </div> in the file. A lazy match stops at the first
                    // inner </div> and drops every box after the summary.
                    if ($html !== false
                        && preg_match('/<div class="stats-container">.*<\/div>/s', $html, $matches)) {
                        echo $matches[0];
                    } else {
                        echo 'Statistics are currently being updated.';
                    }
                } else {
                    echo 'Statistics file not found.';
                }
                ?>
            </div>
        </article>
    </main>
</div>

<?php get_footer(); ?>

将该文件保存到当前主题目录下,把其中的/path/to/your/stats.html替换为真实的文件地址,然后在后台新建页面,模板选择“Statistics Page”(即文件头部 Template Name 指定的名称)。

  2. 另一种是使用短代码(shortcode)的方式

在functions.php中加入:

/**
 * Return the contents of the pre-generated statistics file.
 *
 * @return string The file contents, or an empty string when the file is
 *                missing, unreadable, or cannot be read.
 */
function get_visitor_count() {
    $stats_file = '/path/to/your/visitor_count.html';
    if (!is_readable($stats_file)) {
        return '';
    }
    $contents = file_get_contents($stats_file);
    // file_get_contents() returns false on failure; always hand back a string
    // so callers can echo the result safely.
    return ($contents === false) ? '' : $contents;
}

记得将其中的/path/to/your/visitor_count.html替换为真实地址(例如上面脚本生成的 stats.html 的完整路径)

然后可以在header.php、footer.php或sidebar.php等模板中引用:
<?php echo get_visitor_count(); ?>

或者进一步在functions.php中加入:

// Add this to functions.php: registers the [visitor_count] shortcode and
// names get_visitor_count as its callback.
add_shortcode('visitor_count', 'get_visitor_count');

然后使用[visitor_count]引用访问数据。

  3. 加入安全措施
  • 限制登录后访问,在页面模板前加入:
# Add this at the top of stats_page_template.php
<?php
// Stop rendering with an "Access denied" message unless the current user
// passes the 'manage_options' capability check.
if (!current_user_can('manage_options')) {
    wp_die('Access denied');
}
?>
  • 为python脚本加入日志信息:
# Add to nginx_stats.py
import logging
# Log records at INFO level and above to /var/log/nginx_stats.log, one
# "<timestamp> - <message>" line per record.
# NOTE(review): the account running the script needs write access to this
# path - confirm for the cron user.
logging.basicConfig(
    filename='/var/log/nginx_stats.log',
    level=logging.INFO,
    format='%(asctime)s - %(message)s'
)
  • 一个加入认证机制的页面模板
<?php
/*
Template Name: Statistics Page
*/

// Page template that shows the pre-generated statistics report to a
// restricted audience. The checks below are cumulative: a visitor must pass
// every one of them that is left in place.

// Basic WordPress authentication
if (!is_user_logged_in()) {
    auth_redirect();
    exit;
}

// Advanced role-based authentication
if (!current_user_can('manage_options') && !current_user_can('edit_posts')) {
    wp_die('You do not have sufficient permissions to access this page.');
}

// Optional: Add specific user check
$allowed_users = array('admin', 'editor', 'analyst');
$current_user = wp_get_current_user();
// Strict comparison: loose in_array() treats numeric-looking strings such as
// '007' and '7' as equal.
if (!in_array($current_user->user_login, $allowed_users, true)) {
    wp_die('Your user account does not have access to statistics.');
}

// Optional: Add IP-based restrictions
$allowed_ips = array(
    '192.168.1.100',
    '10.0.0.1'
);
// REMOTE_ADDR is not guaranteed to be set (e.g. CLI); treat a missing value
// as "not allowed" instead of raising an undefined-index warning.
$current_ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
if (!in_array($current_ip, $allowed_ips, true)) {
    // Log failed attempt
    error_log("Stats page access attempted from unauthorized IP: {$current_ip}");
    wp_die('Access not allowed from your location.');
}

get_header(); ?>

<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Add nonce for additional security
                // NOTE: the nonce is only generated here, not enforced - a
                // request without a valid stats_nonce is still served. The
                // isset() guard avoids a warning when the parameter is absent.
                $request_nonce = isset($_GET['stats_nonce']) ? $_GET['stats_nonce'] : '';
                if (!wp_verify_nonce($request_nonce, 'view_stats')) {
                    $stats_nonce = wp_create_nonce('view_stats');
                }
                
                $stats_file = '/path/to/your/stats.html';
                if (file_exists($stats_file)) {
                    // Read the HTML content
                    $html = file_get_contents($stats_file);
                    // The quantifier is greedy on purpose: the container holds
                    // nested <div> boxes, so the match has to run to the LAST
                    // </div> in the file. A lazy match stops at the first
                    // inner </div> and drops every box after the summary.
                    if ($html !== false
                        && preg_match('/<div class="stats-container">.*<\/div>/s', $html, $matches)) {
                        // Add last updated timestamp
                        echo '<p class="stats-updated">Last updated: ' . date('Y-m-d H:i:s', filemtime($stats_file)) . '</p>';
                        echo $matches[0];
                    } else {
                        echo 'Statistics are currently being updated.';
                    }
                } else {
                    echo 'Statistics file not found.';
                }
                ?>
            </div>
        </article>
    </main>
</div>

<?php get_footer(); ?>

You can choose which authentication methods you want to use by deleting the checks you do not need (as written, a visitor must pass all of them):

- Basic login requirement (keep the is_user_logged_in() check)
- Role-based access (keep the current_user_can() check)
- Specific users (customize the $allowed_users array)
- IP restrictions (customize the $allowed_ips array)
  • 一个简单的加入限制刷新频率的页面模板
<?php
/*
Template Name: Simple Visitor Count
*/

// Page template that prints a pre-generated visitor-count snippet and limits
// how often one session may view it.

// Start session for rate limiting
// session_status() reports whether a session is active; testing
// isset($_SESSION) is unreliable because the variable can exist without one.
if (session_status() === PHP_SESSION_NONE) {
    session_start();
}

// Simple rate limiting
$rate_limit = 300; // 5 minutes
if (isset($_SESSION['last_stats_view']) && 
    time() - $_SESSION['last_stats_view'] < $rate_limit) {
    wp_die('Please wait a few minutes before refreshing the statistics.');
}
$_SESSION['last_stats_view'] = time();

get_header(); ?>

<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <style>
                    .visitor-count-container {
                        text-align: center;
                        padding: 2em;
                        margin: 2em 0;
                        background: #f8f9fa;
                        border-radius: 8px;
                    }
                    .visitor-count {
                        font-size: 2em;
                        color: #2c3e50;
                        margin-bottom: 0.5em;
                    }
                    .last-updated {
                        color: #7f8c8d;
                        font-size: 0.9em;
                    }
                </style>
                <?php
                $stats_file = '/path/to/your/visitor_count.html';
                if (is_readable($stats_file)) {
                    // Output the file verbatim. include() would run the file
                    // through the PHP parser and execute any "<?php" sequence
                    // that ended up in this generated data file.
                    readfile($stats_file);
                } else {
                    echo '<p>Visitor statistics are currently unavailable.</p>';
                }
                ?>
            </div>
        </article>
    </main>
</div>

<?php get_footer(); ?>
  4. 使用cron自动化运行脚本
# Open crontab editor
crontab -e

# Add this line to run the script every hour.
# cron starts jobs in the user's home directory, and nginx_stats.py writes
# stats.html relative to the current directory, so change into the directory
# where the report should be written first.
0 * * * * cd /path/to/output/dir && /usr/bin/python3 /path/to/nginx_stats.py

# Or run it every 6 hours
0 */6 * * * cd /path/to/output/dir && /usr/bin/python3 /path/to/nginx_stats.py