使用Claude制作了一个统计网站访问量的脚本:
import re
from collections import defaultdict
from datetime import datetime
import os
class NginxStatsParser:
    """Collect simple traffic statistics from an nginx access log.

    The parser reads the log line by line and tallies total visits, unique
    client IPs, visits per day, the most requested pages and the number of
    requests that look like they came from bots or crawlers.
    """

    def __init__(self, log_path='/var/log/nginx/access.log'):
        """Remember the log location and the regex sources used for parsing.

        Args:
            log_path: Path of the nginx access log to read.
        """
        self.log_path = log_path
        self.ip_pattern = r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        self.date_pattern = r'\[(\d{2}\/\w+\/\d{4})'
        # Capture only the request target. Matching up to the closing quote
        # would drag the protocol (" HTTP/1.1") into every page key.
        self.url_pattern = r'\"GET ([^\"\s]+)'

    def parse_logs(self):
        """Scan the access log and accumulate raw counters.

        Returns:
            A dict with the keys ``total_visits`` (int), ``unique_visitors``
            (set of IP strings), ``daily_visits`` (``YYYY-MM-DD`` -> count),
            ``popular_pages`` (URL -> count) and ``bot_visits`` (int), or an
            error message string when the log file does not exist.
        """
        stats = {
            'total_visits': 0,
            'unique_visitors': set(),
            'daily_visits': defaultdict(int),
            'popular_pages': defaultdict(int),
            'bot_visits': 0
        }
        # Compile once per run instead of looking the patterns up on every line.
        ip_re = re.compile(self.ip_pattern)
        date_re = re.compile(self.date_pattern)
        url_re = re.compile(self.url_pattern)
        # Substrings that mark static assets and admin pages.
        skipped_markers = ('.css', '.js', '.jpg', '.png', 'wp-admin')

        try:
            # Access logs can contain arbitrary bytes sent by clients; replace
            # undecodable ones instead of aborting the whole run.
            with open(self.log_path, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    # Skip bot requests
                    lowered = line.lower()
                    if 'bot' in lowered or 'crawler' in lowered:
                        stats['bot_visits'] += 1
                        continue

                    # Extract IP
                    ip_match = ip_re.search(line)
                    if ip_match:
                        stats['unique_visitors'].add(ip_match.group(1))

                    # Extract date
                    date_match = date_re.search(line)
                    if date_match:
                        try:
                            day = datetime.strptime(
                                date_match.group(1), '%d/%b/%Y'
                            ).strftime('%Y-%m-%d')
                        except ValueError:
                            # Bracketed text that is not a real date; ignore it.
                            day = None
                        if day is not None:
                            stats['daily_visits'][day] += 1

                    # Extract URL
                    url_match = url_re.search(line)
                    if url_match:
                        url = url_match.group(1)
                        # Skip static files and admin pages
                        if not any(marker in url for marker in skipped_markers):
                            stats['popular_pages'][url] += 1

                    # Every non-bot line counts as one visit.
                    stats['total_visits'] += 1
        except FileNotFoundError:
            return f"Error: Log file not found at {self.log_path}"
        return stats

    def generate_report(self):
        """Condense the raw counters into a report ready for display.

        Returns:
            A dict with ``summary`` (totals), ``daily_visits`` (the 7 most
            recent dates present in the log, newest first) and
            ``popular_pages`` (the 5 most requested URLs), or the error
            message string produced by ``parse_logs``.
        """
        stats = self.parse_logs()
        if isinstance(stats, str):  # Error message
            return stats

        report = {
            'summary': {
                'total_visits': stats['total_visits'],
                'unique_visitors': len(stats['unique_visitors']),
                'bot_visits': stats['bot_visits']
            },
            # ISO dates sort chronologically as strings, so a reverse sort
            # puts the newest day first.
            'daily_visits': dict(sorted(stats['daily_visits'].items(), reverse=True)[:7]),  # Last 7 days
            'popular_pages': dict(sorted(stats['popular_pages'].items(),
                                         key=lambda x: x[1], reverse=True)[:5])  # Top 5 pages
        }
        return report
def save_to_html(report, output_path='stats.html'):
    """Render *report* as a standalone HTML page and write it to *output_path*.

    Args:
        report: Dict with the keys ``summary`` (containing ``total_visits``,
            ``unique_visitors`` and ``bot_visits``), ``daily_visits`` and
            ``popular_pages`` (both mappings of label -> count).
        output_path: File to write. A relative path is resolved against the
            current working directory.
    """
    # Imported here because this function uses no other name from the module.
    from html import escape

    def render_rows(mapping):
        # Page URLs come straight from the access log, i.e. from whoever sent
        # the request, so every cell is escaped before it reaches the markup.
        return ''.join(
            f'<tr><td>{escape(str(label))}</td><td>{escape(str(count))}</td></tr>'
            for label, count in mapping.items()
        )

    summary = report['summary']
    daily_rows = render_rows(report['daily_visits'])
    page_rows = render_rows(report['popular_pages'])

    document = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Website Statistics</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
.stats-container {{ max-width: 800px; margin: 0 auto; }}
.stat-box {{ background: #f5f5f5; padding: 15px; margin: 10px 0; border-radius: 5px; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ padding: 8px; text-align: left; border-bottom: 1px solid #ddd; }}
</style>
</head>
<body>
<div class="stats-container">
<h1>Website Statistics</h1>
<div class="stat-box">
<h2>Summary</h2>
<p>Total Visits: {escape(str(summary['total_visits']))}</p>
<p>Unique Visitors: {escape(str(summary['unique_visitors']))}</p>
<p>Bot Visits: {escape(str(summary['bot_visits']))}</p>
</div>
<div class="stat-box">
<h2>Daily Visits (Last 7 days)</h2>
<table>
<tr><th>Date</th><th>Visits</th></tr>
{daily_rows}
</table>
</div>
<div class="stat-box">
<h2>Popular Pages</h2>
<table>
<tr><th>Page</th><th>Visits</th></tr>
{page_rows}
</table>
</div>
</div>
</body>
</html>
"""
    # Write with an explicit encoding that matches the declared charset.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(document)
if __name__ == '__main__':
    # Usage example
    parser = NginxStatsParser()
    report = parser.generate_report()
    if isinstance(report, str):
        # generate_report() returns an error message instead of a report when
        # the log file is missing; print it to stderr and exit non-zero rather
        # than passing a string to save_to_html().
        raise SystemExit(report)
    save_to_html(report)
这个脚本将统计网站总的访问量、过去七天每日访问量、访问次数最多的页面等。
将其保存为 nginx_stats.py 后,在该文件所在的文件夹中运行脚本:
python3 nginx_stats.py
统计结果将输出为一个 html 文件(默认是当前目录下的 stats.html)。
如果要在网站使用这个html文件,有两种方法。
- 一种是建立一个网站访问统计页面模板
# stats_page_template.php
<?php
/*
Template Name: Statistics Page
*/
get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Absolute path of the report generated by the Python script.
                $stats_file = '/path/to/your/stats.html';
                if (file_exists($stats_file)) {
                    // Read the HTML content but exclude DOCTYPE, html, head, and body tags
                    $html = file_get_contents($stats_file);
                    // The quantifier is greedy on purpose: the container holds nested
                    // <div> boxes, so a lazy match would stop at the first inner </div>
                    // and drop everything after the summary box. The container's own
                    // closing tag is the last </div> in the generated file.
                    preg_match('/<div class="stats-container">(.*)<\/div>/s', $html, $matches);
                    if (isset($matches[0])) {
                        echo $matches[0];
                    } else {
                        echo 'Statistics are currently being updated.';
                    }
                } else {
                    echo 'Statistics file not found.';
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
将其中的/path/to/your/stats.html
替换为真实的文件地址,并把模板文件放到当前主题目录下,然后在后台新建页面,模板选择这个新的“Statistics Page”(即文件头部 Template Name 中声明的名称)。
- 一种是使用短代码(shortcode)的方式
在functions.php中加入:
/**
 * Return the contents of the visitor-count file.
 *
 * @return string|false File contents, or an empty string when the file does not exist.
 */
function get_visitor_count() {
    $stats_file = '/path/to/your/visitor_count.html';
    // Fall back to an empty string so callers can echo the result unconditionally.
    return file_exists($stats_file) ? file_get_contents($stats_file) : '';
}
记得将其中的/path/to/your/visitor_count.html
替换为真实地址
然后可以在 header.php、footer.php 或 sidebar.php 等模板中引用:
<?php echo get_visitor_count(); ?>
或者进一步在functions.php中加入:
// Add this to functions.php
// Registers the [visitor_count] shortcode with get_visitor_count() as its callback.
add_shortcode('visitor_count', 'get_visitor_count');
然后使用[visitor_count]
引用访问数据。
- 加入安全措施
- 限制登录后访问,在页面模板前加入:
# Add this at the top of the page template (e.g. stats_page_template.php), before get_header()
<?php
// Stop rendering unless the current user holds the manage_options capability
// (presumably administrators only; verify against the site's role setup).
if (!current_user_can('manage_options')) {
    wp_die('Access denied');
}
?>
- 为python脚本加入日志信息:
# Add to nginx_stats.py
import logging

# Send records of level INFO and above to a dedicated log file, each line
# prefixed with a timestamp. NOTE(review): the user running the script needs
# write access to this path; confirm for your setup.
logging.basicConfig(
    filename='/var/log/nginx_stats.log',
    level=logging.INFO,
    format='%(asctime)s - %(message)s'
)
- 一个加入认证机制的页面模板
<?php
/*
Template Name: Statistics Page
*/
// Basic WordPress authentication
if (!is_user_logged_in()) {
    auth_redirect();
    exit;
}
// Advanced role-based authentication
if (!current_user_can('manage_options') && !current_user_can('edit_posts')) {
    wp_die('You do not have sufficient permissions to access this page.');
}
// Optional: Add specific user check
$allowed_users = array('admin', 'editor', 'analyst');
$current_user = wp_get_current_user();
if (!in_array($current_user->user_login, $allowed_users)) {
    wp_die('Your user account does not have access to statistics.');
}
// Optional: Add IP-based restrictions
$allowed_ips = array(
    '192.168.1.100',
    '10.0.0.1'
);
$current_ip = $_SERVER['REMOTE_ADDR'];
if (!in_array($current_ip, $allowed_ips)) {
    // Log failed attempt
    error_log("Stats page access attempted from unauthorized IP: {$current_ip}");
    wp_die('Access not allowed from your location.');
}
get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <?php
                // Add nonce for additional security
                // Read the query parameter defensively: it is absent on a normal
                // page view, and indexing $_GET directly would raise an
                // undefined-key warning.
                $received_nonce = isset($_GET['stats_nonce'])
                    ? sanitize_text_field(wp_unslash($_GET['stats_nonce']))
                    : '';
                if (!wp_verify_nonce($received_nonce, 'view_stats')) {
                    // NOTE: $stats_nonce is created here but not used further in
                    // this template; append it to links pointing at this page if
                    // nonce verification should actually be enforced.
                    $stats_nonce = wp_create_nonce('view_stats');
                }
                $stats_file = '/path/to/your/stats.html';
                if (file_exists($stats_file)) {
                    // Read the HTML content
                    $html = file_get_contents($stats_file);
                    // Greedy on purpose: the container holds nested <div> boxes, so
                    // a lazy match would end at the first inner </div> and show only
                    // the summary box.
                    preg_match('/<div class="stats-container">(.*)<\/div>/s', $html, $matches);
                    if (isset($matches[0])) {
                        // Add last updated timestamp
                        echo '<p class="stats-updated">Last updated: ' . date('Y-m-d H:i:s', filemtime($stats_file)) . '</p>';
                        echo $matches[0];
                    } else {
                        echo 'Statistics are currently being updated.';
                    }
                } else {
                    echo 'Statistics file not found.';
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
As written, the checks above are cumulative: a visitor must pass every one of them. Keep only the authentication methods you want to use and delete the others:
- Basic login requirement (keep the is_user_logged_in() check)
- Role-based access (keep the current_user_can() check)
- Specific users (customize the $allowed_users array)
- IP restrictions (customize the $allowed_ips array)
- 一个简单的加入限制刷新频率的页面模板
<?php
/*
Template Name: Simple Visitor Count
*/
// Start session for rate limiting
// session_status() reports whether a session is already active, so
// session_start() is never called twice.
if (session_status() === PHP_SESSION_NONE) {
    session_start();
}
// Simple rate limiting
$rate_limit = 300; // 5 minutes
if (isset($_SESSION['last_stats_view']) &&
    time() - $_SESSION['last_stats_view'] < $rate_limit) {
    wp_die('Please wait a few minutes before refreshing the statistics.');
}
$_SESSION['last_stats_view'] = time();
get_header(); ?>
<div id="primary" class="content-area">
    <main id="main" class="site-main">
        <article class="page type-page">
            <div class="entry-content">
                <style>
                .visitor-count-container {
                    text-align: center;
                    padding: 2em;
                    margin: 2em 0;
                    background: #f8f9fa;
                    border-radius: 8px;
                }
                .visitor-count {
                    font-size: 2em;
                    color: #2c3e50;
                    margin-bottom: 0.5em;
                }
                .last-updated {
                    color: #7f8c8d;
                    font-size: 0.9em;
                }
                </style>
                <?php
                $stats_file = '/path/to/your/visitor_count.html';
                if (file_exists($stats_file)) {
                    // Output the file verbatim. include() would run any PHP code
                    // found in it, which is unsafe for a generated data file.
                    readfile($stats_file);
                } else {
                    echo '<p>Visitor statistics are currently unavailable.</p>';
                }
                ?>
            </div>
        </article>
    </main>
</div>
<?php get_footer(); ?>
- 使用cron自动化运行脚本
# Open crontab editor
crontab -e
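# Add this line to run the script every hour
# (cron jobs normally start in the user's home directory, so the default relative
# output path stats.html ends up there unless the script is given an absolute path)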
0 * * * * /usr/bin/python3 /path/to/nginx_stats.py
# Or run it every 6 hours
0 */6 * * * /usr/bin/python3 /path/to/nginx_stats.py