Files
loogle-scripts/services/telegram-bot/log_monitor.py

232 lines
8.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import datetime
import os
import re
from pathlib import Path
from collections import defaultdict, deque
from typing import Dict, List, Optional, Tuple
import requests
# Directory containing this script; default log discovery happens here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Glob patterns used to auto-discover log files when --log is not given.
DEFAULT_PATTERNS = ["*.log", "*_log.txt"]
# Candidate token files, checked after the TELEGRAM_BOT_TOKEN env var.
TOKEN_FILE_HOME = os.path.expanduser("~/.telegram_dpc_bot_token")
TOKEN_FILE_ETC = "/etc/telegram_dpc_bot_token"
# Matches a "YYYY-mm-dd HH:MM:SS" timestamp anywhere in a log line.
TS_RE = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")
# Problem categories, tried in insertion order; the first regex that matches
# a line claims it (analyze_logs breaks after the first hit).
CATEGORIES = {
    "open_meteo_timeout": re.compile(
        r"timeout|timed out|Read timed out|Gateway Time-out|504", re.IGNORECASE
    ),
    "ssl_handshake": re.compile(r"handshake", re.IGNORECASE),
    "permission_error": re.compile(r"PermissionError|permesso negato|Errno 13", re.IGNORECASE),
    "telegram_error": re.compile(r"Telegram error|Bad Request|chat not found|can't parse entities", re.IGNORECASE),
    "traceback": re.compile(r"Traceback", re.IGNORECASE),
    "exception": re.compile(r"\bERROR\b|Exception", re.IGNORECASE),
    "token_missing": re.compile(r"token missing|Token Telegram assente", re.IGNORECASE),
}
def load_text_file(path: str) -> str:
    """Read *path* as UTF-8 and return its stripped contents, or "" on any error."""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            content = fh.read()
    except Exception:
        # Best effort: a missing/unreadable file simply yields an empty string.
        return ""
    return content.strip()
def load_bot_token() -> str:
    """Return the Telegram bot token, or "" if none is configured.

    Lookup order: TELEGRAM_BOT_TOKEN env var, then the per-user token file,
    then the system-wide token file.
    """
    env_token = os.environ.get("TELEGRAM_BOT_TOKEN", "").strip()
    if env_token:
        return env_token
    for token_path in (TOKEN_FILE_HOME, TOKEN_FILE_ETC):
        file_token = load_text_file(token_path)
        if file_token:
            return file_token.strip()
    return ""
def send_telegram(text: str, chat_ids: Optional[List[str]]) -> bool:
    """Send *text* to every chat in *chat_ids* via the Telegram Bot API.

    Returns True if at least one send got an HTTP 200 response; False when
    the token or the chat list is missing, or every send failed.
    """
    token = load_bot_token()
    if not token or not chat_ids:
        return False
    endpoint = f"https://api.telegram.org/bot{token}/sendMessage"
    delivered = False
    with requests.Session() as session:
        for chat_id in chat_ids:
            body = {
                "text": text,
                "disable_web_page_preview": True,
                "chat_id": chat_id,
            }
            try:
                response = session.post(endpoint, json=body, timeout=15)
            except Exception:
                # Best effort: a failure for one chat must not block the others.
                continue
            if response.status_code == 200:
                delivered = True
    return delivered
def tail_lines(path: str, max_lines: int) -> List[str]:
    """Return at most the last *max_lines* lines of *path*, newline-stripped.

    Undecodable bytes are ignored rather than raising.
    """
    window: deque[str] = deque(maxlen=max_lines)
    with open(path, "r", encoding="utf-8", errors="ignore") as fh:
        window.extend(raw.rstrip("\n") for raw in fh)
    return list(window)
def parse_ts(line: str) -> Optional[datetime.datetime]:
    """Extract the first "YYYY-mm-dd HH:MM:SS" timestamp found in *line*.

    Returns None when no timestamp-shaped text is present, or when the
    matched digits do not form a valid calendar date/time.
    """
    found = re.search(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line)
    if not found:
        return None
    try:
        return datetime.datetime.strptime(found.group(1), "%Y-%m-%d %H:%M:%S")
    except Exception:
        # Pattern matched but the values are out of range (e.g. month 13).
        return None
def analyze_logs(files: List[str], since: datetime.datetime, max_lines: int) -> Tuple[Dict, Dict, Dict, Dict]:
    """Scan the tail of each log file and aggregate problems seen since *since*.

    Args:
        files: candidate log paths; anything that is not a regular file is skipped.
        since: lines whose (carried-forward) timestamp is older than this are ignored.
        max_lines: how many trailing lines of each file to inspect.

    Returns:
        A 4-tuple:
        - category_hits: category -> list of (timestamp, path, line) matches
        - per_file_counts: path -> {category: count}
        - timeout_minutes: "HH:MM" -> count of open_meteo_timeout hits
        - stale_logs: path -> (last activity timestamp, hours since it)
    """
    category_hits = defaultdict(list)
    per_file_counts = defaultdict(lambda: defaultdict(int))
    timeout_minutes = defaultdict(int)
    stale_logs = {}  # path -> (last_ts, hours_since_update)
    now = datetime.datetime.now()
    for path in files:
        if not os.path.isfile(path):
            continue
        last_ts = None
        for line in tail_lines(path, max_lines):
            ts = parse_ts(line)
            if ts:
                # Carry the most recent timestamp forward so untimestamped
                # continuation lines (e.g. traceback bodies) inherit it.
                last_ts = ts
            if not last_ts or last_ts < since:
                continue
            for cat, regex in CATEGORIES.items():
                if regex.search(line):
                    category_hits[cat].append((last_ts, path, line))
                    per_file_counts[path][cat] += 1
                    if cat == "open_meteo_timeout":
                        # Histogram of the minute-of-day when timeouts occur.
                        timeout_minutes[last_ts.strftime("%H:%M")] += 1
                    # First matching category wins; never double-count a line.
                    break
        # Check whether the log is "stale" (not updated for more than 24 hours).
        if last_ts:
            hours_since = (now - last_ts).total_seconds() / 3600.0
            if hours_since > 24:
                stale_logs[path] = (last_ts, hours_since)
        else:
            # No parseable timestamp at all: fall back to the file's mtime.
            try:
                mtime = os.path.getmtime(path)
                file_mtime = datetime.datetime.fromtimestamp(mtime)
                hours_since = (now - file_mtime).total_seconds() / 3600.0
                if hours_since > 24:
                    stale_logs[path] = (file_mtime, hours_since)
            except Exception:
                pass
    return category_hits, per_file_counts, timeout_minutes, stale_logs
def format_report(
    days: int,
    files: List[str],
    category_hits: Dict,
    per_file_counts: Dict,
    timeout_minutes: Dict,
    stale_logs: Dict,
) -> str:
    """Build the human-readable (Italian) report from analyze_logs output.

    Args:
        days: size of the analysis window in days (display only).
        files: paths that were analyzed.
        category_hits: category -> list of (timestamp, path, line) matches.
        per_file_counts: path -> {category: count}.
        timeout_minutes: "HH:MM" -> count of open_meteo_timeout hits.
        stale_logs: path -> (last activity timestamp, hours since it).

    Returns:
        The full report, joined with real newlines.
    """
    now = datetime.datetime.now()
    since = now - datetime.timedelta(days=days)
    lines = []
    lines.append(f"🧾 Log Monitor - ultimi {days} giorni")
    # BUG FIX: the two interval timestamps were concatenated with no separator.
    lines.append(f"Intervallo: {since.strftime('%Y-%m-%d %H:%M')} -> {now.strftime('%Y-%m-%d %H:%M')}")
    lines.append(f"File analizzati: {len(files)}")
    lines.append("")
    # Section: logs not updated for more than 24 hours, most stale first.
    if stale_logs:
        lines.append("⚠️ Log non aggiornati (>24h):")
        for path, (last_ts, hours_since) in sorted(stale_logs.items(), key=lambda x: x[1][1], reverse=True):
            short_path = os.path.basename(path)
            days_ago = hours_since / 24.0
            if days_ago >= 1:
                lines.append(f"{short_path}: {days_ago:.1f} giorni fa ({last_ts.strftime('%Y-%m-%d %H:%M')})")
            else:
                lines.append(f"{short_path}: {hours_since:.1f} ore fa ({last_ts.strftime('%Y-%m-%d %H:%M')})")
        lines.append("")
    total_issues = sum(len(v) for v in category_hits.values())
    if total_issues == 0 and not stale_logs:
        # BUG FIX: the message hard-coded "72 ore" (only true for the default
        # --days 3); reflect the actual analysis window instead.
        lines.append(f"✅ Nessun problema rilevato negli ultimi {days} giorni.")
        return "\n".join(lines)
    lines.append(f"Problemi rilevati: {total_issues}")
    lines.append("")
    for cat, items in sorted(category_hits.items(), key=lambda x: len(x[1]), reverse=True):
        lines.append(f"- {cat}: {len(items)}")
        # Show up to 3 most recent sample lines per category, truncated.
        for ts, path, msg in sorted(items, key=lambda x: x[0], reverse=True)[:3]:
            short_path = os.path.basename(path)
            lines.append(f"{ts.strftime('%Y-%m-%d %H:%M:%S')} | {short_path} | {msg[:180]}")
        lines.append("")
    if timeout_minutes:
        lines.append("Timeout: minuti più frequenti")
        for minute, count in sorted(timeout_minutes.items(), key=lambda x: x[1], reverse=True)[:6]:
            lines.append(f"{minute} -> {count}")
        lines.append("")
    # Per-file summary (top 6 files by total hit count).
    lines.append("File più problematici")
    file_totals = [(sum(cats.values()), path) for path, cats in per_file_counts.items()]
    for total, path in sorted(file_totals, reverse=True)[:6]:
        short_path = os.path.basename(path)
        lines.append(f"{short_path}: {total}")
    # BUG FIX: was "\\n".join(lines), which joined with a literal backslash+n
    # (two characters) instead of a newline, producing a one-line report.
    return "\n".join(lines)
def main() -> None:
    """CLI entry point: discover log files, analyze them and emit the report.

    With --chat_id the report is sent via Telegram; otherwise it is printed
    to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--days", type=int, default=3, help="Numero di giorni da analizzare")
    parser.add_argument("--max-lines", type=int, default=5000, help="Limite righe per file")
    parser.add_argument("--chat_id", help="Chat ID Telegram (separati da virgola)")
    parser.add_argument("--log", action="append", help="Aggiungi un file log specifico")
    args = parser.parse_args()
    if args.log:
        # Explicit file list: keep only the paths that actually exist.
        files = [p for p in args.log if os.path.exists(p)]
    else:
        # Default: every *.log / *_log.txt file next to this script.
        # FIX: dropped the redundant local `from pathlib import Path`
        # (Path is already imported at module level) and the inner sorted()
        # made pointless by the final sorted(set(...)).
        files = []
        for pattern in DEFAULT_PATTERNS:
            files.extend(str(p) for p in Path(BASE_DIR).glob(pattern))
        files = sorted(set(files))
    since = datetime.datetime.now() - datetime.timedelta(days=args.days)
    category_hits, per_file_counts, timeout_minutes, stale_logs = analyze_logs(files, since, args.max_lines)
    report = format_report(args.days, files, category_hits, per_file_counts, timeout_minutes, stale_logs)
    if args.chat_id:
        chat_ids = [c.strip() for c in args.chat_id.split(",") if c.strip()]
        send_telegram(report, chat_ids)
    else:
        print(report)


if __name__ == "__main__":
    main()