# utils/monitoring.py
"""
Module de surveillance avancée pour NeuroPulse Monitor Pro
Collecte des métriques système détaillées et analyse les performances
"""

import psutil
import os
import time
import socket
import subprocess
import json
import logging
from datetime import datetime, timedelta
from collections import defaultdict, deque
import threading
import hashlib

logger = logging.getLogger(__name__)

class SystemMonitor:
    """Classe principale pour la surveillance système avancée"""
    
    def __init__(self, config=None):
        """Set up configuration and the in-memory state kept between collections.

        Args:
            config: Optional settings mapping. Any falsy value (including
                ``None``) is replaced by an empty dict.
        """
        # Bounded buffers: each deque drops its oldest entry once full.
        self.alert_history = deque(maxlen=1000)
        self.network_history = deque(maxlen=300)
        self.process_history = defaultdict(lambda: deque(maxlen=60))

        # Previous network counters, read by _get_network_metrics to derive rates.
        self.last_network_stats = dict()

        self.cache_timeout = 5  # seconds
        self.cache = dict()

        self.config = config if config else {}
        
    def get_system_metrics(self):
        """Gather every metric family into one snapshot dictionary.

        Returns:
            dict: A 'timestamp' entry, one entry per collector ('cpu',
            'memory', 'disk', 'network', 'processes', 'services', 'system',
            'security', 'performance') and an 'anomalies' list computed from
            that snapshot. An empty dict is returned if any step raises.
        """
        try:
            # Collectors run in this order; the keys keep the same order.
            collectors = (
                ('cpu', self._get_cpu_metrics),
                ('memory', self._get_memory_metrics),
                ('disk', self._get_disk_metrics),
                ('network', self._get_network_metrics),
                ('processes', self._get_process_metrics),
                ('services', self._get_service_metrics),
                ('system', self._get_system_info),
                ('security', self._get_security_metrics),
                ('performance', self._get_performance_metrics),
            )

            snapshot = {'timestamp': datetime.now().isoformat()}
            for section, collect in collectors:
                snapshot[section] = collect()

            # Anomaly detection works on the finished snapshot.
            snapshot['anomalies'] = self._detect_anomalies(snapshot)
            return snapshot

        except Exception as e:
            logger.error(f"Erreur lors de la collecte des métriques: {e}")
            return {}
    
    def _get_cpu_metrics(self):
        """Collect detailed CPU metrics.

        Per-core utilisation is sampled once over a 0.1 s window and the
        overall percentage is the mean of that same sample, so both figures
        describe the same interval and the method blocks only once.

        Returns:
            dict: Keys 'percent', 'percent_per_core', 'count_logical',
            'count_physical', 'frequency', 'times', 'stats', 'load_avg' and
            'temperatures'. An empty dict is returned if collection fails.
        """
        try:
            cpu_times = psutil.cpu_times()
            cpu_stats = psutil.cpu_stats()
            cpu_freq = psutil.cpu_freq()

            # Single blocking sample; the overall figure is derived from it.
            per_core = psutil.cpu_percent(interval=0.1, percpu=True)
            overall = sum(per_core) / len(per_core) if per_core else 0.0

            # CPU temperatures are best-effort: psutil only provides
            # sensors_temperatures() on some platforms.
            temperatures = {}
            read_temperatures = getattr(psutil, 'sensors_temperatures', None)
            if read_temperatures is not None:
                try:
                    for name, entries in read_temperatures().items():
                        lowered = name.lower()
                        if 'cpu' in lowered or 'core' in lowered:
                            temperatures[name] = [sensor.current for sensor in entries]
                except Exception as e:
                    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
                    logger.debug(f"Températures CPU indisponibles: {e}")

            return {
                'percent': round(overall, 2),
                'percent_per_core': [round(p, 2) for p in per_core],
                'count_logical': psutil.cpu_count(logical=True),
                'count_physical': psutil.cpu_count(logical=False),
                'frequency': {
                    'current': round(cpu_freq.current, 2) if cpu_freq else 0,
                    'min': round(cpu_freq.min, 2) if cpu_freq else 0,
                    'max': round(cpu_freq.max, 2) if cpu_freq else 0
                },
                'times': {
                    'user': cpu_times.user,
                    'system': cpu_times.system,
                    'idle': cpu_times.idle,
                    # The next three fields are not present on every platform.
                    'iowait': getattr(cpu_times, 'iowait', 0),
                    'irq': getattr(cpu_times, 'irq', 0),
                    'softirq': getattr(cpu_times, 'softirq', 0)
                },
                'stats': {
                    'ctx_switches': cpu_stats.ctx_switches,
                    'interrupts': cpu_stats.interrupts,
                    'soft_interrupts': cpu_stats.soft_interrupts,
                    'syscalls': getattr(cpu_stats, 'syscalls', 0)
                },
                'load_avg': list(os.getloadavg()) if hasattr(os, 'getloadavg') else [0, 0, 0],
                'temperatures': temperatures
            }

        except Exception as e:
            logger.error(f"Erreur CPU metrics: {e}")
            return {}
    
    def _get_memory_metrics(self):
        """Collect virtual-memory and swap statistics.

        Returns:
            dict: A 'virtual' section and a 'swap' section built from
            psutil.virtual_memory() and psutil.swap_memory(). An empty dict
            is returned if collection fails.
        """
        try:
            vm = psutil.virtual_memory()
            swap = psutil.swap_memory()

            virtual_section = {
                'total': vm.total,
                'available': vm.available,
                'percent': round(vm.percent, 2),
                'used': vm.used,
                'free': vm.free,
            }
            # These fields fall back to 0 when the psutil result lacks them.
            for optional in ('active', 'inactive', 'buffers', 'cached', 'shared'):
                virtual_section[optional] = getattr(vm, optional, 0)

            swap_section = {
                'total': swap.total,
                'used': swap.used,
                'free': swap.free,
            }
            swap_section['percent'] = round(swap.percent, 2)
            swap_section['sin'] = swap.sin
            swap_section['sout'] = swap.sout

            return {'virtual': virtual_section, 'swap': swap_section}

        except Exception as e:
            logger.error(f"Erreur memory metrics: {e}")
            return {}
    
    def _get_disk_metrics(self):
        """Collect per-partition usage and disk I/O counters.

        Partitions that cannot be read are skipped, and a partition reporting
        a total size of 0 gets a usage of 0.0 % instead of aborting the whole
        collection with a division by zero.

        Returns:
            dict: 'usage' (keyed by device), 'io_global' and 'io_per_disk'
            (keyed by disk name). An empty dict is returned if collection
            fails.
        """
        io_fields = ('read_count', 'write_count', 'read_bytes',
                     'write_bytes', 'read_time', 'write_time')
        try:
            disk_io = psutil.disk_io_counters(perdisk=True)

            # Usage per partition.
            disk_usage = {}
            for partition in psutil.disk_partitions():
                if not partition.fstype:
                    continue
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                except OSError:
                    # Covers PermissionError / FileNotFoundError and any other
                    # OS-level failure on a single mount; skip just that mount.
                    continue

                if usage.total:
                    percent = round((usage.used / usage.total) * 100, 2)
                else:
                    # Zero-size mount: avoid ZeroDivisionError.
                    percent = 0.0

                disk_usage[partition.device] = {
                    'mountpoint': partition.mountpoint,
                    'fstype': partition.fstype,
                    'total': usage.total,
                    'used': usage.used,
                    'free': usage.free,
                    'percent': percent
                }

            # Global I/O counters; psutil may return None, reported here as zeros.
            total_io = psutil.disk_io_counters()
            io_stats = {
                field: (getattr(total_io, field) if total_io else 0)
                for field in io_fields
            }

            # I/O counters per physical disk.
            io_per_disk = {
                disk: {field: getattr(stats, field) for field in io_fields}
                for disk, stats in (disk_io or {}).items()
            }

            return {
                'usage': disk_usage,
                'io_global': io_stats,
                'io_per_disk': io_per_disk
            }

        except Exception as e:
            logger.error(f"Erreur disk metrics: {e}")
            return {}
    
    def _get_network_metrics(self):
        """Collect network counters, connection counts, throughput and IPv4 addresses.

        Throughput is derived from the byte counters stored in
        ``self.last_network_stats`` by the previous call, so the first call
        on an instance reports 0 for both directions. Negative deltas (the
        counters went backwards) are reported as 0 rather than as a negative
        speed.

        If the connection table cannot be read for lack of privileges, the
        connection counts are reported as zeros and the remaining metrics
        are still returned.

        Returns:
            dict: 'global', 'per_interface', 'connections', 'speeds' and
            'addresses'. An empty dict is returned if collection fails.
        """
        try:
            # Global and per-interface counters.
            net_io = psutil.net_io_counters()
            net_io_per_nic = psutil.net_io_counters(pernic=True)

            # Connection counts, tallied in a single pass.
            conn_stats = {'total': 0, 'established': 0, 'listen': 0, 'time_wait': 0}
            tracked_states = {
                'ESTABLISHED': 'established',
                'LISTEN': 'listen',
                'TIME_WAIT': 'time_wait',
            }
            try:
                connections = psutil.net_connections()
            except (psutil.AccessDenied, PermissionError) as e:
                logger.warning(f"Connexions réseau inaccessibles: {e}")
                connections = []
            for conn in connections:
                conn_stats['total'] += 1
                bucket = tracked_states.get(conn.status)
                if bucket is not None:
                    conn_stats[bucket] += 1

            # Throughput since the previous sample (bytes/sec).
            current_time = time.time()
            speeds = {'in': 0, 'out': 0}
            previous = self.last_network_stats
            if 'network_timestamp' in previous:
                elapsed = current_time - previous['network_timestamp']
                if elapsed > 0:
                    recv_delta = net_io.bytes_recv - previous.get('bytes_recv', 0)
                    sent_delta = net_io.bytes_sent - previous.get('bytes_sent', 0)
                    speeds['in'] = max(recv_delta, 0) / elapsed
                    speeds['out'] = max(sent_delta, 0) / elapsed

            # Remember this sample for the next call.
            self.last_network_stats = {
                'bytes_recv': net_io.bytes_recv,
                'bytes_sent': net_io.bytes_sent,
                'network_timestamp': current_time
            }

            # IPv4 addresses per interface.
            addresses = {
                interface: [
                    {
                        'address': addr.address,
                        'netmask': addr.netmask,
                        'broadcast': addr.broadcast
                    }
                    for addr in addrs
                    if addr.family == socket.AF_INET
                ]
                for interface, addrs in psutil.net_if_addrs().items()
            }

            return {
                'global': {
                    'bytes_sent': net_io.bytes_sent,
                    'bytes_recv': net_io.bytes_recv,
                    'packets_sent': net_io.packets_sent,
                    'packets_recv': net_io.packets_recv,
                    'errin': net_io.errin,
                    'errout': net_io.errout,
                    'dropin': net_io.dropin,
                    'dropout': net_io.dropout
                },
                'per_interface': net_io_per_nic,
                'connections': conn_stats,
                'speeds': {
                    'bytes_in_per_sec': speeds['in'],
                    'bytes_out_per_sec': speeds['out'],
                    'kb_in_per_sec': round(speeds['in'] / 1024, 2),
                    'kb_out_per_sec': round(speeds['out'] / 1024, 2)
                },
                'addresses': addresses
            }

        except Exception as e:
            logger.error(f"Erreur network metrics: {e}")
            return {}
    
    def _get_process_metrics(self):
        """Summarise the process table.

        Returns:
            dict: Counts under 'total', 'running', 'sleeping' and 'zombie',
            plus 'top_cpu' and 'top_memory' lists holding at most ten process
            records each, sorted by descending usage. An empty dict is
            returned if collection fails.
        """
        try:
            state_counts = {
                psutil.STATUS_RUNNING: 0,
                psutil.STATUS_SLEEPING: 0,
                psutil.STATUS_ZOMBIE: 0,
            }
            seen = 0
            cpu_ranked = []
            mem_ranked = []

            fields = ['pid', 'name', 'cpu_percent', 'memory_percent',
                      'status', 'create_time', 'num_threads', 'cmdline']
            for proc in psutil.process_iter(fields):
                try:
                    info = proc.info
                    seen += 1

                    state = info['status']
                    if state in state_counts:
                        state_counts[state] += 1

                    argv = info['cmdline']
                    record = {
                        'pid': info['pid'],
                        'name': info['name'],
                        'cpu_percent': round(info['cpu_percent'] or 0, 2),
                        'memory_percent': round(info['memory_percent'] or 0, 2),
                        'status': state,
                        'num_threads': info['num_threads'],
                        # Only the first three command-line tokens are kept.
                        'cmdline': ' '.join(argv[:3]) if argv else ''
                    }

                    # Processes at 0 % are never candidates for the rankings.
                    if record['cpu_percent'] > 0:
                        cpu_ranked.append(record)
                    if record['memory_percent'] > 0:
                        mem_ranked.append(record)

                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue

            # Highest consumers first; the slices below keep the top ten.
            cpu_ranked.sort(key=lambda item: item['cpu_percent'], reverse=True)
            mem_ranked.sort(key=lambda item: item['memory_percent'], reverse=True)

            return {
                'total': seen,
                'running': state_counts[psutil.STATUS_RUNNING],
                'sleeping': state_counts[psutil.STATUS_SLEEPING],
                'zombie': state_counts[psutil.STATUS_ZOMBIE],
                'top_cpu': cpu_ranked[:10],
                'top_memory': mem_ranked[:10]
            }

        except Exception as e:
            logger.error(f"Erreur process metrics: {e}")
            return {}
    
    def _get_service_metrics(self):
        """Surveillance des services système"""
        try:
            services = []
            
            # Services configurés
            services_config = self.config.get('services_to_monitor', [])
            
            for service_config in services_config:
                if not service_config.get('enabled', True):
                    continue
                    
                service_name = service_config['name']
                service_info = {
                    'name': service_name,
                    'title': service_config.get('title', service_name),
                    'type': service_config.get('service_type', 'Service'),
                    'criticality': service_config.get('criticality', 'Moyenne'),
                    'port': service_config.get('port'),
                    'active': False,
                    'enabled': False,
                    'port_open': False,
                    'status_details': {},
                    'last_check': datetime.now().isoformat()
                }
                
                try:
                    # Vérifier le statut systemctl
                    result = subprocess.run(
                        ['systemctl', 'is-active', service_name],
                        capture_output=True, text=True, timeout=5
                    )
                    service_info['active'] = result.stdout.strip() == 'active'
                    
                    # Vérifier si le service est enabled
                    result = subprocess.run(
                        ['systemctl', 'is-enabled', service_name],
                        capture_output=True, text=True, timeout=5
                    )
                    service_info['enabled'] = result.stdout.strip() == 'enabled'
                    
                    # Obtenir des détails supplémentaires
                    result = subprocess.run(
                        ['systemctl', 'show', service_name, 
                         '--property=LoadState,ActiveState,SubState,MainPID,StatusText'],
                        capture_output=True, text=True, timeout=5
                    )
                    
                    details = {}
                    for line in result.stdout.strip().split('\n'):
                        if '=' in line:
                            key, value = line.split('=', 1)
                            details[key] = value
                    service_info['status_details'] = details
                    
                    # Vérifier le port si spécifié
                    if service_config.get('port'):
                        port = service_config['port']
                        try:
                            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                            sock.settimeout(2)
                            result = sock.connect_ex(('localhost', port))
                            service_info['port_open'] = result == 0
                            sock.close()
                        except:
                            service_info['port_open'] = False
                    
                except subprocess.TimeoutExpired:
                    service_info['error'] = 'Timeout lors de la vérification'
                except Exception as e:
                    service_info['error'] = str(e)
                
                services.append(service_info)
            
            return services
            
        except Exception as e:
            logger.error(f"Erreur service metrics: {e}")
            return []
    
    def _get_system_info(self):
        """Collect general host information.

        Returns:
            dict: 'hostname', 'platform', 'os_info' (parsed from
            /etc/os-release, empty when that file cannot be read),
            'boot_time', 'uptime', 'users' and 'python_version'. An empty
            dict is returned if collection fails.
        """
        # Function-scope import: the module does not import sys at the top,
        # and os.sys is an undocumented attribute of the os module.
        import sys

        try:
            boot_time = datetime.fromtimestamp(psutil.boot_time())
            uptime = datetime.now() - boot_time

            # OS description from /etc/os-release (KEY=value lines).
            os_info = {}
            try:
                with open('/etc/os-release', 'r', encoding='utf-8', errors='replace') as f:
                    for raw_line in f:
                        line = raw_line.strip()
                        if not line or line.startswith('#') or '=' not in line:
                            continue
                        key, value = line.split('=', 1)
                        # Values may be wrapped in double or single quotes.
                        os_info[key] = value.strip('"\'')
            except OSError:
                # File absent or unreadable: report no OS details.
                os_info = {}

            # Logged-in users.
            users = [
                {
                    'name': user.name,
                    'terminal': user.terminal,
                    'host': user.host,
                    'started': datetime.fromtimestamp(user.started).isoformat()
                }
                for user in psutil.users()
            ]

            # os.uname() does not exist on every platform; query it once.
            uname = os.uname() if hasattr(os, 'uname') else None

            return {
                'hostname': socket.gethostname(),
                'platform': {
                    'system': os.name,
                    'release': uname.release if uname else 'N/A',
                    'version': uname.version if uname else 'N/A',
                    'machine': uname.machine if uname else 'N/A'
                },
                'os_info': os_info,
                'boot_time': boot_time.isoformat(),
                'uptime': {
                    'total_seconds': int(uptime.total_seconds()),
                    'days': uptime.days,
                    # timedelta.seconds is the remainder after whole days.
                    'hours': uptime.seconds // 3600,
                    'minutes': (uptime.seconds % 3600) // 60
                },
                'users': users,
                'python_version': sys.version
            }

        except Exception as e:
            logger.error(f"Erreur system info: {e}")
            return {}
    
    def _get_security_metrics(self):
        """Collect security-related indicators.

        Only 'open_ports' and 'running_as_root' are populated here;
        'last_logins', 'failed_logins' and 'sudo_usage' are always returned
        as empty lists. Both populated sections are best-effort: a failure
        in one is logged at debug level and leaves that section as collected
        so far, without affecting the other.

        Returns:
            dict: The five keys listed above. An empty dict is returned if
            collection fails outside the two best-effort sections.
        """
        try:
            security_data = {
                'last_logins': [],
                'failed_logins': [],
                'sudo_usage': [],
                'open_ports': [],
                'running_as_root': []
            }

            # Listening ports (deduplicated, ascending).
            try:
                listening_ports = {
                    conn.laddr.port
                    for conn in psutil.net_connections(kind='inet')
                    if conn.status == 'LISTEN' and conn.laddr
                }
                security_data['open_ports'] = sorted(listening_ports)
            except Exception as e:
                # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
                logger.debug(f"Ports ouverts indisponibles: {e}")

            # Processes whose owner is 'root'.
            try:
                for proc in psutil.process_iter(['pid', 'name', 'username']):
                    try:
                        if proc.info['username'] == 'root':
                            security_data['running_as_root'].append({
                                'pid': proc.info['pid'],
                                'name': proc.info['name']
                            })
                    except (psutil.NoSuchProcess, psutil.AccessDenied):
                        continue
            except Exception as e:
                logger.debug(f"Processus root indisponibles: {e}")

            return security_data

        except Exception as e:
            logger.error(f"Erreur security metrics: {e}")
            return {}
    
    def _get_performance_metrics(self):
        """Derive rate and pressure indicators from CPU and memory counters.

        Context-switch and interrupt rates are computed against the sample
        stored on the instance by the previous call, so the first call
        reports 0 for both. Elapsed time is measured with a monotonic clock
        so wall-clock adjustments cannot distort the rates.

        Returns:
            dict: 'context_switches_per_sec', 'interrupts_per_sec',
            'memory_pressure' (percent, never negative), 'io_wait_time'
            (percent) and 'cache_hit_ratio' (always 0 here). An empty dict
            is returned if collection fails.
        """
        try:
            perf_data = {
                'context_switches_per_sec': 0,
                'interrupts_per_sec': 0,
                'memory_pressure': 0,
                'io_wait_time': 0,
                'cache_hit_ratio': 0
            }

            cpu_stats = psutil.cpu_stats()
            cpu_times = psutil.cpu_times()
            now = time.monotonic()  # one clock read for both diff and storage

            # Rates since the previous sample, if there is one.
            previous_stats = getattr(self, '_last_cpu_stats', None)
            if previous_stats is not None:
                elapsed = now - self._last_cpu_check
                if elapsed > 0:
                    perf_data['context_switches_per_sec'] = (
                        cpu_stats.ctx_switches - previous_stats.ctx_switches
                    ) / elapsed
                    perf_data['interrupts_per_sec'] = (
                        cpu_stats.interrupts - previous_stats.interrupts
                    ) / elapsed

            self._last_cpu_stats = cpu_stats
            self._last_cpu_check = now

            # Memory pressure: used memory minus cache, as a share of total.
            # Clamped at 0 because 'used' may already exclude cached memory,
            # which would otherwise produce a negative percentage.
            memory = psutil.virtual_memory()
            if memory.total > 0:
                uncached = memory.used - getattr(memory, 'cached', 0)
                perf_data['memory_pressure'] = max(0.0, (uncached / memory.total) * 100)

            # Share of CPU time spent waiting on I/O (where iowait is reported).
            if hasattr(cpu_times, 'iowait'):
                total_time = sum([
                    cpu_times.user, cpu_times.system, cpu_times.idle,
                    cpu_times.iowait, getattr(cpu_times, 'irq', 0),
                    getattr(cpu_times, 'softirq', 0)
                ])
                if total_time > 0:
                    perf_data['io_wait_time'] = (cpu_times.iowait / total_time) * 100

            return perf_data

        except Exception as e:
            logger.error(f"Erreur performance metrics: {e}")
            return {}
    
    def _detect_anomalies(self, metrics):
        """Flag threshold breaches in a metrics snapshot.

        Checks, in order: CPU above 95 %, virtual memory above 98 %, more
        than 10 zombie processes, and any disk above 98 % usage. Missing
        sections are treated as 0 / empty.

        Args:
            metrics: Snapshot dictionary with optional 'cpu', 'memory',
                'processes' and 'disk' sections.

        Returns:
            list[dict]: One record per breach with 'type', 'severity',
            'message' and 'timestamp'. If a check raises, the error is
            logged and the records gathered so far are returned.
        """
        findings = []

        def report(kind, severity, message):
            # Every record is stamped with the time it was raised.
            findings.append({
                'type': kind,
                'severity': severity,
                'message': message,
                'timestamp': datetime.now().isoformat()
            })

        try:
            cpu_load = metrics.get('cpu', {}).get('percent', 0)
            if cpu_load > 95:
                report('cpu_critical', 'critical',
                       f'CPU usage critically high: {cpu_load}%')

            ram_load = metrics.get('memory', {}).get('virtual', {}).get('percent', 0)
            if ram_load > 98:
                report('memory_critical', 'critical',
                       f'Memory usage critically high: {ram_load}%')

            zombies = metrics.get('processes', {}).get('zombie', 0)
            if zombies > 10:
                report('zombie_processes', 'warning',
                       f'High number of zombie processes: {zombies}')

            partitions = metrics.get('disk', {}).get('usage', {})
            for device, usage in partitions.items():
                if usage.get('percent', 0) > 98:
                    report('disk_full', 'critical',
                           f'Disk {device} critically full: {usage["percent"]}%')

        except Exception as e:
            logger.error(f"Erreur détection anomalies: {e}")

        return findings

# Backward-compatibility entry point.
# One monitor is shared across calls: network throughput is computed against
# the previous sample stored on the instance, so a fresh SystemMonitor per
# call would always report a speed of 0.
_shared_monitor = None


def get_system_metrics():
    """Return metrics in the simplified legacy format.

    The first call creates a module-level SystemMonitor that later calls
    reuse, so rate-based values (such as the network speeds) become
    meaningful from the second call onward.

    Returns:
        dict: 'cpu' and 'ram' as percentages (0 when unavailable), 'disk'
        fixed at 0 (the original note says it is computed by the main
        backend), and the 'network', 'services', 'system' and 'anomalies'
        sections of the full snapshot.
    """
    global _shared_monitor
    if _shared_monitor is None:
        _shared_monitor = SystemMonitor()

    full_metrics = _shared_monitor.get_system_metrics()

    # Simplified layout kept for compatibility with existing callers.
    return {
        'cpu': full_metrics.get('cpu', {}).get('percent', 0),
        'ram': full_metrics.get('memory', {}).get('virtual', {}).get('percent', 0),
        'disk': 0,  # left at 0 here; filled in by the main backend
        'network': full_metrics.get('network', {}),
        'services': full_metrics.get('services', []),
        'system': full_metrics.get('system', {}),
        'anomalies': full_metrics.get('anomalies', [])
    }

# Classes utilitaires additionnelles

class AlertManager:
    """Advanced alert manager; evaluation and notification are still stubs."""

    def __init__(self, config):
        """Store the configuration and create empty alert bookkeeping.

        Args:
            config: Settings object; check_alert_conditions calls
                ``config.get('alert_thresholds', {})`` on it.
        """
        self.suppressed_alerts = set()
        self.alert_history = deque(maxlen=1000)  # keeps the 1000 newest entries
        self.config = config

    def check_alert_conditions(self, metrics):
        """Evaluate alert conditions for a metrics snapshot.

        The threshold checks are not implemented yet, so the result is
        always an empty list.
        """
        # Looked up but not used yet; kept for the pending checks.
        thresholds = self.config.get('alert_thresholds', {})

        # TODO: implement the alert checks.
        triggered = []
        return triggered

    def send_notification(self, alert):
        """Send a notification for ``alert`` (not implemented; returns None)."""
        return None

class PerformanceAnalyzer:
    """System performance analyser; both analysis methods are still stubs."""

    def __init__(self):
        """Create empty baseline and trend containers."""
        self.trend_data = defaultdict(list)  # missing keys start as empty lists
        self.baseline_metrics = dict()

    def analyze_trends(self, metrics):
        """Analyse performance trends (not implemented; returns None)."""
        return None

    def predict_resource_exhaustion(self, metrics):
        """Predict resource exhaustion (not implemented; returns None)."""
        return None

if __name__ == '__main__':
    # Manual smoke test: collect one snapshot and pretty-print it as JSON.
    # default=str stringifies any value json cannot serialise natively.
    snapshot = SystemMonitor().get_system_metrics()
    print(json.dumps(snapshot, indent=2, default=str))