<?php
/**
 * Detector Avançado de VPN/Proxy/Datacenter/Bots
 *
 * Detecta:
 * - VPNs comerciais
 * - Proxies
 * - Datacenters (AWS, Google Cloud, Azure, DigitalOcean, etc)
 * - Bots e Crawlers
 * - Facebook Crawlers
 * - Scrapers
 */

/**
 * Lowercase name fragments that identify datacenter, hosting, VPN and proxy
 * operators.
 *
 * detector_is_proxy_or_vpn() searches for each entry as a substring of the
 * lowercased ISP, organisation and AS strings of an IP lookup. Entries must
 * therefore be lowercase, and the shorter an entry is, the more unrelated
 * operator names it will match.
 *
 * NOTE(review): 'pia' also matches any operator name that merely contains
 * those three letters - confirm this is acceptable.
 * NOTE(review): 'free sas', 'bouygues' and 'iliad' look like consumer access
 * providers rather than datacenters - confirm they belong in this list.
 */
$DATACENTER_ISPS = [
    // Cloud Providers
    'amazon', 'aws', 'ec2',
    'google', 'google cloud', 'gcp', 'googlebot',
    'microsoft', 'azure', 'microsoft azure',
    'digitalocean', 'linode', 'vultr', 'ovh',
    'hetzner', 'contabo', 'scaleway',
    'oracle cloud', 'alibaba cloud', 'tencent cloud',
    'ibm cloud', 'rackspace', 'cloudflare',

    // VPS/Hosting
    'hostinger', 'godaddy', 'bluehost', 'hostgator',
    'namecheap', 'dreamhost', 'siteground', 'a2 hosting',
    'inmotionhosting', 'liquidweb', 'kamatera',

    // VPN Providers
    'nordvpn', 'expressvpn', 'surfshark', 'cyberghost',
    'private internet access', 'pia', 'ipvanish', 'hidemyass',
    'protonvpn', 'mullvad', 'windscribe', 'tunnelbear',

    // Proxy Services
    'luminati', 'brightdata', 'smartproxy', 'oxylabs',
    'geosurf', 'netnut', 'storm proxies',

    // European Datacenters
    'leaseweb', 'serverius', 'worldstream', 'i3d',
    'online.net', 'iliad', 'free sas', 'bouygues',
    'm247', 'datacamp', 'quickpacket',
];

/**
 * Lowercase user agent fragments that identify known bots, crawlers, HTTP
 * libraries and monitoring tools.
 *
 * detector_is_bot() reports a bot as soon as any entry occurs as a substring
 * of the lowercased user agent, so an entry must never be a fragment of an
 * ordinary browser user agent. In particular a bare 'moz' entry matches the
 * "Mozilla/5.0" prefix sent by practically every browser; Moz's crawlers are
 * listed by their own names ('rogerbot', 'dotbot') instead.
 */
$BOT_USER_AGENTS = [
    // Search Engine Bots
    'googlebot', 'bingbot', 'slurp', 'duckduckbot',
    'baiduspider', 'yandexbot', 'sogou', 'exabot',

    // Facebook/Meta
    'facebookexternalhit', 'facebot', 'facebook',
    'meta-externalagent', 'facebookcatalog',

    // Social Media
    'twitterbot', 'linkedinbot', 'pinterest', 'slackbot',
    'whatsapp', 'telegrambot', 'discordbot',

    // Generic Bots
    'bot', 'crawler', 'spider', 'scraper', 'crawl',
    'fetch', 'archive', 'index',

    // Libraries/Tools
    'curl', 'wget', 'python', 'python-requests', 'python-urllib',
    'java', 'httpclient', 'libwww', 'perl', 'ruby',
    'go-http-client', 'node-fetch', 'axios', 'request',
    'scrapy', 'selenium', 'phantomjs', 'headless',
    'puppeteer', 'playwright', 'mechanize',

    // SEO/Analytics Tools (Moz is matched through its crawler names)
    'semrush', 'ahrefs', 'rogerbot', 'dotbot', 'majestic', 'screaming frog',
    'seokicks', 'sistrix', 'serpstat', 'spyfu',

    // Other
    'postman', 'insomnia', 'httpie', 'gtmetrix', 'pingdom',
    'uptimerobot', 'statuscake', 'site24x7',
];

/**
 * Known datacenter IPv4 ranges in CIDR notation.
 *
 * This is a partial list - in production use a complete database.
 *
 * detector_is_datacenter_ip() scans the list in order and reports the first
 * range that contains the address. Because the broad /8 entries come first,
 * narrower entries that lie inside them (for example 34.64.0.0/10 inside
 * 34.0.0.0/8, 13.64.0.0/11 inside 13.0.0.0/8 or 54.36.0.0/16 inside
 * 54.0.0.0/8) are never the reported match.
 *
 * NOTE(review): the /8 and /16 blocks are coarse approximations; confirm
 * each one is wholly owned by the provider named in its group comment.
 */
$DATACENTER_IP_RANGES = [
    // AWS (some ranges)
    '3.0.0.0/8',
    '13.0.0.0/8',
    '18.0.0.0/8',
    '34.0.0.0/8',
    '35.0.0.0/8',
    '52.0.0.0/8',
    '54.0.0.0/8',

    // Google Cloud
    '34.64.0.0/10',
    '35.184.0.0/13',
    '35.192.0.0/12',
    '35.224.0.0/12',

    // Azure
    '13.64.0.0/11',
    '13.96.0.0/13',
    '20.0.0.0/8',
    '40.64.0.0/10',

    // DigitalOcean
    '104.131.0.0/16',
    '138.68.0.0/16',
    '139.59.0.0/16',
    '142.93.0.0/16',
    '157.230.0.0/16',
    '159.65.0.0/16',
    '159.89.0.0/16',
    '161.35.0.0/16',
    '164.90.0.0/16',
    '165.22.0.0/16',
    '167.71.0.0/16',
    '167.99.0.0/16',
    '174.138.0.0/16',
    '178.62.0.0/16',
    '188.166.0.0/16',
    '206.189.0.0/16',
    '209.97.0.0/16',

    // Vultr
    '45.32.0.0/16',
    '45.63.0.0/16',
    '45.76.0.0/16',
    '45.77.0.0/16',
    '66.42.0.0/16',
    '104.156.0.0/16',
    '108.61.0.0/16',
    '140.82.0.0/16',
    '149.28.0.0/16',
    '155.138.0.0/16',
    '207.148.0.0/16',
    '208.167.0.0/16',
    '216.128.0.0/16',

    // Linode
    '45.33.0.0/16',
    '45.56.0.0/16',
    '45.79.0.0/16',
    '50.116.0.0/16',
    '66.175.0.0/16',
    '69.164.0.0/16',
    '72.14.0.0/16',
    '74.207.0.0/16',
    '85.159.0.0/16',
    '96.126.0.0/16',
    '97.107.0.0/16',
    '139.162.0.0/16',
    '172.104.0.0/16',
    '173.230.0.0/16',
    '173.255.0.0/16',
    '176.58.0.0/16',
    '178.79.0.0/16',
    '192.155.0.0/16',
    '198.58.0.0/16',
    '198.74.0.0/16',

    // OVH
    '51.68.0.0/16',
    '51.75.0.0/16',
    '51.77.0.0/16',
    '51.79.0.0/16',
    '51.83.0.0/16',
    '51.89.0.0/16',
    '51.91.0.0/16',
    '54.36.0.0/16',
    '54.37.0.0/16',
    '54.38.0.0/16',
    '54.39.0.0/16',
    '91.134.0.0/16',
    '92.222.0.0/16',
    '135.125.0.0/16',
    '137.74.0.0/16',
    '139.99.0.0/16',
    '141.94.0.0/16',
    '144.217.0.0/16',
    '145.239.0.0/16',
    '146.59.0.0/16',
    '147.135.0.0/16',
    '148.113.0.0/16',
    '149.56.0.0/16',
    '151.80.0.0/16',
    '158.69.0.0/16',
    '162.19.0.0/16',
    '164.132.0.0/16',
    '167.114.0.0/16',
    '176.31.0.0/16',
    '178.32.0.0/16',
    '178.33.0.0/16',
    '185.12.0.0/16',
    '188.165.0.0/16',
    '192.95.0.0/16',
    '193.70.0.0/16',
    '198.27.0.0/16',
    '198.50.0.0/16',
    '198.100.0.0/16',
    '198.245.0.0/16',

    // Hetzner
    '5.9.0.0/16',
    '46.4.0.0/16',
    '78.46.0.0/16',
    '85.10.0.0/16',
    '88.198.0.0/16',
    '88.99.0.0/16',
    '94.130.0.0/16',
    '95.216.0.0/16',
    '116.202.0.0/16',
    '116.203.0.0/16',
    '128.140.0.0/16',
    '135.181.0.0/16',
    '136.243.0.0/16',
    '138.201.0.0/16',
    '142.132.0.0/16',
    '144.76.0.0/16',
    '148.251.0.0/16',
    '157.90.0.0/16',
    '159.69.0.0/16',
    '162.55.0.0/16',
    '167.235.0.0/16',
    '168.119.0.0/16',
    '178.63.0.0/16',
    '188.40.0.0/16',
    '195.201.0.0/16',
    '213.133.0.0/16',
    '213.239.0.0/16',
];

/**
 * Decides from the user agent whether the request comes from a bot.
 *
 * The user agent is lowercased and searched for every entry of the global
 * $BOT_USER_AGENTS list; the first entry found wins.
 *
 * @param string|null $userAgent User agent to inspect; when null the
 *                               HTTP_USER_AGENT request header is used.
 * @return array ['is_bot' => false] when nothing matched, otherwise
 *               ['is_bot' => true, 'reason' => <matched entry or
 *               'no_user_agent'>, 'type' => <category>].
 */
function detector_is_bot($userAgent = null) {
    global $BOT_USER_AGENTS;

    if ($userAgent === null) {
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }
    $agent = strtolower($userAgent);

    // A missing user agent is treated as a bot of unknown kind.
    if (!$agent) {
        return ['is_bot' => true, 'reason' => 'no_user_agent', 'type' => 'unknown'];
    }

    // Category markers in priority order: the first group with any marker
    // present in the user agent decides the reported type.
    $typeMarkers = [
        'facebook_crawler' => ['facebook', 'facebot'],
        'google_crawler'   => ['google'],
        'bing_crawler'     => ['bing'],
        'scraper'          => ['python', 'curl', 'wget'],
        'automation_tool'  => ['selenium', 'phantom', 'headless'],
    ];

    foreach ($BOT_USER_AGENTS as $signature) {
        if (strpos($agent, strtolower($signature)) === false) {
            continue;
        }

        $kind = 'generic_bot';
        foreach ($typeMarkers as $label => $markers) {
            foreach ($markers as $marker) {
                if (strpos($agent, $marker) !== false) {
                    $kind = $label;
                    break 2;
                }
            }
        }

        return ['is_bot' => true, 'reason' => $signature, 'type' => $kind];
    }

    return ['is_bot' => false];
}

/**
 * Tells whether the user agent belongs to a Facebook/Meta crawler.
 *
 * @param string|null $userAgent User agent to inspect; when null the
 *                               HTTP_USER_AGENT request header is used.
 * @return bool True when the lowercased user agent contains any of the
 *              Facebook/Meta crawler fragments.
 */
function detector_is_facebook_crawler($userAgent = null) {
    if ($userAgent === null) {
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }
    $agent = strtolower($userAgent);

    $signatures = ['facebookexternalhit', 'facebot', 'facebook', 'meta-externalagent', 'facebookcatalog'];

    $hits = array_filter($signatures, function ($signature) use ($agent) {
        return strpos($agent, $signature) !== false;
    });

    return count($hits) > 0;
}

/**
 * Checks whether an IPv4 address lies inside a CIDR range.
 *
 * @param string $ip   Dotted-quad IPv4 address to test.
 * @param string $cidr Range as "network/prefix" (prefix 0-32), or a single
 *                     address without a slash, which is compared literally.
 * @return bool True when the address is inside the range. Returns false when
 *              the address, the network part or the prefix length is not
 *              valid, instead of letting a failed conversion act as 0.
 */
function detector_ip_in_range($ip, $cidr) {
    if (strpos($cidr, '/') === false) {
        return $ip === $cidr;
    }

    list($network, $prefix) = explode('/', $cidr, 2);

    $ipLong = ip2long($ip);
    $networkLong = ip2long($network);
    $prefixLength = filter_var(
        $prefix,
        FILTER_VALIDATE_INT,
        ['options' => ['min_range' => 0, 'max_range' => 32]]
    );

    // ip2long() yields false for anything that is not a dotted quad (IPv6
    // included); used in a bitwise AND that false would behave like 0.0.0.0.
    if ($ipLong === false || $networkLong === false || $prefixLength === false) {
        return false;
    }

    // All-ones shifted left clears the host bits; a prefix of 0 clears all
    // 32 address bits so every address matches.
    $mask = -1 << (32 - $prefixLength);

    return ($ipLong & $mask) === ($networkLong & $mask);
}

/**
 * Looks an IPv4 address up in the global $DATACENTER_IP_RANGES list.
 *
 * @param string|null $ip Address to check; when null REMOTE_ADDR is used.
 * @return array ['is_datacenter' => false, 'reason' => 'invalid_ip'] when the
 *               value is empty or not a valid IPv4 address,
 *               ['is_datacenter' => true, 'range' => <first matching CIDR>]
 *               on a hit, and ['is_datacenter' => false] otherwise.
 */
function detector_is_datacenter_ip($ip = null) {
    global $DATACENTER_IP_RANGES;

    if ($ip === null) {
        $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    }

    // Only IPv4 is supported; everything else is reported as invalid.
    $isIpv4 = !empty($ip) && filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4);
    if (!$isIpv4) {
        return ['is_datacenter' => false, 'reason' => 'invalid_ip'];
    }

    $verdict = ['is_datacenter' => false];
    foreach ($DATACENTER_IP_RANGES as $cidr) {
        if (detector_ip_in_range($ip, $cidr)) {
            // First containing range wins.
            $verdict = ['is_datacenter' => true, 'range' => $cidr];
            break;
        }
    }

    return $verdict;
}

/**
 * Fetches ISP/geo information for an IP address from ip-api.com.
 *
 * (Original author's note: free service, 45 requests per minute.)
 * Successful lookups are cached in $_SESSION under "ip_info_<ip>".
 *
 * NOTE(review): the request goes over plain HTTP - confirm whether the plan
 * in use allows HTTPS.
 *
 * @param string|null $ip Address to look up; when null REMOTE_ADDR is used.
 * @return array|null Decoded API response on success; null for an empty,
 *                    loopback or syntactically invalid address, when the
 *                    request fails, or when the API does not answer with
 *                    status "success".
 */
function detector_get_ip_info($ip = null) {
    $ip = $ip ?? $_SERVER['REMOTE_ADDR'] ?? '';

    if (empty($ip) || $ip === '127.0.0.1' || $ip === '::1') {
        return null;
    }

    // The value is interpolated into the request URL below, so anything that
    // is not a literal IP address (extra path, query string, spaces) is
    // rejected before a request is built.
    if (!is_string($ip) || filter_var($ip, FILTER_VALIDATE_IP) === false) {
        return null;
    }

    // Session cache
    $cacheKey = 'ip_info_' . $ip;
    if (isset($_SESSION[$cacheKey])) {
        return $_SESSION[$cacheKey];
    }

    $url = "http://ip-api.com/json/{$ip}?fields=status,country,countryCode,city,isp,org,as,proxy,hosting,mobile";

    $context = stream_context_create([
        'http' => [
            'timeout' => 3,
            'ignore_errors' => true
        ]
    ]);

    // Best effort: a failed lookup is reported as null, not as a warning.
    $response = @file_get_contents($url, false, $context);

    if ($response === false) {
        return null;
    }

    $data = json_decode($response, true);

    // Guard against non-JSON bodies, scalar JSON and responses that lack a
    // status field before reading it.
    if (!is_array($data) || !isset($data['status']) || $data['status'] !== 'success') {
        return null;
    }

    // Cache the successful result
    $_SESSION[$cacheKey] = $data;

    return $data;
}

/**
 * Decides whether an IP address belongs to a VPN, proxy or hosting provider.
 *
 * Uses detector_get_ip_info() first. When no lookup result is available it
 * falls back to the local range list via detector_is_datacenter_ip().
 * Otherwise the API's proxy/hosting flags are checked, and finally the ISP,
 * organisation and AS strings are searched for entries of the global
 * $DATACENTER_ISPS list.
 *
 * @param string|null $ip Address to check; null lets the helpers use
 *                        REMOTE_ADDR.
 * @return array Always contains 'is_proxy' (bool). Positive results carry
 *               'type' ('proxy', 'datacenter' or 'datacenter_isp') plus the
 *               available ISP/geo fields; missing fields are reported as
 *               'unknown'.
 */
function detector_is_proxy_or_vpn($ip = null) {
    global $DATACENTER_ISPS;

    $ipInfo = detector_get_ip_info($ip);

    if (!$ipInfo) {
        // Fallback: check the known ranges
        $dcCheck = detector_is_datacenter_ip($ip);
        if ($dcCheck['is_datacenter']) {
            return [
                'is_proxy' => true,
                'type' => 'datacenter',
                'reason' => 'ip_range_match',
                'details' => $dcCheck['range']
            ];
        }
        return ['is_proxy' => false, 'reason' => 'api_unavailable'];
    }

    // Read every optional field once, with the same default in all branches,
    // so a response that lacks a field never triggers an undefined-key access.
    $ispName = $ipInfo['isp'] ?? 'unknown';
    $orgName = $ipInfo['org'] ?? 'unknown';
    $country = $ipInfo['countryCode'] ?? 'unknown';
    $city = $ipInfo['city'] ?? 'unknown';

    // The API returns proxy and hosting flags
    $flaggedProxy = !empty($ipInfo['proxy']);
    if ($flaggedProxy || !empty($ipInfo['hosting'])) {
        return [
            'is_proxy' => true,
            'type' => $flaggedProxy ? 'proxy' : 'datacenter',
            'isp' => $ispName,
            'org' => $orgName,
            'country' => $country,
            'city' => $city,
        ];
    }

    // Check ISP, organisation and AS against the datacenter list
    $haystacks = [
        strtolower($ipInfo['isp'] ?? ''),
        strtolower($ipInfo['org'] ?? ''),
        strtolower($ipInfo['as'] ?? ''),
    ];

    foreach ($DATACENTER_ISPS as $datacenter) {
        foreach ($haystacks as $haystack) {
            if (strpos($haystack, $datacenter) !== false) {
                return [
                    'is_proxy' => true,
                    'type' => 'datacenter_isp',
                    'isp' => $ispName,
                    'org' => $orgName,
                    'matched' => $datacenter,
                    'country' => $country,
                ];
            }
        }
    }

    return [
        'is_proxy' => false,
        'isp' => $ispName,
        'org' => $orgName,
        'country' => $country,
        'city' => $city,
        'is_mobile' => $ipInfo['mobile'] ?? false,
    ];
}

/**
 * Reports which proxy-related request headers are present.
 *
 * @return array ['has_proxy_headers' => bool, 'headers' => list of the
 *               $_SERVER keys from the candidate list that hold a non-empty
 *               value, in candidate-list order].
 */
function detector_has_proxy_headers() {
    $candidates = [
        'HTTP_VIA',
        'HTTP_X_FORWARDED_FOR',
        'HTTP_FORWARDED_FOR',
        'HTTP_X_FORWARDED',
        'HTTP_FORWARDED',
        'HTTP_CLIENT_IP',
        'HTTP_FORWARDED_FOR_IP',
        'HTTP_X_PROXY_ID',
        'HTTP_X_REAL_IP',
        'HTTP_PROXY_CONNECTION',
    ];

    // Keep only the headers the request actually carries, re-indexed from 0.
    $present = array_values(array_filter($candidates, function ($name) {
        return !empty($_SERVER[$name]);
    }));

    return [
        'has_proxy_headers' => count($present) > 0,
        'headers' => $present
    ];
}

/**
 * Runs every visitor check and combines the results into one report.
 *
 * Score contributions: bot +80, proxy headers +20, datacenter IP +60,
 * VPN/proxy lookup +70 (only when the IP is not already a known datacenter
 * address), empty user agent +50, user agent shorter than 20 bytes +30.
 * The total is capped at 100.
 *
 * @param string|null $ip        Address to analyse; null means REMOTE_ADDR.
 * @param string|null $userAgent User agent to analyse; null means the
 *                               HTTP_USER_AGENT request header.
 * @return array ['ip', 'is_threat' (bool), 'threat_level' (0-100),
 *               'reasons' (list of reason codes), 'details' (per-check data)].
 */
function detector_analyze($ip = null, $userAgent = null) {
    if ($ip === null) {
        $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    }
    if ($userAgent === null) {
        $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }

    $score = 0;
    $isThreat = false;
    $reasons = [];
    $details = [];

    // 1. Bot check
    $bot = detector_is_bot($userAgent);
    if ($bot['is_bot']) {
        $isThreat = true;
        $score += 80;
        $reasons[] = 'bot_detected';
        $details['bot'] = $bot;
    }

    // 2. Proxy headers raise the score but do not mark a threat by themselves
    $headers = detector_has_proxy_headers();
    if ($headers['has_proxy_headers']) {
        $score += 20;
        $reasons[] = 'proxy_headers';
        $details['headers'] = $headers['headers'];
    }

    // 3. Local datacenter range check; 4. API lookup only when step 3 missed
    $datacenter = detector_is_datacenter_ip($ip);
    if ($datacenter['is_datacenter']) {
        $isThreat = true;
        $score += 60;
        $reasons[] = 'datacenter_ip';
        $details['datacenter'] = $datacenter;
    } else {
        $proxy = detector_is_proxy_or_vpn($ip);
        if ($proxy['is_proxy']) {
            $isThreat = true;
            $score += 70;
            $reasons[] = 'vpn_proxy_detected';
            $details['proxy'] = $proxy;
        } else {
            $details['ip_info'] = $proxy;
        }
    }

    // 5. Empty or suspiciously short user agent
    if (!$userAgent) {
        $isThreat = true;
        $score += 50;
        $reasons[] = 'no_user_agent';
    } elseif (strlen($userAgent) < 20) {
        $score += 30;
        $reasons[] = 'short_user_agent';
    }

    return [
        'ip' => $ip,
        'is_threat' => $isThreat,
        'threat_level' => min(100, $score), // 0-100
        'reasons' => $reasons,
        'details' => $details,
    ];
}

/**
 * Decides whether a visitor should be blocked, based on detector_analyze().
 *
 * @param int         $minThreatLevel Threat level (0-100) at or above which
 *                                    the visitor is blocked.
 * @param string|null $ip             Address to analyse; null (the default)
 *                                    analyses the current request.
 * @param string|null $userAgent      User agent to analyse; null (the
 *                                    default) analyses the current request.
 * @return array ['block' => bool, 'analysis' => full detector_analyze()
 *               report].
 */
function detector_should_block($minThreatLevel = 50, $ip = null, $userAgent = null) {
    // Passing null through keeps the previous behaviour of analysing the
    // current request, while letting callers evaluate an explicit visitor.
    $analysis = detector_analyze($ip, $userAgent);

    return [
        'block' => $analysis['threat_level'] >= $minThreatLevel,
        'analysis' => $analysis
    ];
}
?>
