<?php
header('Content-Type: application/json');

// Scraping may run long: remove the execution time limit for this request.
set_time_limit(0);
ini_set('max_execution_time', 0);

// The `action` query parameter selects which handler answers the request.
$action = $_GET['action'] ?? '';

if ($action === 'get_posts_list') {
    getPostsList();
} elseif ($action === 'scrape_post') {
    scrapePost();
} elseif ($action === 'get_scraped_posts') {
    getScrapedPosts();
} elseif ($action === 'delete_scraped') {
    deleteScrapedPost();
} elseif ($action === 'import_to_album') {
    importToAlbum();
} elseif ($action === 'list_albums') {
    listAlbums();
} else {
    // Missing or unrecognised action.
    http_response_code(400);
    echo json_encode(['error' => 'Invalid action']);
}

/**
 * List the posts found on one listing page of cosplaytele.com.
 *
 * Query parameters:
 *   - page      (int, default 1)  listing page to crawl; values below 1 are treated as 1
 *   - max_pages (int, default 10) last listing page the caller wants to crawl
 *
 * Echoes a JSON object with `posts`, `page`, `has_more` and `next_page`.
 * When `page` is past `max_pages`, echoes an empty `posts` list with
 * `has_more` = false. When the page cannot be fetched, responds with
 * HTTP 500 and an `error` message.
 */
function getPostsList() {
    // Page numbers start at 1; clamping avoids requesting /page/0/ or /page/-3/.
    $page = max(1, (int)($_GET['page'] ?? 1));
    $maxPages = (int)($_GET['max_pages'] ?? 10);

    if ($page > $maxPages) {
        echo json_encode(['posts' => [], 'has_more' => false]);
        return;
    }

    $baseUrl = 'https://cosplaytele.com';
    // The first page lives at the site root, later pages under /page/N/.
    $url = $page === 1 ? $baseUrl . '/' : $baseUrl . '/page/' . $page . '/';

    $html = fetchUrl($url);

    if (!$html) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể fetch trang']);
        return;
    }

    $posts = parsePostsList($html, $baseUrl);

    $hasMore = $page < $maxPages;

    echo json_encode([
        'posts' => $posts,
        'page' => $page,
        'has_more' => $hasMore,
        'next_page' => $hasMore ? $page + 1 : null
    ]);
}

/**
 * Extract the list of candidate posts from the HTML of a listing page.
 *
 * Two passes are made over the document, in this order:
 *   1. links inside headings (h2-h6); these additionally get `cosplayer`,
 *      `character` and (optionally) `source` when the title matches the
 *      "<cosplayer> cosplay <character> - <source> "<count>" pattern;
 *   2. links inside <article> elements, or inside <div>s whose class
 *      contains "post" or "entry".
 * The first occurrence of a URL wins; later duplicates are ignored.
 *
 * A link is kept only when:
 *   - its visible text is longer than 5 bytes,
 *   - it is an http(s) URL (relative links are resolved against the site
 *     root; javascript:, mailto: and similar links are skipped),
 *   - its host is the host of $baseUrl or one of its subdomains,
 *   - it does not contain "page/", "category/", "tag/" or "#".
 *
 * @param string $html    Raw HTML of the listing page.
 * @param string $baseUrl Site root (scheme + host), e.g. "https://cosplaytele.com".
 * @return array List of arrays with keys `url`, `title`, `slug` and, for
 *               heading links whose title matches, `cosplayer`, `character`, `source`.
 */
function parsePostsList($html, $baseUrl) {
    // Collect libxml parse errors internally instead of emitting warnings,
    // then drop them and restore the previous setting.
    $previousSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    $xpath = new DOMXPath($dom);

    $baseRoot = rtrim($baseUrl, '/');

    $baseScheme = parse_url($baseUrl, PHP_URL_SCHEME);
    $baseScheme = is_string($baseScheme) && $baseScheme !== '' ? strtolower($baseScheme) : 'https';

    // Fall back to the historical hard-coded domain when $baseUrl carries no host.
    $baseHost = parse_url($baseUrl, PHP_URL_HOST);
    $baseHost = is_string($baseHost) && $baseHost !== '' ? strtolower($baseHost) : 'cosplaytele.com';
    if (strpos($baseHost, 'www.') === 0) {
        $baseHost = substr($baseHost, 4);
    }

    // Turn an href into an absolute http(s) URL, or null when it cannot be a post link.
    $resolveHref = function ($href) use ($baseRoot, $baseScheme) {
        $href = trim($href);
        if ($href === '') {
            return null;
        }
        // Protocol-relative link: reuse the scheme of the base URL.
        if (strncmp($href, '//', 2) === 0) {
            return $baseScheme . ':' . $href;
        }
        // Explicit scheme: only http and https can point to a post.
        if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $href, $schemeMatch)) {
            $scheme = strtolower($schemeMatch[1]);
            return ($scheme === 'http' || $scheme === 'https') ? $href : null;
        }
        // Relative link: resolved against the site root.
        return $baseRoot . ($href[0] === '/' ? '' : '/') . $href;
    };

    // Decide whether an absolute URL looks like a post on the target site.
    $isPostUrl = function ($url) use ($baseHost) {
        foreach (['page/', 'category/', 'tag/', '#'] as $marker) {
            if (strpos($url, $marker) !== false) {
                return false;
            }
        }
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return false;
        }
        $host = strtolower($host);
        $subdomainSuffix = '.' . $baseHost;
        return $host === $baseHost
            || substr($host, -strlen($subdomainSuffix)) === $subdomainSuffix;
    };

    // Keyed by URL so that de-duplication is a single isset() lookup.
    $posts = [];

    $collect = function ($links, $extractMeta) use (&$posts, $resolveHref, $isPostUrl) {
        foreach ($links as $link) {
            $title = trim($link->textContent);
            if (strlen($title) <= 5) {
                continue;
            }

            $url = $resolveHref($link->getAttribute('href'));
            if ($url === null || isset($posts[$url]) || !$isPostUrl($url)) {
                continue;
            }

            $slug = basename((string) parse_url($url, PHP_URL_PATH));

            $postInfo = [
                'url' => $url,
                'title' => $title,
                'slug' => $slug ?: 'post_' . md5($url)
            ];

            // Try to extract cosplayer/character/source from the title.
            if ($extractMeta && preg_match('/(.+?)\s+cosplay\s+(.+?)(?:\s+–\s+(.+?))?(?:\s+"\d+)/i', $title, $matches)) {
                $postInfo['cosplayer'] = trim($matches[1]);
                $postInfo['character'] = trim($matches[2]);
                if (isset($matches[3])) {
                    $postInfo['source'] = trim($matches[3]);
                }
            }

            $posts[$url] = $postInfo;
        }
    };

    // Pass 1: links inside headings (these carry the full post title).
    $collect($xpath->query("//h2//a | //h3//a | //h4//a | //h5//a | //h6//a"), true);

    // Pass 2: links inside article/post/entry containers.
    $collect($xpath->query("//article//a | //div[contains(@class, 'post')]//a | //div[contains(@class, 'entry')]//a"), false);

    return array_values($posts);
}

/**
 * Scrape the details of a single post and store them under scraped/.
 *
 * Accepts POST only. The request body (JSON, or form fields as a fallback)
 * carries:
 *   - url  (required) http(s) URL of the post to scrape
 *   - slug (optional) file name stem; derived from the URL path when absent
 *
 * The slug is reduced to [a-zA-Z0-9_-], capped at 100 characters, and falls
 * back to "post_<md5 of url>" when nothing usable is left, so the target
 * file always stays inside the scraped/ directory.
 *
 * Echoes `{success: true, post: {...}}` on success, or an `error` message
 * with HTTP 400/405/500 on failure.
 */
function scrapePost() {
    if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
        http_response_code(405);
        echo json_encode(['error' => 'Method not allowed']);
        return;
    }

    $data = json_decode(file_get_contents('php://input'), true);
    if (!is_array($data) || !$data) {
        $data = $_POST;
    }

    $url = $data['url'] ?? '';
    $url = is_string($url) ? trim($url) : '';

    if ($url === '') {
        http_response_code(400);
        echo json_encode(['error' => 'URL không được để trống']);
        return;
    }

    // The URL comes from the client: refuse anything that is explicitly not
    // http(s) before handing it to the fetcher (file://, gopher://, ...).
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if ($scheme === false
        || ($scheme !== null && !in_array(strtolower($scheme), ['http', 'https'], true))) {
        http_response_code(400);
        echo json_encode(['error' => 'URL không hợp lệ']);
        return;
    }

    // Use existing scraper logic
    require_once 'scraper_api.php';

    $html = fetchUrl($url);

    if (!$html) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể fetch URL']);
        return;
    }

    $result = parseCosplaytelePage($html, $url);

    if (!$result || isset($result['error'])) {
        http_response_code(500);
        echo json_encode(['error' => $result['error'] ?? 'Lỗi khi parse']);
        return;
    }

    // Save to scraped directory
    $scrapedDir = __DIR__ . '/scraped/';
    if (!is_dir($scrapedDir) && !mkdir($scrapedDir, 0755, true) && !is_dir($scrapedDir)) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể tạo thư mục scraped']);
        return;
    }

    // Prefer the caller's slug; otherwise use the last segment of the URL path.
    $slug = $data['slug'] ?? '';
    $slug = is_scalar($slug) ? (string) $slug : '';
    if ($slug === '') {
        $slug = basename((string) parse_url($url, PHP_URL_PATH));
    }
    $slug = preg_replace('/[^a-zA-Z0-9_-]/', '_', $slug);
    $slug = (string) substr($slug, 0, 100); // Limit length
    if ($slug === '') {
        // e.g. a URL whose path is just "/": avoid writing a file named ".json".
        $slug = 'post_' . md5($url);
    }

    $file = $scrapedDir . $slug . '.json';

    // Prepare data
    $postData = [
        'slug' => $slug,
        'source_url' => $url,
        'title' => $result['title'] ?? (($result['cosplayer'] ?? '') . ' cosplay ' . ($result['character'] ?? '')),
        'cosplayer' => $result['cosplayer'] ?? '',
        'character' => $result['character'] ?? '',
        'source' => $result['source'] ?? '',
        'photo_count' => $result['photo_count'] ?? 0,
        'video_count' => $result['video_count'] ?? 0,
        'images' => $result['images'] ?? [],
        'videos' => $result['videos'] ?? [],
        'tags' => $result['tags'] ?? [],
        'categories' => $result['categories'] ?? [],
        'download_links' => $result['download_links'] ?? [],
        'thumbnail' => !empty($result['images']) ? $result['images'][0] : '',
        'scraped_date' => date('c')
    ];

    // Do not report success when the post could not be encoded or written.
    $json = json_encode($postData, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
    if ($json === false || file_put_contents($file, $json) === false) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể lưu file']);
        return;
    }

    echo json_encode([
        'success' => true,
        'post' => $postData
    ]);
}

/**
 * Echo every scraped post stored under scraped/ as a JSON array.
 *
 * Each *.json file in the directory is decoded; files that decode to an
 * empty/falsy value are skipped. The result is ordered by `scraped_date`,
 * newest first. An absent directory yields an empty array.
 */
function getScrapedPosts() {
    $scrapedDir = __DIR__ . '/scraped/';
    $collected = [];

    if (is_dir($scrapedDir)) {
        foreach (scandir($scrapedDir) as $entry) {
            $isJsonFile = $entry !== '.'
                && $entry !== '..'
                && pathinfo($entry, PATHINFO_EXTENSION) === 'json';

            if ($isJsonFile) {
                $decoded = json_decode(file_get_contents($scrapedDir . $entry), true);
                if ($decoded) {
                    $collected[] = $decoded;
                }
            }
        }

        // Descending by scraped date: compare the right-hand item first.
        usort($collected, function($left, $right) {
            return strcmp($right['scraped_date'] ?? '', $left['scraped_date'] ?? '');
        });
    }

    echo json_encode($collected);
}

/**
 * Delete one scraped post file from scraped/.
 *
 * Accepts POST only. The post is identified by `slug`, read from the query
 * string first and from the form body second.
 *
 * The slug is used as a file name, so values containing a path separator
 * or a NUL byte are rejected: otherwise a slug such as "../albums/x" would
 * delete a file outside the scraped/ directory.
 *
 * Echoes `{success: true}` on success, or an `error` message with
 * HTTP 400 (missing/invalid slug), 404 (no such file), 405 or 500.
 */
function deleteScrapedPost() {
    if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
        http_response_code(405);
        echo json_encode(['error' => 'Method not allowed']);
        return;
    }

    $slug = $_GET['slug'] ?? $_POST['slug'] ?? '';

    // `slug[]=x` arrives as an array; treat anything that is not a
    // non-empty string as missing.
    if (!is_string($slug) || $slug === '') {
        http_response_code(400);
        echo json_encode(['error' => 'Thiếu slug']);
        return;
    }

    // Path traversal guard: the slug must be a bare file name.
    if (strpbrk($slug, "/\\\0") !== false) {
        http_response_code(400);
        echo json_encode(['error' => 'Slug không hợp lệ']);
        return;
    }

    $scrapedDir = __DIR__ . '/scraped/';
    $file = $scrapedDir . $slug . '.json';

    if (!is_file($file)) {
        http_response_code(404);
        echo json_encode(['error' => 'Không tìm thấy file']);
        return;
    }

    if (unlink($file)) {
        echo json_encode(['success' => true]);
    } else {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể xóa file']);
    }
}

/**
 * Import a previously scraped post into an album.
 *
 * Accepts POST only. The request body (JSON, or form fields as a fallback)
 * carries `slug` (the scraped post) and `album_id` (the target album).
 * Both values are used as file/directory names, so values that contain a
 * path separator or a NUL byte, or that are "." / "..", are rejected to
 * keep every read and write inside scraped/ and albums/.
 *
 * On success a new post file is written to albums/<album_id>/posts/, the
 * album's `updatedDate` is refreshed, and `{success: true, post: {...}}`
 * is echoed. Failures echo an `error` message with HTTP 400/404/405/500.
 */
function importToAlbum() {
    if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
        http_response_code(405);
        echo json_encode(['error' => 'Method not allowed']);
        return;
    }

    $data = json_decode(file_get_contents('php://input'), true);
    if (!is_array($data) || !$data) {
        $data = $_POST;
    }

    $slug = $data['slug'] ?? '';
    $albumId = $data['album_id'] ?? '';

    if (empty($slug) || empty($albumId)) {
        http_response_code(400);
        echo json_encode(['error' => 'Thiếu slug hoặc album_id']);
        return;
    }

    // Path traversal guard: both identifiers must be bare names.
    $isSafeName = function ($name) {
        return is_scalar($name)
            && !in_array((string) $name, ['.', '..'], true)
            && strpbrk((string) $name, "/\\\0") === false;
    };

    if (!$isSafeName($slug) || !$isSafeName($albumId)) {
        http_response_code(400);
        echo json_encode(['error' => 'Slug hoặc album_id không hợp lệ']);
        return;
    }

    $scrapedDir = __DIR__ . '/scraped/';
    $file = $scrapedDir . $slug . '.json';

    if (!is_file($file)) {
        http_response_code(404);
        echo json_encode(['error' => 'Không tìm thấy post đã scrape']);
        return;
    }

    // An unreadable or corrupt file is handled like a post without media below.
    $postData = json_decode(file_get_contents($file), true);
    if (!is_array($postData)) {
        $postData = [];
    }

    // Validate album exists
    $albumsDir = __DIR__ . '/albums/';
    $albumFile = $albumsDir . $albumId . '.json';

    if (!is_file($albumFile)) {
        http_response_code(404);
        echo json_encode(['error' => 'Không tìm thấy album']);
        return;
    }

    // Read a list-valued field, falling back to [] when missing or not an array.
    $listField = function ($key) use ($postData) {
        $value = $postData[$key] ?? [];
        return is_array($value) ? $value : [];
    };

    // The post must have at least one image or one video.
    $images = array_values($listField('images'));
    $videos = array_values($listField('videos'));

    if (empty($images) && empty($videos)) {
        http_response_code(400);
        echo json_encode(['error' => 'Post phải có ít nhất 1 ảnh hoặc 1 video']);
        return;
    }

    $postsDir = $albumsDir . $albumId . '/posts/';
    if (!is_dir($postsDir) && !mkdir($postsDir, 0755, true) && !is_dir($postsDir)) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể tạo thư mục posts']);
        return;
    }

    $postId = time() . '-' . uniqid();
    $postFile = $postsDir . $postId . '.json';
    $now = date('c');

    $newPostData = [
        'id' => $postId,
        'album_id' => $albumId,
        'title' => $postData['title'] ?? '',
        'description' => $postData['description'] ?? '',
        'images' => $images,
        'videos' => $videos,
        'cosplayer' => $postData['cosplayer'] ?? '',
        'character' => $postData['character'] ?? '',
        'source' => $postData['source'] ?? '',
        'tags' => $listField('tags'),
        'download_links' => $listField('download_links'),
        'photo_count' => (int)($postData['photo_count'] ?? count($images)),
        'video_count' => (int)($postData['video_count'] ?? count($videos)),
        'source_url' => $postData['source_url'] ?? '',
        'categories' => $listField('categories'),
        'createdDate' => $now,
        'updatedDate' => $now
    ];

    // Set thumbnail: first image, otherwise first video.
    if (!empty($images)) {
        $newPostData['thumbnail'] = $images[0];
    } else if (!empty($videos)) {
        $newPostData['thumbnail'] = $videos[0];
    }

    $jsonFlags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;

    // Do not report success when the post could not be encoded or written.
    $postJson = json_encode($newPostData, $jsonFlags);
    if ($postJson === false || file_put_contents($postFile, $postJson) === false) {
        http_response_code(500);
        echo json_encode(['error' => 'Không thể lưu post']);
        return;
    }

    // Update album's updatedDate. A corrupt album file is left untouched
    // rather than being overwritten with only the date.
    $albumData = json_decode(file_get_contents($albumFile), true);
    if (is_array($albumData)) {
        $albumData['updatedDate'] = date('c');
        file_put_contents($albumFile, json_encode($albumData, $jsonFlags));
    }

    echo json_encode([
        'success' => true,
        'post' => $newPostData
    ]);
}

/**
 * Echo the id and title of every album stored under albums/ as a JSON array.
 *
 * Each *.json file in the directory is decoded; entries without an `id`
 * are skipped and a missing title becomes "Untitled Album". The list is
 * ordered by title (byte-wise, ascending). An absent directory yields an
 * empty array.
 */
function listAlbums() {
    $albumsDir = __DIR__ . '/albums/';
    $summaries = [];

    if (is_dir($albumsDir)) {
        foreach (scandir($albumsDir) as $entry) {
            $isJsonFile = $entry !== '.'
                && $entry !== '..'
                && pathinfo($entry, PATHINFO_EXTENSION) === 'json';
            if (!$isJsonFile) {
                continue;
            }

            $album = json_decode(file_get_contents($albumsDir . $entry), true);
            if (!$album || !isset($album['id'])) {
                continue;
            }

            $summaries[] = [
                'id' => $album['id'],
                'title' => $album['title'] ?? 'Untitled Album'
            ];
        }

        usort($summaries, function($left, $right) {
            return strcmp($left['title'], $right['title']);
        });
    }

    echo json_encode($summaries);
}

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Only HTTP and HTTPS are allowed, both for the initial request and for
 * redirects: the URL can originate from client input, so other schemes
 * cURL understands (file://, gopher://, ftp://, ...) are refused up front
 * and additionally blocked at the cURL level.
 *
 * @param string $url URL to fetch.
 * @return string|false The body when the final response is HTTP 200 with a
 *                      non-empty body and no cURL error; false otherwise.
 */
function fetchUrl($url) {
    if (!is_string($url) || $url === '') {
        return false;
    }

    // Reject URLs that explicitly name a scheme other than http/https.
    // Scheme-less input is still handed to cURL, as before.
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if ($scheme === false
        || ($scheme !== null && !in_array(strtolower($scheme), ['http', 'https'], true))) {
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        // NOTE(review): TLS certificate and host verification are disabled,
        // which allows man-in-the-middle tampering with scraped content.
        // Kept as-is because enabling it requires a CA bundle on the host;
        // confirm and turn verification back on where possible.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        CURLOPT_TIMEOUT => 30,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_ENCODING => 'gzip, deflate'
    ]);

    $html = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    curl_close($ch);

    if ($httpCode !== 200 || !$html || $error) {
        return false;
    }

    return $html;
}

/**
 * Load scraper_api.php unless parseCosplaytelePage() is already defined.
 */
if (function_exists('parseCosplaytelePage') === false) {
    require_once 'scraper_api.php';
}
?>
