const { execFileSync, execSync } = require('child_process');
const fs = require('fs');
const path = require('path');
const readline = require('readline');

const { chromium } = require('playwright');

// Safety caps for the crawl: the pagination loop stops once either is reached.
const MAX_PAGES = 1e5;
const MAX_TENDERS = 1e6;
// How long the operator has to type a captcha answer: 11 minutes, in milliseconds.
const CAPTCHA_TIMEOUT = 1000 * 60 * 11;

/**
 * Solves a captcha with a human in the loop.
 *
 * Saves a full-page debug screenshot next to this script, screenshots the
 * `#captchaImage` element into `captcha_images/`, asks the operator on stdin
 * for the text, and fills it into the first `input[name*="captcha"]` field.
 * The caller is responsible for submitting the form afterwards.
 *
 * @param {object} page - Playwright page that currently shows the captcha.
 * @param {string} [context='search'] - Label used in file names and log lines.
 * @returns {Promise<boolean>} Resolves to `true` once the answer was filled in.
 * @throws {Error} If the captcha image or input is missing, the operator gives
 *   no answer, stdin closes, or CAPTCHA_TIMEOUT elapses.
 */
async function handleCaptcha(page, context = 'search') {
  const captchaImagePath = path.join(__dirname, 'captcha_images');
  if (!fs.existsSync(captchaImagePath)) {
    fs.mkdirSync(captchaImagePath, { recursive: true });
  }

  // Tracked outside the try so the finally block can remove it on any exit path.
  let captchaImageFile = null;

  try {
    const timestamp = Date.now();
    const fullPageScreenshot = path.join(__dirname, `full_page_${context}_debug_${timestamp}.png`);
    await page.screenshot({ path: fullPageScreenshot, fullPage: true });
    console.log(`Full page screenshot (${context}) saved at: ${fullPageScreenshot}`);

    const captchaElement = await page.$('#captchaImage');
    if (!captchaElement) {
      throw new Error('Captcha image element not found');
    }

    captchaImageFile = path.join(captchaImagePath, `captcha_${context}_${timestamp}.png`);
    await captchaElement.screenshot({ path: captchaImageFile });
    console.log(`Captcha image (${context}) saved at: ${captchaImageFile}`);
    console.log(`Please enter the captcha text for ${context} (you have 11 minutes):`);

    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout
    });

    const captchaText = await new Promise((resolve, reject) => {
      // In every branch the promise is settled BEFORE rl.close(): close()
      // emits 'close' synchronously, and the first settlement must win.
      const timeout = setTimeout(() => {
        reject(new Error('Captcha input timeout - 11 minutes exceeded'));
        rl.close();
      }, CAPTCHA_TIMEOUT);

      // stdin ended without an answer: fail now instead of waiting 11 minutes.
      rl.once('close', () => {
        clearTimeout(timeout);
        reject(new Error('Captcha input stream closed before an answer was provided'));
      });

      rl.question(`Enter captcha text for ${context}: `, (answer) => {
        clearTimeout(timeout);
        resolve(answer.trim());
        rl.close();
      });
    });

    if (!captchaText) {
      throw new Error('No captcha text provided');
    }

    const captchaInput = await page.$('input[name*="captcha"]');
    if (!captchaInput) {
      throw new Error('Captcha input field not found');
    }

    await captchaInput.fill(captchaText);
    console.log(`Captcha entered for ${context}: ${captchaText}`);

    return true;

  } catch (error) {
    console.error(`Error in captcha handling (${context}): ${error.message}`);
    throw error;
  } finally {
    // Remove the temporary captcha image on success AND failure. A cleanup
    // problem is only logged so it can never undo a successful solve.
    if (captchaImageFile && fs.existsSync(captchaImageFile)) {
      try {
        fs.unlinkSync(captchaImageFile);
        console.log('Captcha image file cleaned up');
      } catch (cleanupError) {
        console.error(`Could not remove captcha image file: ${cleanupError.message}`);
      }
    }
  }
}

/**
 * Builds the per-tender folder name from the listing's serial number.
 *
 * Only the serial number is used: surrounding whitespace and one trailing dot
 * are removed (e.g. "12." -> "12"). No date prefix is added.
 *
 * @param {string|number} serialNo - Serial number as shown in the results table.
 * @param {string} titleText - Unused; kept for call-site compatibility.
 * @param {string} [tenderId=''] - Unused; kept for call-site compatibility.
 * @returns {string} The cleaned serial number.
 */
function createTenderFolderName(serialNo, titleText, tenderId = '') {
  // Trim BEFORE stripping the dot: the regex is anchored at the end of the
  // string, so trailing whitespace would otherwise hide the dot from it.
  return serialNo.toString().trim().replace(/\.$/, '').trim();
}

/**
 * Extracts a ZIP archive into `extractPath` with the system `unzip` tool and
 * then deletes the archive.
 *
 * @param {string} filePath - Path of the ZIP file to extract.
 * @param {string} extractPath - Directory to extract into; created if missing.
 * @returns {Promise<boolean>} `true` on success, `false` if any step failed
 *   (the error is logged, never thrown).
 */
async function extractAndDeleteZip(filePath, extractPath) {
  try {
    console.log(`   Extracting ZIP file: ${path.basename(filePath)}`);

    if (!fs.existsSync(extractPath)) {
      fs.mkdirSync(extractPath, { recursive: true });
    }

    // Arguments go through an argv array, never a shell string: the file name
    // comes from the download and could contain quotes or `$(...)`.
    // `cwd` replaces the former `cd "<extractPath>" &&` prefix.
    // NOTE(review): `sudo` is kept to preserve existing behaviour; it looks
    // unnecessary for files this process created itself — confirm and drop.
    execFileSync('sudo', ['unzip', '-o', filePath], { cwd: extractPath, stdio: 'inherit' });

    console.log(`   ZIP extracted to: ${extractPath}`);

    // `--` stops option parsing so a path can never be read as an rm flag.
    execFileSync('sudo', ['rm', '--', filePath]);
    console.log(`   ZIP file deleted: ${path.basename(filePath)}`);

    return true;
  } catch (error) {
    console.error(`   Error extracting ZIP: ${error.message}`);
    return false;
  }
}

/**
 * Checks whether a tender with the given identifier was already saved under
 * `downloadPath`.
 *
 * Each sub-folder's `tender_details_clean.json` is read and the identifier is
 * compared against both `basicDetails['Tender ID']` and
 * `basicDetails['Tender Reference Number']`.
 *
 * @param {string} downloadPath - Folder containing one sub-folder per tender.
 * @param {string} tenderId - Identifier to look for; falsy values never match.
 * @returns {boolean} `true` if a saved tender carries this identifier.
 *   Read and parse problems are logged and treated as "no match".
 */
function isTenderAlreadyScraped(downloadPath, tenderId) {
  if (!tenderId) return false;

  let folders;
  try {
    folders = fs.readdirSync(downloadPath);
  } catch (error) {
    console.error(`Error checking for duplicate tender: ${error.message}`);
    return false;
  }

  for (const folder of folders) {
    const tenderJsonPath = path.join(downloadPath, folder, 'tender_details_clean.json');

    // Errors are handled per folder so that one corrupt or half-written file
    // cannot hide a genuine match stored in another folder.
    try {
      if (!fs.existsSync(tenderJsonPath)) continue;

      const tenderData = JSON.parse(fs.readFileSync(tenderJsonPath, 'utf8'));
      const details = tenderData?.basicDetails;

      // The identifier parsed from the listing may be either the tender ID or
      // the reference number, so both stored values are candidates.
      const knownIds = [details?.['Tender ID'], details?.['Tender Reference Number']];
      if (knownIds.includes(tenderId)) {
        return true;
      }
    } catch (error) {
      console.error(`Error checking for duplicate tender in ${folder}: ${error.message}`);
    }
  }

  return false;
}

/**
 * Script entry point.
 *
 * Reads a JSON configuration from the first CLI argument (fields used here:
 * `stateName`, `sourceCode`, `url`, `selectors.firstClick`,
 * `selectors.searchButton`), opens the site in a persistent Chromium context,
 * solves the search captcha with the operator's help, then walks the paginated
 * result table. Every tender row is opened in its own tab, scraped via
 * `scrapeTenderDetails`, and finally a `processing_summary.json` is written
 * into `downloads/<sourceCode>/<DDMMYYYY>/`.
 *
 * The process exits with code 1 when the configuration is missing or invalid,
 * and sets exit code 1 when an unexpected error escapes the run.
 */
(async () => {
  const configJson = process.argv[2];
  if (!configJson) {
    console.error('No configuration provided');
    process.exit(1);
  }

  let config;
  try {
    config = JSON.parse(configJson);
    console.log(`Starting scraper for: ${config.stateName}`);
  } catch (error) {
    console.error('Invalid configuration JSON:', error.message);
    process.exit(1);
  }

  const userDataDir = `./user-data-dir-${config.sourceCode}`;

  const context = await chromium.launchPersistentContext(userDataDir, {
    headless: true,
    slowMo: 0
  });

  // Everything that uses the browser runs inside try/finally so the context is
  // closed on every exit path, including unexpected errors.
  try {
    const pages = context.pages();
    const page = pages.length > 0 ? pages[0] : await context.newPage();

    // Today's date as DDMMYYYY names the per-run download folder.
    const today = new Date();
    const day = String(today.getDate()).padStart(2, '0');
    const month = String(today.getMonth() + 1).padStart(2, '0');
    const year = today.getFullYear();
    const folderName = `${day}${month}${year}`;

    const downloadPath = path.join(__dirname, 'downloads', config.sourceCode, folderName);
    if (!fs.existsSync(downloadPath)) fs.mkdirSync(downloadPath, { recursive: true });

    console.log(`Created download folder: ${folderName}`);

    await page.goto(config.url);
    await page.click(config.selectors.firstClick);
    await page.waitForTimeout(1000);

    console.log('Handling search captcha...');
    try {
      await handleCaptcha(page, 'search');

      console.log('Clicking search button...');

      let searchClicked = false;

      // Configured selector first, then progressively more generic fallbacks.
      const searchSelectors = [
        config.selectors.searchButton,
        '#Submit',
        'input[type="submit"][value*="Search"]',
        'button[type="submit"]',
        'input[type="submit"]'
      ];

      for (const selector of searchSelectors) {
        try {
          const searchButton = await page.$(selector);
          if (searchButton) {
            const isVisible = await searchButton.isVisible();
            if (isVisible) {
              console.log(`Found search button with selector: ${selector}`);
              await searchButton.click();
              searchClicked = true;
              break;
            }
          }
        } catch (error) {
          // Best effort: a selector that fails just means "try the next one".
        }
      }

      // Last resort: submit the form by pressing Enter in the captcha field.
      if (!searchClicked) {
        const captchaInput = await page.$('input[name*="captcha"]');
        if (captchaInput) {
          await captchaInput.focus();
          await page.keyboard.press('Enter');
          searchClicked = true;
        } else {
          throw new Error('Could not find search button');
        }
      }

      await page.waitForSelector('a[id="DirectLink"]', { timeout: 300000 });
      console.log('Search results found!');

    } catch (error) {
      console.error('Search captcha handling failed:', error.message);
      // The surrounding finally closes the browser context.
      return;
    }

    const results = [];
    const processedSerialNumbers = new Set();
    let pageNo = 1;
    let totalTendersCollected = 0;

    while (pageNo <= MAX_PAGES && totalTendersCollected < MAX_TENDERS) {
      console.log(`Processing page ${pageNo}...`);

      const tenderRows = await page.$$('tr[class*="even"], tr[class*="odd"]');
      console.log(`Found ${tenderRows.length} tender rows on page ${pageNo}`);

      for (let rowIndex = 0; rowIndex < tenderRows.length && totalTendersCollected < MAX_TENDERS; rowIndex++) {
        const row = tenderRows[rowIndex];

        try {
          // Playwright's evaluate() takes ONE serialisable argument, so the
          // page number and row index travel together in a single object.
          const basicTenderData = await row.evaluate((el, { pageNum, rowIdx }) => {
            const cells = el.querySelectorAll('td');
            const text = (index) => cells[index]?.textContent?.trim() || '';
            const titleCell = cells[4];
            const titleLink = titleCell?.querySelector('a');

            const titleAndRef = text(4);
            const tenderIdMatch = titleAndRef.match(/(?:Tender\s+ID|ID|Ref\s+No)[:\s]+([A-Z0-9\/\-]+)/i);
            const preliminaryTenderId = tenderIdMatch ? tenderIdMatch[1] : '';

            return {
              serialNo: text(0),
              ePublishedDate: text(1),
              bidSubmissionClosingDate: text(2),
              tenderOpeningDate: text(3),
              titleAndRef: text(4),
              organisationChain: text(5),
              titleLinkHref: titleLink?.href || '',
              titleLinkText: titleLink?.textContent?.trim() || '',
              preliminaryTenderId: preliminaryTenderId,
              pageNumber: pageNum,
              rowIndex: rowIdx
            };
          }, { pageNum: pageNo, rowIdx: rowIndex });

          if (basicTenderData.titleLinkHref && basicTenderData.titleLinkText && basicTenderData.serialNo) {
            // The page number is part of the key, so equal serials on different
            // pages are treated as different tenders.
            const uniqueKey = `P${pageNo}_S${basicTenderData.serialNo}`;

            if (basicTenderData.preliminaryTenderId &&
                isTenderAlreadyScraped(downloadPath, basicTenderData.preliminaryTenderId)) {
              console.log(`   Skipping S.No ${basicTenderData.serialNo} - Already scraped`);
              continue;
            }

            if (!processedSerialNumbers.has(uniqueKey)) {
              processedSerialNumbers.add(uniqueKey);
              totalTendersCollected++;

              console.log(`\nProcessing tender ${totalTendersCollected}: S.No ${basicTenderData.serialNo} (Row ${rowIndex + 1}/${tenderRows.length}, Page ${pageNo})`);
              console.log(`   ${basicTenderData.titleLinkText.substring(0, 80)}...`);

              try {
                const tenderFolderName = createTenderFolderName(basicTenderData.serialNo, basicTenderData.titleLinkText);
                const tenderFolder = path.join(downloadPath, tenderFolderName);
                if (!fs.existsSync(tenderFolder)) fs.mkdirSync(tenderFolder, { recursive: true });

                const tenderDocsFolder = path.join(tenderFolder, 'docs');
                if (!fs.existsSync(tenderDocsFolder)) fs.mkdirSync(tenderDocsFolder, { recursive: true });

                // The detail page opens in its own tab so the listing page and
                // its row handles stay intact.
                const newTab = await context.newPage();
                try {
                  console.log(`   Opening new tab for: ${basicTenderData.titleLinkHref}`);

                  await newTab.goto(basicTenderData.titleLinkHref);
                  await newTab.waitForLoadState('networkidle');
                  await newTab.waitForTimeout(2000);

                  const detailedData = await scrapeTenderDetails(newTab, tenderFolder, tenderDocsFolder, basicTenderData, context);

                  const completeData = {
                    ...basicTenderData,
                    ...detailedData,
                    extractedAt: new Date().toISOString(),
                    folderPath: tenderFolder,
                    folderName: tenderFolderName
                  };

                  results.push(completeData);
                  console.log(`   Tender ${totalTendersCollected} (S.No: ${basicTenderData.serialNo}) processed successfully`);
                } finally {
                  // Close the tab even when navigation or scraping failed;
                  // otherwise every failure would leak an open tab.
                  await newTab.close();
                  console.log(`   Tab closed`);
                }
                await page.waitForTimeout(500);

              } catch (error) {
                console.error(`   Error processing tender: ${error.message}`);
              }
            } else {
              console.log(`   Skipping duplicate: S.No ${basicTenderData.serialNo}`);
            }
          }
        } catch (error) {
          console.error(`   Error extracting data from row ${rowIndex + 1}: ${error.message}`);
        }
      }

      console.log(`\nPage ${pageNo} completed. Processed ${totalTendersCollected} tenders so far.`);

      if (pageNo < MAX_PAGES && totalTendersCollected < MAX_TENDERS) {
        console.log(`Checking for next page...`);

        try {
          await page.waitForTimeout(2000);

          // The pager link is located by its visible text.
          const nextLinkHandle = await page.evaluateHandle(() => {
            const links = Array.from(document.querySelectorAll('a'));
            return links.find(link => link.textContent.trim().includes('Next'));
          });

          // asElement() is null when no matching link was found.
          const nextLink = nextLinkHandle.asElement();

          if (nextLink) {
            console.log(`Found Next link, clicking...`);
            await nextLink.click();
            await page.waitForLoadState('networkidle');
            await page.waitForTimeout(5000);

            const nextPageRows = await page.$$('tr[class*="even"], tr[class*="odd"]');

            if (nextPageRows.length > 0) {
              console.log(`Page ${pageNo + 1} loaded with ${nextPageRows.length} rows`);
              pageNo++;
              continue;
            } else {
              console.log(`No rows found - stopping pagination`);
              break;
            }
          } else {
            console.log(`Next link not found. Reached last page.`);
            break;
          }
        } catch (error) {
          console.error(`Error during pagination: ${error.message}`);
          break;
        }
      } else {
        console.log(`Stopping: Reached MAX_PAGES or MAX_TENDERS limit`);
        break;
      }
    }

    // Keys look like "P<page>_S<serial>"; keep the serial part, sorted numerically.
    const processedSerials = Array.from(processedSerialNumbers)
      .map(key => key.split('_S')[1])
      .sort((a, b) => Number.parseInt(a, 10) - Number.parseInt(b, 10));

    fs.writeFileSync(path.join(downloadPath, 'processing_summary.json'), JSON.stringify({
      totalTendersProcessed: totalTendersCollected,
      pagesProcessed: pageNo,
      processedAt: new Date().toISOString(),
      downloadFolder: `${config.sourceCode}/${folderName}`,
      serialNumbersProcessed: processedSerials,
      tenders: results
    }, null, 2));

    console.log(`\nScraping completed!`);
    console.log(`Results saved in: ${downloadPath}`);
    console.log(`Total tenders processed: ${totalTendersCollected}`);
    console.log(`Serial numbers processed: ${processedSerials.join(', ')}`);
  } finally {
    await context.close();
  }
})().catch((error) => {
  // Report anything that escaped the run instead of leaving a floating rejection.
  console.error('Scraper failed:', error);
  process.exitCode = 1;
});

/**
 * Scrapes one tender detail page and persists the result.
 *
 * Downloads the tender's documents via `downloadTenderDocuments`, reads the
 * whitelisted label/value pairs out of the page's tables, and writes both to
 * `tender_details_clean.json` inside `tenderFolder`.
 *
 * @param {object} detailPage - Playwright page showing the tender details.
 * @param {string} tenderFolder - Folder receiving the JSON file.
 * @param {string} tenderDocsFolder - Folder receiving downloaded documents.
 * @param {object} basicTenderData - Listing-row data (not read here).
 * @param {object} context - Browser context, forwarded to the downloader.
 * @returns {Promise<object>} `{ structuredData, dataFile, documentsDownloaded }`
 *   on success, or `{ error }` holding the message when anything failed.
 */
async function scrapeTenderDetails(detailPage, tenderFolder, tenderDocsFolder, basicTenderData, context) {
  try {
    console.log(`   Scraping tender details and downloading documents...`);

    await detailPage.waitForLoadState('networkidle');
    await detailPage.waitForTimeout(2000);

    const downloadedDocs = await downloadTenderDocuments(detailPage, tenderDocsFolder, context);

    // Runs inside the browser: nothing from the Node scope is visible in here.
    const pageSnapshot = await detailPage.evaluate(() => {
      // Collapse whitespace and drop everything outside printable ASCII, so
      // labels with trailing non-ASCII characters reduce to their ASCII part.
      const normalise = (raw) => raw
        .replace(/\s+/g, ' ')
        .replace(/[^\x20-\x7E]/g, '')
        .replace(/tttttt+/g, '')
        .trim();

      // Labels worth keeping; every other table cell is ignored.
      const wantedLabels = [
        'Organisation Chain', 'Tender Reference Number', 'Tender ID', 'Withdrawal Allowed',
        'Tender Type', 'Form Of Contract', 'Tender Category', 'No. of Covers',
        'General Technical Evaluation Allowed', 'ItemWise Technical Evaluation Allowed',
        'Payment Mode', 'Is Multi Currency Allowed For BOQ', 'Is Multi Currency Allowed For Fee',
        'Allow Two Stage Bidding', 'EMD Amount in', 'EMD Exemption Allowed', 'EMD Fee Type',
        'EMD Percentage', 'EMD Payable To', 'EMD Payable At', 'Title', 'Work Description',
        'NDA/Pre Qualification', 'Independent External Monitor/Remarks', 'Tender Value in',
        'Product Category', 'Contract Type', 'Bid Validity(Days)', 'Location', 'Pincode',
        'Pre Bid Meeting Address', 'Pre Bid Meeting Date', 'Should Allow NDA Tender',
        'Allow Preferential Bidder', 'Published Date', 'Bid Opening Date',
        'Document Download / Sale Start Date', 'Document Download / Sale End Date',
        'Clarification Start Date', 'Clarification End Date', 'Bid Submission Start Date',
        'Bid Submission End Date', 'Name', 'Address'
      ];

      // Walks every table row; rows carry one pair (cells 0/1) or two pairs
      // (cells 0/1 and 2/3).
      const collectFields = () => {
        const collected = {};

        const record = (labelCell, valueCell) => {
          const label = normalise(labelCell.textContent);
          const value = normalise(valueCell.textContent);
          const isWanted = wantedLabels.includes(label);
          if (isWanted && value && label !== value) {
            collected[label] = value;
          }
        };

        for (const table of document.querySelectorAll('table')) {
          for (const tableRow of table.querySelectorAll('tr')) {
            const cellList = tableRow.querySelectorAll('td');
            if (cellList.length >= 2) {
              record(cellList[0], cellList[1]);
            }
            if (cellList.length >= 4) {
              record(cellList[2], cellList[3]);
            }
          }
        }

        return collected;
      };

      return {
        url: window.location.href,
        pageTitle: normalise(document.title || ''),
        extractedAt: new Date().toISOString(),
        basicDetails: collectFields()
      };
    });

    // Page data plus the list of documents fetched for this tender.
    const { url, pageTitle, extractedAt, basicDetails } = pageSnapshot;
    const cleanedData = {
      url,
      pageTitle,
      extractedAt,
      basicDetails,
      downloadedDocuments: downloadedDocs
    };

    const fileName = `tender_details_clean.json`;
    const outputFile = path.join(tenderFolder, fileName);
    fs.writeFileSync(outputFile, JSON.stringify(cleanedData, null, 2));

    const fieldCount = Object.keys(cleanedData.basicDetails).length;
    console.log(`   Clean tender details saved to: ${fileName}`);
    console.log(`   Basic details extracted: ${fieldCount} fields`);
    console.log(`   Documents downloaded: ${downloadedDocs.length}`);

    return {
      structuredData: cleanedData,
      dataFile: fileName,
      documentsDownloaded: downloadedDocs.length
    };

  } catch (error) {
    console.error(`   Error scraping tender details: ${error.message}`);
    return { error: error.message };
  }
}

// True once the document-download captcha has been submitted during this run.
// While false, '#docDownoad' is tried first and the resulting page is checked
// for a captcha; once true, '#DirectLink_0' is tried first instead.
let captchaAlreadySolved = false;

// Longest time to wait for downloads that have started but not finished saving.
const DOWNLOAD_SETTLE_TIMEOUT_MS = 60 * 1000;

/**
 * Waits until every promise in `pending` has settled, or until
 * DOWNLOAD_SETTLE_TIMEOUT_MS elapses, whichever comes first. Never rejects.
 *
 * @param {Promise<void>[]} pending - In-flight download save operations.
 * @returns {Promise<void>}
 */
async function waitForPendingDownloads(pending) {
  if (pending.length === 0) return;

  let timer;
  const deadline = new Promise((resolve) => {
    timer = setTimeout(resolve, DOWNLOAD_SETTLE_TIMEOUT_MS);
  });

  try {
    await Promise.race([Promise.allSettled(pending), deadline]);
  } finally {
    // Clear the timer so it cannot keep the process alive after the wait.
    clearTimeout(timer);
  }
}

/**
 * Triggers and saves the document downloads offered on a tender detail page.
 *
 * Clicks the first available document link (by id, then by link text
 * containing "tendernotice"), solves the download captcha through
 * `handleCaptcha` when one is detected, optionally clicks the
 * '#DirectLink_7' link, and saves every download into `tenderDocsFolder`.
 * ZIP files are extracted into `<tenderDocsFolder>/extracted` and removed.
 *
 * @param {object} detailPage - Playwright page showing the tender details.
 * @param {string} tenderDocsFolder - Folder receiving the downloaded files.
 * @param {object} context - Browser context (not read here).
 * @returns {Promise<object[]>} One entry per saved file with `fileName`,
 *   `filePath`, `downloadedAt`, `fileSize`, `fileType`, plus `extracted` and
 *   `extractPath` for extracted ZIPs. Errors are logged, never thrown.
 */
async function downloadTenderDocuments(detailPage, tenderDocsFolder, context) {
  const downloadedDocs = [];
  // One promise per started download, so the function can wait for the files
  // to be fully written before it returns and the caller closes the tab.
  const pendingDownloads = [];

  // Saves one download and records it; never rejects.
  const saveDownload = async (download) => {
    try {
      // basename() guarantees the file lands inside tenderDocsFolder even if
      // the suggested name carries directory components.
      const suggestedFilename = path.basename(download.suggestedFilename() || 'document.pdf');
      const filePath = path.join(tenderDocsFolder, suggestedFilename);
      await download.saveAs(filePath);

      const fileSize = fs.existsSync(filePath) ? fs.statSync(filePath).size : 0;
      const isZip = suggestedFilename.toLowerCase().endsWith('.zip');

      // NOTE(review): every non-ZIP file is labelled 'pdf' regardless of its
      // real extension; kept as-is because consumers may rely on it.
      const docEntry = {
        fileName: suggestedFilename,
        filePath,
        downloadedAt: new Date().toISOString(),
        fileSize: fileSize,
        fileType: isZip ? 'zip' : 'pdf'
      };
      downloadedDocs.push(docEntry);

      console.log(`   Downloaded: ${suggestedFilename} (${fileSize} bytes)`);

      if (isZip) {
        const extractPath = path.join(tenderDocsFolder, 'extracted');
        const extracted = await extractAndDeleteZip(filePath, extractPath);
        if (extracted) {
          // Update this download's own entry instead of "the last element",
          // which could belong to another download that finished meanwhile.
          docEntry.extracted = true;
          docEntry.extractPath = extractPath;
        }
      }

    } catch (downloadError) {
      console.error(`   Download save error: ${downloadError.message}`);
    }
  };

  try {
    console.log(`   Looking for document links...`);

    await detailPage.waitForLoadState('networkidle');
    await detailPage.waitForTimeout(2000);

    detailPage.on('download', (download) => {
      pendingDownloads.push(saveDownload(download));
    });

    let documentClicked = false;
    let selectorUsed = '';

    // The id is spelled '#docDownoad' in every use within this function.
    if (!captchaAlreadySolved) {
      try {
        const docDownloadElement = await detailPage.$('#docDownoad');
        if (docDownloadElement) {
          await detailPage.click('#docDownoad');
          documentClicked = true;
          selectorUsed = '#docDownoad';
          await detailPage.waitForTimeout(2000);
        }
      } catch (error) {
        console.log(`   #docDownoad not found`);
      }
    } else {
      try {
        const directLinkElement = await detailPage.$('#DirectLink_0');
        if (directLinkElement) {
          await detailPage.click('#DirectLink_0');
          documentClicked = true;
          selectorUsed = '#DirectLink_0';
          await detailPage.waitForTimeout(2000);
        }
      } catch (error) {
        console.log(`   #DirectLink_0 not found`);
      }
    }

    if (!documentClicked) {
      const fallbackSelectors = ['#DirectLink_1', '#DirectLink_2', '#docDownoad'];

      for (const selector of fallbackSelectors) {
        try {
          const element = await detailPage.$(selector);
          if (element) {
            await detailPage.click(selector);
            documentClicked = true;
            selectorUsed = selector;
            await detailPage.waitForTimeout(2000);
            break;
          }
        } catch (error) {
          // Best effort: a failing selector just means "try the next one".
        }
      }
    }

    // Last resort: click the first link whose text contains "tendernotice".
    if (!documentClicked) {
      documentClicked = await detailPage.evaluate(() => {
        const allLinks = Array.from(document.querySelectorAll('a'));
        for (const link of allLinks) {
          const linkText = link.textContent.trim();
          if (linkText.toLowerCase().includes('tendernotice')) {
            link.click();
            return true;
          }
        }
        return false;
      });

      if (documentClicked) {
        selectorUsed = 'text-based';
        await detailPage.waitForTimeout(2000);
      }
    }

    if (!documentClicked) {
      console.log(`   No document links found`);
      return [];
    }

    console.log(`   Used selector: ${selectorUsed}`);

    if (!captchaAlreadySolved) {
      const currentUrl = detailPage.url();
      const pageContent = await detailPage.content();

      // The DOM lookup only runs when neither textual check already matched.
      const isCaptchaPage = (
        pageContent.toLowerCase().includes('captcha') ||
        currentUrl.includes('captcha') ||
        (await detailPage.$('#captchaImage')) !== null
      );

      if (isCaptchaPage) {
        console.log(`   CAPTCHA PAGE DETECTED FOR TENDER DOCUMENTS!`);

        await handleCaptcha(detailPage, 'tender_documents');

        let submitClicked = false;

        const submitSelectors = [
          'input[type="submit"]',
          'button[type="submit"]',
          'input[value*="Submit"]'
        ];

        for (const selector of submitSelectors) {
          try {
            const submitButton = await detailPage.$(selector);
            if (submitButton) {
              const isVisible = await submitButton.isVisible();
              if (isVisible) {
                await submitButton.click();
                submitClicked = true;
                break;
              }
            }
          } catch (error) {
            // Best effort: a failing selector just means "try the next one".
          }
        }

        // No usable submit button: press Enter in the captcha field instead.
        if (!submitClicked) {
          const captchaInput = await detailPage.$('input[name*="captcha"]');
          if (captchaInput) {
            await captchaInput.focus();
            await detailPage.keyboard.press('Enter');
            submitClicked = true;
          }
        }

        if (submitClicked) {
          captchaAlreadySolved = true;
          console.log(`   Tender document captcha solved!`);
          await detailPage.waitForTimeout(4000);
        }

        await detailPage.waitForTimeout(1500);

        // After the captcha, click every "tendernotice" link on the page.
        const additionalClicks = await detailPage.evaluate(() => {
          const allLinks = Array.from(document.querySelectorAll('a'));
          let clickCount = 0;

          for (const link of allLinks) {
            const linkText = link.textContent.trim();
            if (linkText.toLowerCase().includes('tendernotice')) {
              link.click();
              clickCount++;
            }
          }

          return clickCount;
        });

        if (additionalClicks > 0) {
          await detailPage.waitForTimeout(6000);
        }
      }
    } else {
      await detailPage.waitForTimeout(6000);
    }

    // '#DirectLink_7' is treated as the ZIP download link.
    if (captchaAlreadySolved || selectorUsed === '#docDownoad') {
      try {
        const zipElement = await detailPage.$('#DirectLink_7');
        if (zipElement) {
          await detailPage.click('#DirectLink_7');
          await detailPage.waitForTimeout(4000);
        }
      } catch (error) {
        // ZIP link not available on this page.
      }
    }

    // Fixed sleeps alone do not guarantee that saveAs() has finished.
    await waitForPendingDownloads(pendingDownloads);

    if (downloadedDocs.length > 0) {
      console.log(`   Successfully downloaded ${downloadedDocs.length} documents!`);
    }

  } catch (error) {
    console.error(`   Error downloading documents: ${error.message}`);
  }

  // Also covers the error path: downloads that already started still get the
  // chance to finish before the caller closes the tab.
  await waitForPendingDownloads(pendingDownloads);

  return downloadedDocs;
}
