# bid_processing_apis.py
"""
Bid Processing API Routes for Minaions Tender System
To be imported and included in main app.py
"""

import os
import json
import logging
from typing import Dict, List, Optional, Any
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel, Field
import time
import uuid
from datetime import datetime
from pathlib import Path

# Import the processing modules
import refine_gem_bid_data as gem_refine
import seller_analysis as seller_stats
import process_eproc_data as eproc

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create router for bid processing
router = APIRouter(prefix="/api/bid-processing", tags=["Bid Processing"])

# =================================================================
# REQUEST/RESPONSE MODELS
# =================================================================

class ProcessGeMBidsRequest(BaseModel):
    input_file_path: str = Field(..., description="Path to input gem_bids_complete.json")
    tenant_id: str = Field(..., description="Tenant ID")
    output_file_name: str = Field(default="processed_bids_data.json", description="Output filename")

class AnalyzeSellersRequest(BaseModel):
    input_file_path: str = Field(..., description="Path to processed bids JSON file")
    tenant_id: str = Field(..., description="Tenant ID")
    output_file_name: str = Field(default="seller_analysis_output.json", description="Output filename")
    search_keywords: Optional[List[str]] = Field(None, description="Keywords to filter bids")
    search_mode: str = Field(default="any", description="Search mode: 'any' or 'all'")

class ProcessEProcBidsRequest(BaseModel):
    root_directory: str = Field(..., description="Root directory containing bid folders")
    tenant_id: str = Field(..., description="Tenant ID")
    output_file_name: str = Field(default="processed_bids_data.json", description="Output filename")

class GetBidAnalyticsRequest(BaseModel):
    analysis_file_path: str = Field(..., description="Path to seller analysis JSON file")
    tenant_id: str = Field(..., description="Tenant ID")

class ExportBidsRequest(BaseModel):
    processed_bids_file: str = Field(..., description="Path to processed bids JSON")
    tenant_id: str = Field(..., description="Tenant ID")
    export_format: str = Field(default="json", description="Export format: json or csv")
    limit: Optional[int] = Field(None, description="Limit number of records to export")

class ProcessGeMBidsDirectRequest(BaseModel):
    bids_data: Dict[str, Any] = Field(..., description="Raw bids JSON data")
    tenant_id: str = Field(..., description="Tenant ID")
    search_keywords: Optional[List[str]] = Field(None, description="Keywords to filter sellers")
    search_mode: str = Field(default="any", description="Search mode: 'any' or 'all'")

class ProcessGeMBidsFullRequest(BaseModel):
    input_file_path: str = Field(..., description="Path to input gem_bids_complete.json")
    tenant_id: str = Field(..., description="Tenant ID")
    search_keywords: Optional[List[str]] = Field(None, description="Keywords to filter sellers")
    search_mode: str = Field(default="any", description="Search mode: 'any' or 'all'")
    return_type: str = Field(default="json", description="Return type: 'json' (response body) or 'file' (save to disk)")
    output_file_path: Optional[str] = Field(None, description="Full path for output file. If not provided, defaults to /tmp/bid_processing/{tenant_id}/combined_processing/seller_analysis_output.json")
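
    # Illustrative request body (all values below are hypothetical):
    #   {
    #     "input_file_path": "/data/gem_bids_complete.json",
    #     "tenant_id": "acme",
    #     "search_keywords": ["networking", "switches"],
    #     "search_mode": "any",
    #     "return_type": "file"
    #   }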

class ProcessingResult(BaseModel):
    status: str
    message: str
    file_path: Optional[str]
    processing_time: float
    record_count: int
    details: Dict[str, Any]

class SellerAnalyticsResult(BaseModel):
    status: str
    message: str
    file_path: Optional[str]
    total_sellers: int
    total_bids_analyzed: int
    top_sellers: List[Dict[str, Any]]
    analysis_metadata: Dict[str, Any]

class BidAnalyticsResponse(BaseModel):
    status: str
    data: Dict[str, Any]
    summary: Dict[str, Any]

# =================================================================
# UTILITY FUNCTIONS
# =================================================================

def get_tenant_output_dir(tenant_id: str, process_type: str) -> Path:
    """Get tenant-specific output directory"""
    base_dir = Path("/tmp/bid_processing") / tenant_id / process_type
    base_dir.mkdir(parents=True, exist_ok=True)
    return base_dir
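
# Example (illustrative tenant ID): get_tenant_output_dir("acme", "gem_processing")
# creates and returns Path("/tmp/bid_processing/acme/gem_processing").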

def verify_file_exists(file_path: str) -> bool:
    """Verify that a file exists"""
    return os.path.exists(file_path)

def verify_directory_exists(dir_path: str) -> bool:
    """Verify that a directory exists"""
    return os.path.isdir(dir_path)

def count_folders_with_required_files(root_dir: str) -> tuple:
    """
    Count folders with required files (PDF and JSON)
    Returns: (total_folders, valid_folders, invalid_folders)
    """
    root_path = Path(root_dir)
    total_folders = 0
    valid_folders = 0
    invalid_folders = 0
    
    if not root_path.exists():
        return 0, 0, 0
    
    for folder in root_path.iterdir():
        if folder.is_dir():
            total_folders += 1
            has_pdf = any(f.suffix.lower() == '.pdf' for f in folder.iterdir() if f.is_file())
            has_json = any(f.name == 'stage_summary_data.json' for f in folder.iterdir() if f.is_file())
            
            if has_pdf and has_json:
                valid_folders += 1
            else:
                invalid_folders += 1
    
    return total_folders, valid_folders, invalid_folders
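
# For example (hypothetical layout): a root directory with 12 subfolders, 10 of which
# contain both a PDF and stage_summary_data.json, yields (12, 10, 2).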


# ============================================================================
# TASK TRACKING SYSTEM
# ============================================================================

TASK_REGISTRY_DIR = Path("/tmp/bid_processing/.task_registry")
TASK_REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
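# Each task is persisted as {task_id}.json in this directory, so status lookups
# survive across requests and worker restarts on the same host.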

def create_task_id() -> str:
    """Generate unique task ID"""
    return str(uuid.uuid4())

def save_task_metadata(task_id: str, tenant_id: str, root_directory: str, output_file_name: str):
    """Save task metadata for later status lookup"""
    task_file = TASK_REGISTRY_DIR / f"{task_id}.json"
    metadata = {
        "task_id": task_id,
        "tenant_id": tenant_id,
        "root_directory": root_directory,
        "output_file_name": output_file_name,
        "created_at": datetime.now().isoformat(),
        "status": "processing"
    }
    with open(task_file, 'w') as f:
        json.dump(metadata, f)
    logger.info(f"Task metadata saved for {task_id}")

def get_task_metadata(task_id: str) -> Optional[Dict]:
    """Retrieve task metadata"""
    task_file = TASK_REGISTRY_DIR / f"{task_id}.json"
    if task_file.exists():
        with open(task_file, 'r') as f:
            return json.load(f)
    return None

def update_task_status(task_id: str, status: str, result_data: Optional[Dict] = None):
    """Update task status and result"""
    task_file = TASK_REGISTRY_DIR / f"{task_id}.json"
    if task_file.exists():
        with open(task_file, 'r') as f:
            metadata = json.load(f)
        
        metadata["status"] = status
        metadata["completed_at"] = datetime.now().isoformat()
        if result_data:
            metadata["result"] = result_data
        
        with open(task_file, 'w') as f:
            json.dump(metadata, f)
        logger.info(f"Task {task_id} status updated to {status}")

# ============================================================================
# BACKGROUND TASK WRAPPER
# ============================================================================

def process_eproc_bids_background_wrapper(task_id: str):
    """Wrapper to run handler and update task status"""
    try:
        logger.info(f"Starting background processing for task {task_id}")
        
        metadata = get_task_metadata(task_id)
        if not metadata:
            logger.error(f"Task metadata not found for {task_id}")
            return
        
        # Run the actual processing
        result = process_eproc_bids_handler(
            metadata["root_directory"],
            metadata["tenant_id"],
            metadata["output_file_name"]
        )
        
        # Update task status with result
        update_task_status(task_id, "completed", result)
        logger.info(f"Task {task_id} completed successfully")
        
    except Exception as e:
        logger.error(f"Task {task_id} failed: {str(e)}", exc_info=True)
        update_task_status(task_id, "failed", {
            "error": str(e),
            "error_type": type(e).__name__
        })

# =================================================================
# PROCESSING FUNCTIONS
# =================================================================

def process_gem_bids_handler(input_file: str, tenant_id: str, output_file: str) -> Dict[str, Any]:
    """Process GeM bid data"""
    start_time = time.time()
    try:
        logger.info(f"Starting GeM bid processing for tenant {tenant_id}")
        
        # Verify input file exists
        if not verify_file_exists(input_file):
            raise FileNotFoundError(f"Input file not found: {input_file}")
        
        # Get output directory
        output_dir = get_tenant_output_dir(tenant_id, "gem_processing")
        output_path = output_dir / output_file
        
        # Load the JSON data
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        logger.info(f"Loaded {len(data.get('bids', []))} bids from input file")
        
        # Process the bids data using refine_gem_bid_data module
        processed_data = gem_refine.process_bids_data(data)
        
        # Save processed data
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(processed_data, f, indent=2, ensure_ascii=False)
        
        processing_time = time.time() - start_time
        
        # Calculate seller stats
        total_sellers = sum(
            len(bid.get('detailedInfo', {}).get('sections', {}).get('evaluation', {}).get('sellers', []))
            for bid in processed_data['bids']
        )
        
        result = {
            "status": "success",
            "message": f"Successfully processed {len(processed_data['bids'])} bids",
            "file_path": str(output_path),
            "processing_time": processing_time,
            "record_count": len(processed_data['bids']),
            "details": {
                "total_sellers": total_sellers,
                "metadata": processed_data.get('metadata', {})
            }
        }
        
        logger.info(f"GeM bid processing completed in {processing_time:.2f}s")
        return result
        
    except Exception as e:
        logger.error(f"Error processing GeM bids: {str(e)}", exc_info=True)
        return {
            "status": "error",
            "message": f"Failed to process GeM bids: {str(e)}",
            "file_path": None,
            "processing_time": time.time() - start_time,
            "record_count": 0,
            "details": {"error": str(e)}
        }

def analyze_sellers_handler(input_file: str, tenant_id: str, output_file: str,
                           search_keywords: Optional[List[str]] = None,
                           search_mode: str = "any") -> Dict[str, Any]:
    """Analyze sellers from processed bids"""
    start_time = time.time()
    try:
        logger.info(f"Starting seller analysis for tenant {tenant_id}")
        
        # Verify input file exists
        if not verify_file_exists(input_file):
            raise FileNotFoundError(f"Input file not found: {input_file}")
        
        # Get output directory
        output_dir = get_tenant_output_dir(tenant_id, "seller_analysis")
        output_path = output_dir / output_file
        
        # Run seller analysis
        seller_stats.analyze_bidders(
            input_file,
            str(output_path),
            search_keywords=search_keywords,
            search_mode=search_mode
        )
        
        # Load results to get summary
        with open(output_path, 'r', encoding='utf-8') as f:
            results = json.load(f)
        
        processing_time = time.time() - start_time
        
        # Get top 5 sellers
        top_sellers = results.get('sellers', [])[:5]
        
        result = {
            "status": "success",
            "message": f"Successfully analyzed {len(results.get('sellers', []))} unique sellers",
            "file_path": str(output_path),
            "total_sellers": results['metadata'].get('total_unique_sellers', 0),
            "total_bids_analyzed": sum(s.get('total_bids_applied', 0) for s in results.get('sellers', [])),
            "top_sellers": top_sellers,
            "analysis_metadata": {
                "analysis_date": results['metadata'].get('analysis_date'),
                "source_total_bids": results['metadata'].get('source_total_bids'),
                "filter_applied": results['metadata'].get('filter_applied'),
                "processing_time": processing_time
            }
        }
        
        logger.info(f"Seller analysis completed in {processing_time:.2f}s")
        return result
        
    except Exception as e:
        logger.error(f"Error analyzing sellers: {str(e)}", exc_info=True)
        return {
            "status": "error",
            "message": f"Failed to analyze sellers: {str(e)}",
            "file_path": None,
            "total_sellers": 0,
            "total_bids_analyzed": 0,
            "top_sellers": [],
            "analysis_metadata": {"error": str(e), "processing_time": time.time() - start_time}
        }

def process_eproc_bids_handler(root_directory: str, tenant_id: str, output_file: str) -> Dict[str, Any]:
    """
    Process eproc bid data from work order PDFs and JSON files
    
    Expected folder structure:
    root_directory/
    ├── folder_1/
    │   ├── work_order.pdf (or any .pdf file)
    │   └── stage_summary_data.json
    ├── folder_2/
    │   ├── work_order.pdf
    │   └── stage_summary_data.json
    ...
    """
    start_time = time.time()
    try:
        logger.info(f"Starting eproc bid processing for tenant {tenant_id}")
        logger.info(f"Root directory: {root_directory}")
        
        # Verify root directory exists
        if not verify_directory_exists(root_directory):
            raise NotADirectoryError(f"Root directory not found: {root_directory}")
        
        # Get output directory
        output_dir = get_tenant_output_dir(tenant_id, "eproc_processing")
        output_path = output_dir / output_file
        
        # Count folders and validate
        total_folders, valid_folders, invalid_folders = count_folders_with_required_files(root_directory)
        
        logger.info(f"Found {total_folders} folders: {valid_folders} valid, {invalid_folders} invalid")
        
        if total_folders == 0:
            raise ValueError(f"No folders found in {root_directory}")
        
        if valid_folders == 0:
            raise ValueError(f"No valid bid folders found. Each folder must contain a PDF file and 'stage_summary_data.json'")
        
        # Initialize the eproc processor
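        # Note: the Claude API key is read from the ANTHROPIC_API_KEY environment
        # variable; if it is unset, None is passed through to the processor.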
        processor = eproc.BidProcessor(root_directory, str(output_path), os.getenv("ANTHROPIC_API_KEY"))
        
        # Process all folders
        logger.info(f"Starting to process {valid_folders} valid folders...")
        result_data = processor.process_all_folders()
        
        processing_time = time.time() - start_time
        
        # Calculate statistics
        total_bids = len(result_data.get('bids', []))
        total_sellers = sum(
            len(bid.get('detailedInfo', {}).get('sections', {}).get('evaluation', {}).get('sellers', []))
            for bid in result_data.get('bids', [])
        )
        
        final_result = {
            "status": "success",
            "message": f"Successfully processed {total_bids} eproc bids from {valid_folders} folders",
            "file_path": str(output_path),
            "processing_time": processing_time,
            "record_count": total_bids,
            "details": {
                "total_sellers": total_sellers,
                "folders_processed": valid_folders,
                "folders_skipped": invalid_folders,
                "total_folders_found": total_folders,
                "metadata": result_data.get('metadata', {}),
                "processing_summary": {
                    "bids_with_detailed_info": result_data.get('metadata', {}).get('withDetailedInfo', 0),
                    "detail_fetch_success_rate": result_data.get('metadata', {}).get('detailFetchSuccess', '0%')
                }
            }
        }
        
        logger.info(f"eproc bid processing completed in {processing_time:.2f}s")
        logger.info(f"Processed {total_bids} bids with {total_sellers} unique sellers")
        
        return final_result
        
    except NotADirectoryError as e:
        logger.error(f"Directory error in eproc processing: {str(e)}")
        return {
            "status": "error",
            "message": f"Directory error: {str(e)}",
            "file_path": None,
            "processing_time": time.time() - start_time,
            "record_count": 0,
            "details": {"error": str(e), "error_type": "directory_not_found"}
        }
    
    except ValueError as e:
        logger.error(f"Validation error in eproc processing: {str(e)}")
        return {
            "status": "error",
            "message": f"Validation error: {str(e)}",
            "file_path": None,
            "processing_time": time.time() - start_time,
            "record_count": 0,
            "details": {"error": str(e), "error_type": "validation_error"}
        }
    
    except Exception as e:
        logger.error(f"Error processing eproc bids: {str(e)}", exc_info=True)
        return {
            "status": "error",
            "message": f"Failed to process eproc bids: {str(e)}",
            "file_path": None,
            "processing_time": time.time() - start_time,
            "record_count": 0,
            "details": {"error": str(e), "error_type": "processing_error"}
        }

def get_bid_analytics_handler(analysis_file: str, tenant_id: str) -> Dict[str, Any]:
    """Get analytics from bid analysis data"""
    try:
        logger.info(f"Generating bid analytics for tenant {tenant_id}")
        
        if not verify_file_exists(analysis_file):
            raise FileNotFoundError(f"Analysis file not found: {analysis_file}")
        
        with open(analysis_file, 'r', encoding='utf-8') as f:
            analysis_data = json.load(f)
        
        sellers = analysis_data.get('sellers', [])
        metadata = analysis_data.get('metadata', {})
        
        total_bids = sum(s.get('total_bids_applied', 0) for s in sellers)
        total_l1_winners = sum(s.get('total_bids_won_l1', 0) for s in sellers)
        
        # Calculate analytics
        analytics = {
            "total_sellers": len(sellers),
            "total_bids": total_bids,
            "total_qualified": sum(s.get('total_bids_qualified', 0) for s in sellers),
            "total_disqualified": sum(s.get('total_bids_disqualified', 0) for s in sellers),
            "total_l1_winners": total_l1_winners,
            "total_amount_won": metadata.get('total_amount_won_all_l1_bids', '₹0.00'),
            "average_bids_per_seller": total_bids / len(sellers) if sellers else 0,
            "success_rate": (total_l1_winners / total_bids * 100) if total_bids > 0 else 0,
            "top_10_sellers": sellers[:10],
            "seller_locations": list(set([loc for s in sellers for loc in s.get('seller_locations', [])])),
            "special_categories": {
                "mse": len([s for s in sellers if 'MSE' in s.get('special_status', '')]),
                "mii": len([s for s in sellers if 'MII' in s.get('special_status', '')])
            }
        }
        
        result = {
            "status": "success",
            "data": analytics,
            "summary": {
                "metadata": metadata,
                "generated_at": datetime.now().isoformat()
            }
        }
        
        return result
        
    except Exception as e:
        logger.error(f"Error generating analytics: {str(e)}", exc_info=True)
        return {
            "status": "error",
            "data": {},
            "summary": {"error": str(e), "generated_at": datetime.now().isoformat()}
        }

# =================================================================
# API ENDPOINTS
# =================================================================

@router.post("/process/gem-bids", response_model=ProcessingResult)
async def process_gem_bids(
    request: ProcessGeMBidsRequest,
    background_tasks: BackgroundTasks,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """Process GeM bid data and refine it"""
    try:
        result = process_gem_bids_handler(
            request.input_file_path,
            request.tenant_id,
            request.output_file_name
        )
        
        if result["status"] != "success":
            raise HTTPException(status_code=500, detail=result["message"])
            
        return ProcessingResult(**result)
            
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in process_gem_bids endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# ============================================================================
# PROCESS EPROC BIDS ENDPOINT (NON-BLOCKING, TASK-TRACKED)
# ============================================================================

@router.post("/process/eproc-bids", response_model=ProcessingResult)
async def process_eproc_bids(
    request: ProcessEProcBidsRequest,
    background_tasks: BackgroundTasks,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """
    Process eproc bid data from work order PDFs and JSON files (Non-blocking)
    
    Returns task_id that can be used to check status via GET /process/eproc-bids/status/{task_id}
    """
    try:
        logger.info(f"Received eproc processing request for tenant: {request.tenant_id}")
        
        # ✅ Generate task ID
        task_id = create_task_id()
        logger.info(f"Created task {task_id} for tenant {request.tenant_id}")
        
        # ✅ Save task metadata
        save_task_metadata(
            task_id,
            request.tenant_id,
            request.root_directory,
            request.output_file_name
        )
        
        # ✅ Add background task with wrapper
        background_tasks.add_task(
            process_eproc_bids_background_wrapper,
            task_id
        )
        
        # ✅ Return task_id to client
        return ProcessingResult(
            status="processing",
            message=f"eproc processing started - task ID: {task_id}",
            file_path=None,
            processing_time=0,
            record_count=0,
            details={
                "status": "processing_in_background",
                "task_id": task_id,
                "poll_url": f"/api/bid-processing/process/eproc-bids/status/{task_id}"
            }
        )
            
    except Exception as e:
        logger.error(f"Error initiating eproc processing: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))

# ============================================================================
# EPROC PROCESSING STATUS ENDPOINT
# ============================================================================

@router.get("/process/eproc-bids/status/{task_id}")
async def get_eproc_status(task_id: str):
    """
    Check processing status using task_id
    
    Returns:
    - processing: Still running
    - completed: Finished successfully with results
    - failed: Error occurred
    - error: Task not found or invalid
    """
    try:
        logger.info(f"Checking status for task {task_id}")
        
        # ✅ Get task metadata
        metadata = get_task_metadata(task_id)
        
        if not metadata:
            logger.warning(f"Task not found: {task_id}")
            return {
                "status": "error",
                "task_id": task_id,
                "error": "Task not found",
                "message": f"No task found with ID: {task_id}"
            }
        
        # ✅ Return current status
        if metadata["status"] == "processing":
            created_dt = datetime.fromisoformat(metadata["created_at"])
            elapsed_minutes = (datetime.now() - created_dt).total_seconds() / 60
            return {
                "status": "processing",
                "task_id": task_id,
                "tenant_id": metadata.get("tenant_id"),
                "created_at": metadata.get("created_at"),
                "elapsed_minutes": round(elapsed_minutes, 1),
                "message": "Processing in progress. Check again in a few minutes.",
                "estimated_wait": "5-15 minutes depending on file size"
            }
        
        elif metadata["status"] == "completed":
            result = metadata.get("result", {})
            return {
                "status": "completed",
                "task_id": task_id,
                "tenant_id": metadata.get("tenant_id"),
                "created_at": metadata.get("created_at"),
                "completed_at": metadata.get("completed_at"),
                "record_count": result.get("record_count"),
                "file_path": result.get("file_path"),
                "processing_time": result.get("processing_time"),
                "message": result.get("message"),
                "details": result.get("details")
            }
        
        elif metadata["status"] == "failed":
            result = metadata.get("result", {})
            return {
                "status": "failed",
                "task_id": task_id,
                "tenant_id": metadata.get("tenant_id"),
                "created_at": metadata.get("created_at"),
                "completed_at": metadata.get("completed_at"),
                "error": result.get("error"),
                "error_type": result.get("error_type"),
                "message": f"Processing failed: {result.get('error')}"
            }
        
        else:
            return {
                "status": "unknown",
                "task_id": task_id,
                "metadata_status": metadata["status"],
                "message": f"Unknown status: {metadata['status']}"
            }
        
    except Exception as e:
        logger.error(f"Error checking task status: {e}", exc_info=True)
        return {
            "status": "error",
            "task_id": task_id,
            "error": str(e),
            "message": "Error checking task status"
        }
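
# Typical client flow for the non-blocking eproc endpoints (sketch; the host, tenant ID
# and directory are illustrative, and any HTTP client will do):
#
#     import requests
#     base = "http://localhost:8000/api/bid-processing"
#     r = requests.post(f"{base}/process/eproc-bids",
#                       json={"root_directory": "/data/eproc_bids", "tenant_id": "acme"})
#     task_id = r.json()["details"]["task_id"]
#     status = requests.get(f"{base}/process/eproc-bids/status/{task_id}").json()["status"]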

# @router.post("/process/eproc-bids", response_model=ProcessingResult)
# async def process_eproc_bids(
#     request: ProcessEProcBidsRequest,
#     background_tasks: BackgroundTasks,
#     api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
# ):
#     """
#     Process eproc bid data from work order PDFs and JSON files
    
#     Expected folder structure:
#     root_directory/
#     ├── bid_folder_1/
#     │   ├── work_order.pdf
#     │   └── stage_summary_data.json
#     ├── bid_folder_2/
#     │   ├── work_order.pdf
#     │   └── stage_summary_data.json
    
#     Each folder must contain:
#     - One PDF file (work order/purchase order document)
#     - stage_summary_data.json (bid metadata)
    
#     The endpoint will:
#     1. Extract text from PDFs
#     2. Use Claude LLM to analyze PDF content
#     3. Combine with JSON metadata
#     4. Structure data in standardized format
#     5. Save results to output file
#     """
#     try:
#         logger.info(f"Received eproc processing request for tenant: {request.tenant_id}")
#         logger.info(f"Root directory: {request.root_directory}")
        
#         result = process_eproc_bids_handler(
#             request.root_directory,
#             request.tenant_id,
#             request.output_file_name
#         )
        
#         if result["status"] != "success":
#             raise HTTPException(status_code=400, detail=result["message"])
            
#         return ProcessingResult(**result)
            
#     except Exception as e:
#         logger.error(f"Error in process_eproc_bids endpoint: {e}", exc_info=True)
#         raise HTTPException(status_code=500, detail=str(e))

@router.post("/analyze/sellers", response_model=SellerAnalyticsResult)
async def analyze_sellers(
    request: AnalyzeSellersRequest,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """Analyze sellers from processed bid data"""
    try:
        result = analyze_sellers_handler(
            request.input_file_path,
            request.tenant_id,
            request.output_file_name,
            search_keywords=request.search_keywords,
            search_mode=request.search_mode
        )
        
        if result["status"] != "success":
            raise HTTPException(status_code=500, detail=result["message"])
            
        return SellerAnalyticsResult(**result)
            
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in analyze_sellers endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.post("/analyze/bids", response_model=BidAnalyticsResponse)
async def get_bid_analytics(
    request: GetBidAnalyticsRequest,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """Get analytics from bid analysis data"""
    try:
        result = get_bid_analytics_handler(
            request.analysis_file_path,
            request.tenant_id
        )
        
        if result["status"] != "success":
            raise HTTPException(status_code=500, detail=result["summary"].get("error", "Unknown error"))
            
        return BidAnalyticsResponse(**result)
            
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in get_bid_analytics endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/processing-status/{tenant_id}")
async def get_processing_status(
    tenant_id: str,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """Get status of processing jobs for a tenant"""
    try:
        output_base = Path("/tmp/bid_processing") / tenant_id
        
        if not output_base.exists():
            return {
                "tenant_id": tenant_id,
                "status": "no_processing",
                "message": "No processing history found for this tenant"
            }
        
        # List all processing directories and their files
        processing_dirs = {}
        for process_type in output_base.iterdir():
            if process_type.is_dir():
                files = list(process_type.glob("*.json"))
                processing_dirs[process_type.name] = {
                    "file_count": len(files),
                    "files": [f.name for f in files],
                    "last_modified": max([datetime.fromtimestamp(f.stat().st_mtime).isoformat() for f in files]) if files else None
                }
        
        return {
            "tenant_id": tenant_id,
            "status": "success",
            "processing_history": processing_dirs
        }
        
    except Exception as e:
        logger.error(f"Error getting processing status: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@router.get("/export/{tenant_id}/{process_type}")
async def export_processed_data(
    tenant_id: str,
    process_type: str,
    file_name: str,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """Export processed bid data"""
    try:
        output_dir = get_tenant_output_dir(tenant_id, process_type)
        file_path = output_dir / file_name
        
        if not file_path.exists():
            raise HTTPException(status_code=404, detail=f"File not found: {file_name}")
        
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        return {
            "status": "success",
            "tenant_id": tenant_id,
            "file_name": file_name,
            "data": data
        }
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error exporting data: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/process-and-analyze")
async def process_and_analyze_gem_bids(
    request: ProcessGeMBidsDirectRequest,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """
    Combined endpoint: Process GeM bids and analyze sellers in one call
    Accepts JSON data directly, returns processed and analyzed data
    
    Steps:
    1. Process GeM bid data (refine_gem_bid_data)
    2. Analyze sellers (seller_analysis)
    3. Return combined results as JSON
    """
    start_time = time.time()
    try:
        logger.info(f"Starting combined GeM processing and seller analysis for tenant {request.tenant_id}")
        
        # Validate input
        if not request.bids_data or not isinstance(request.bids_data, dict):
            raise HTTPException(status_code=400, detail="Invalid bids_data: must be a dictionary")
        
        # Step 1: Process GeM bids
        logger.info("Step 1: Processing GeM bids...")
        
        # Prepare data in the format expected by gem_refine
        data_to_process = request.bids_data
        if 'metadata' not in data_to_process:
            data_to_process['metadata'] = {
                "scrapedAt": datetime.now().isoformat(),
                "totalBids": len(data_to_process.get('bids', []))
            }
        if 'bids' not in data_to_process:
            raise HTTPException(status_code=400, detail="Invalid bids_data: must contain 'bids' key")
        
        # Process the bids
        processed_data = gem_refine.process_bids_data(data_to_process)
        logger.info(f"Successfully processed {len(processed_data['bids'])} bids")
        
        # Save processed data temporarily for seller analysis
        output_dir = get_tenant_output_dir(request.tenant_id, "combined_processing")
        processed_file_path = output_dir / f"processed_{int(time.time())}.json"
        
        with open(processed_file_path, 'w', encoding='utf-8') as f:
            json.dump(processed_data, f, indent=2, ensure_ascii=False)
        
        # Step 2: Analyze sellers
        logger.info("Step 2: Analyzing sellers...")
        
        analysis_file_path = output_dir / f"analysis_{int(time.time())}.json"
        
        # Run seller analysis with optional keyword filtering
        seller_stats.analyze_bidders(
            str(processed_file_path),
            str(analysis_file_path),
            search_keywords=request.search_keywords,
            search_mode=request.search_mode
        )
        
        logger.info("Seller analysis completed")
        
        # Load analysis results
        with open(analysis_file_path, 'r', encoding='utf-8') as f:
            analysis_results = json.load(f)
        
        processing_time = time.time() - start_time
        
        # Calculate combined statistics
        sellers = analysis_results.get('sellers', [])
        total_bids = sum(s.get('total_bids_applied', 0) for s in sellers)
        total_l1_winners = sum(s.get('total_bids_won_l1', 0) for s in sellers)
        
        # Prepare response
        response = {
            "status": "success",
            "tenant_id": request.tenant_id,
            "processing_time": processing_time,
            "message": f"Successfully processed {len(processed_data['bids'])} bids and analyzed {len(sellers)} sellers",
            "processing_summary": {
                "step_1_gem_processing": {
                    "status": "completed",
                    "bids_processed": len(processed_data['bids']),
                    "total_sellers_found": sum(
                        len(bid.get('detailedInfo', {}).get('sections', {}).get('evaluation', {}).get('sellers', []))
                        for bid in processed_data['bids']
                    )
                },
                "step_2_seller_analysis": {
                    "status": "completed",
                    "unique_sellers": len(sellers),
                    "total_bids_analyzed": total_bids,
                    "total_qualified": sum(s.get('total_bids_qualified', 0) for s in sellers),
                    "total_disqualified": sum(s.get('total_bids_disqualified', 0) for s in sellers),
                    "total_l1_winners": total_l1_winners,
                    "success_rate": (total_l1_winners / total_bids * 100) if total_bids > 0 else 0
                }
            },
            "processed_bids_data": processed_data,
            "seller_analysis_data": analysis_results,
            "top_10_sellers": sellers[:10],
            "metadata": {
                "processed_at": datetime.now().isoformat(),
                "keywords_filtered": request.search_keywords,
                "search_mode": request.search_mode
            }
        }
        
        logger.info(f"Combined processing completed successfully in {processing_time:.2f}s")
        
        return response
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in process_and_analyze_gem_bids: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process and analyze bids: {str(e)}"
        )

@router.post("/process-and-analyze-file")
async def process_and_analyze_gem_bids_file(
    request: ProcessGeMBidsFullRequest,
    api_key: str = Depends(lambda: os.environ.get("BID_PROCESSING_API_KEY", "default-key"))
):
    """
    Combined endpoint: Process GeM bids from file and analyze sellers in ONE call
    
    Parameters:
    - input_file_path: Path to input JSON file
    - tenant_id: Tenant identifier
    - search_keywords: Optional keywords to filter sellers
    - search_mode: 'any' or 'all' for keyword filtering
    - return_type: 'json' (return in response body) or 'file' (save to disk)
    - output_file_path: Optional custom output file path
    
    Returns ONLY the final seller analysis (no intermediate files)
    """
    start_time = time.time()
    try:
        logger.info(f"Starting combined GeM processing and seller analysis for tenant {request.tenant_id}")
        logger.info(f"Return type: {request.return_type}")
        
        # Validate inputs
        if request.return_type not in ["json", "file"]:
            raise HTTPException(status_code=400, detail="return_type must be 'json' or 'file'")
        
        if not verify_file_exists(request.input_file_path):
            raise HTTPException(status_code=400, detail=f"Input file not found: {request.input_file_path}")
        
        # Step 1: Load bids data from file
        logger.info(f"Step 1: Loading bids from file: {request.input_file_path}")
        with open(request.input_file_path, 'r', encoding='utf-8') as f:
            bids_data = json.load(f)
        
        logger.info(f"Loaded {len(bids_data.get('bids', []))} bids from file")
        
        # Step 2: Process GeM bids
        logger.info("Step 2: Processing GeM bids...")
        if 'metadata' not in bids_data:
            bids_data['metadata'] = {
                "scrapedAt": datetime.now().isoformat(),
                "totalBids": len(bids_data.get('bids', []))
            }
        
        processed_data = gem_refine.process_bids_data(bids_data)
        logger.info(f"Successfully processed {len(processed_data['bids'])} bids")
        
        # Create temp directory
        output_dir = get_tenant_output_dir(request.tenant_id, "combined_processing")
        timestamp = int(time.time())
        temp_processed_file = output_dir / f".temp_processed_{timestamp}.json"
        temp_analysis_file = output_dir / f".temp_analysis_{timestamp}.json"
        
        # Save processed data temporarily
        with open(temp_processed_file, 'w', encoding='utf-8') as f:
            json.dump(processed_data, f, indent=2, ensure_ascii=False)
        
        # Step 3: Analyze sellers
        logger.info("Step 3: Analyzing sellers...")
        seller_stats.analyze_bidders(
            str(temp_processed_file),
            str(temp_analysis_file),
            search_keywords=request.search_keywords,
            search_mode=request.search_mode
        )
        
        # Load analysis results
        with open(temp_analysis_file, 'r', encoding='utf-8') as f:
            analysis_results = json.load(f)
        
        processing_time = time.time() - start_time
        
        # Calculate statistics
        sellers = analysis_results.get('sellers', [])
        total_bids = sum(s.get('total_bids_applied', 0) for s in sellers)
        total_l1_winners = sum(s.get('total_bids_won_l1', 0) for s in sellers)
        
        summary_data = {
            "processing_time": processing_time,
            "bids_processed": len(processed_data['bids']),
            "unique_sellers": len(sellers),
            "total_bids_analyzed": total_bids,
            "total_qualified": sum(s.get('total_bids_qualified', 0) for s in sellers),
            "total_disqualified": sum(s.get('total_bids_disqualified', 0) for s in sellers),
            "total_l1_winners": total_l1_winners,
            "success_rate": (total_l1_winners / total_bids * 100) if total_bids > 0 else 0
        }
        
        # Clean up temp files
        try:
            temp_processed_file.unlink()
            temp_analysis_file.unlink()
        except Exception as e:
            logger.warning(f"Could not clean up temp files: {e}")
        
        # Handle return type
        if request.return_type == "json":
            logger.info(f"Returning seller analysis as JSON in response body")
            response_data = analysis_results.copy()
            response_data["processing_summary"] = summary_data
            response_data["status"] = "success"
            logger.info(f"Processing completed in {processing_time:.2f}s")
            return response_data
        
        else:  # return_type == "file"
            # Determine output file path
            if request.output_file_path:
                output_file_path = Path(request.output_file_path)
                output_file_path.parent.mkdir(parents=True, exist_ok=True)
            else:
                output_file_path = output_dir / "seller_analysis_output.json"
            
            # Save seller analysis
            logger.info(f"Saving seller analysis to: {output_file_path}")
            with open(output_file_path, 'w', encoding='utf-8') as f:
                json.dump(analysis_results, f, indent=2, ensure_ascii=False)
            
            logger.info(f"Processing completed and saved in {processing_time:.2f}s")
            
            return {
                "status": "success",
                "message": f"Successfully processed {len(processed_data['bids'])} bids and analyzed {len(sellers)} sellers",
                "file_path": str(output_file_path),
                "processing_time": processing_time,
                "record_count": len(sellers),
                "details": {
                    "output_file": str(output_file_path),
                    "stats": summary_data
                }
            }
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in process_and_analyze_gem_bids_file: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to process and analyze bids: {str(e)}")