import os
import shutil
import json
import re
import anthropic
import logging
import PyPDF2
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Tuple
import argparse
import sys
import base64
import csv

from google import genai
from google.genai import types

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration
BASE_DIR = r"D:\Tenders"
COMPANY_REPOSITORY = r"C:\Users\Vivek\minaions\sample_company_docs"
# API keys are read from the environment (ANTHROPIC_API_KEY / GEMINI_API_KEY); do not hard-code secrets
CLAUDE_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Initialize Claude client
claude_client = anthropic.Anthropic(api_key=CLAUDE_API_KEY)
# Initialize Gemini client
gemini_client = genai.Client(api_key=GEMINI_API_KEY)

llm_model = "gemini"  # or "claude"

class color:
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'

class DocumentType:
    STANDARD = "standard"       # Company registration, certificates, etc.
    EXPERIENCE = "experience"   # Past work experience documents
    CUSTOM = "custom"           # Documents to be created specifically for the bid
    ANNEXURE = "annexure"       # Templates from the RFP to be filled

def identify_required_documents(extracted_info: Dict) -> List[Dict]:
    """
    Use Claude to intelligently parse and identify required documents from extracted information
    
    Args:
        extracted_info (dict): Dictionary with extracted tender information
        
    Returns:
        list: List of document requirements with metadata
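
    Example element (illustrative; keys match those requested from the LLM):
        {"name": "Annexure 5: Company details form", "type": "ANNEXURE",
         "source_document": "GeM-Bidding-123456.pdf",
         "description": "Company profile form to be filled and signed"}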
    """
    document_info = extracted_info.get("Documents needed to submit the bid", "")
    
    if not document_info or document_info == "Not found in any document":
        logger.warning("No document requirements found in extracted information")
        return []
    
    # Get scope of work for context
    scope_of_work = extracted_info.get("Scope of work of the whole project", "")
    eligibility_criteria = extracted_info.get("Eligibility/Qualification Criteria or conditions for bidder", "")
    
    # Create prompt for Claude to analyze document requirements
    prompt = f"""
    You are an expert in government tender document preparation. I'll provide you with text 
    describing document requirements for a tender bid. Analyze this text and extract a structured list 
    of all required documents.
    
    For each document, identify:
    1. Document name (brief but descriptive)
    2. Document type (exactly one of: STANDARD, EXPERIENCE, CUSTOM, ANNEXURE)
       - STANDARD: Standard company documents like company registration/incorporation, certificates, accreditations, 
       PAN, GST, MSME certificate, employee details, team CVs, turnover and financial documents etc. that the company already has
       - EXPERIENCE: Past work experience documents like work orders, completion certificates, PO, contracts etc.
       - CUSTOM: Documents to be created specifically for this bid (declarations, proposals, cover letter etc)
       - ANNEXURE: Formats/templates provided in the RFP that need to be filled with company information
    3. Source document: For ANNEXURE documents, identify which source document in the RFP contains
       this annexure format (e.g., "Found in GeM-Bidding-123456.pdf")
    4. Description of what the document should contain

    Please note that no two documents should be combined in one entry, e.g. if there are multiple annexures to be prepared,
    mention them all as separate documents in the list.
    
    Tender context for reference:
    SCOPE OF WORK SUMMARY: {scope_of_work[:2000] if scope_of_work else "Not provided"}
    ELIGIBILITY CRITERIA SUMMARY: {eligibility_criteria[:500] if eligibility_criteria else "Not provided"}
    
    Document requirements text:
    {document_info}
    
    Return your analysis as a JSON array of objects with these keys:
    - "name": String (Document name)
    - "type": String (One of: STANDARD, EXPERIENCE, CUSTOM, ANNEXURE)
    - "source_document": String (Document name containing the annexure, for ANNEXURE type only)
    - "description": String (Detailed description of the document)
    
    Make sure to identify every required document, even if it's mentioned in passing or in a complex format.
    """
    
    try:
        # Call Claude API
        response = claude_client.messages.create(
            model="claude-3-7-sonnet-latest", #claude-3-5-haiku-20241022
            max_tokens=4000,
            temperature=0,
            system="You are an expert in analyzing tender documents. Extract the requested information accurately in JSON format only.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        response_text = response.content[0].text
        
        # Extract JSON from response
        json_match = re.search(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        else:
            # Try to find JSON without markdown code blocks
            json_match = re.search(r'\[\s*\{.*\}\s*\]', response_text, re.DOTALL)
            if json_match:
                json_str = json_match.group(0)
            else:
                # Last resort: try to extract just the JSON part
                lines = response_text.split('\n')
                json_lines = []
                started = False
                
                for line in lines:
                    if line.strip().startswith('[') or started:
                        started = True
                        json_lines.append(line)
                        if line.strip().endswith(']'):
                            break
                
                if json_lines:
                    json_str = '\n'.join(json_lines)
                else:
                    json_str = response_text  # Use the entire response as a fallback
        
        # Clean up the JSON string for better parsing
        json_str = json_str.strip()
        if not json_str.startswith('['):
            json_str = '[' + json_str
        if not json_str.endswith(']'):
            json_str = json_str + ']'
        
        # Parse the JSON
        try:
            required_docs = json.loads(json_str)
        except json.JSONDecodeError:
            # If we can't parse it, try a simplified approach
            logger.warning("Failed to parse LLM response as JSON, trying to extract individual documents")
            # Extract document sections from the response
            required_docs = extract_documents_from_text(response_text)
        
        # Log the results
        logger.info(f"LLM identified {len(required_docs)} required documents")
        for i, doc in enumerate(required_docs):
            logger.info(f"Document {i+1}: {doc.get('name', 'Unknown')} - Type: {doc.get('type', 'Unknown')}")
            if doc.get('type', '').lower() == DocumentType.ANNEXURE and doc.get('source_document'):
                logger.info(f"  Found in: {doc.get('source_document')}")
        
        return required_docs
    
    except Exception as e:
        logger.error(f"Error identifying required documents with LLM: {str(e)}")
        # Fallback to a simpler approach if LLM fails
        return simple_document_extraction(document_info)

def extract_documents_from_text(text: str) -> List[Dict]:
    """
    Helper function to extract document information from text when JSON parsing fails
    
    Args:
        text (str): The text response from Claude
        
    Returns:
        list: List of document dictionaries
    """
    docs = []
    # Look for numbered or bulleted items
    lines = text.split('\n')
    current_doc = {}
    
    for line in lines:
        line = line.strip()
        # Check for document name patterns
        name_match = re.search(r'(?:^|\s)(?:Document|Name):\s*(.+)', line)
        if name_match:
            # Save previous document if exists
            if current_doc and 'name' in current_doc:
                docs.append(current_doc)
            # Start new document
            current_doc = {'name': name_match.group(1)}
            continue
            
        # Check for type
        type_match = re.search(r'(?:^|\s)Type:\s*(STANDARD|EXPERIENCE|CUSTOM|ANNEXURE)', line)
        if type_match and current_doc:
            current_doc['type'] = type_match.group(1)
            continue
            
        # Check for source document
        source_match = re.search(r'(?:^|\s)(?:Source|Found in):\s*(.+)', line)
        if source_match and current_doc:
            current_doc['source_document'] = source_match.group(1)
            continue
            
        # Check for description
        desc_match = re.search(r'(?:^|\s)Description:\s*(.+)', line)
        if desc_match and current_doc:
            current_doc['description'] = desc_match.group(1)
            continue
    
    # Add the last document
    if current_doc and 'name' in current_doc:
        docs.append(current_doc)
    
    return docs

def simple_document_extraction(document_info: str) -> List[Dict]:
    """
    Simple fallback method to extract document requirements when LLM fails
    
    Args:
        document_info (str): Document requirements text
        
    Returns:
        list: List of basic document dictionaries
    """
    required_docs = []
    
    # Split into lines and look for bullet points or numbers
    lines = document_info.split('\n')
    for line in lines:
        line = line.strip()
        if not line:
            continue
            
        # Check if line starts with a bullet, number, or similar indicator
        if re.match(r'^(\d+\.|\-|\*|\•|\–) ', line):
            doc_name = re.sub(r'^(\d+\.|\-|\*|\•|\–) ', '', line)
            
            # Make a best guess at document type
            doc_type = DocumentType.CUSTOM  # Default
            if any(term in doc_name.lower() for term in [
                "certificate", "registration", "pan", "gst", "msme", "iso", "cmmi", 
                "balance sheet", "income tax return", "audit report"
            ]):
                doc_type = DocumentType.STANDARD
            elif any(term in doc_name.lower() for term in [
                "work order", "completion certificate", "experience", "portfolio"
            ]):
                doc_type = DocumentType.EXPERIENCE
            elif any(term in doc_name.lower() for term in [
                "annexure", "format", "form", "template"
            ]):
                doc_type = DocumentType.ANNEXURE
            
            required_docs.append({
                "name": doc_name,
                "type": doc_type,
                "description": doc_name,
                "source_document": "Unknown" if doc_type == DocumentType.ANNEXURE else None
            })
    
    return required_docs

def get_available_company_documents(company_docs_dir) -> Dict:
    """
    Get a list of available company documents in the repository
    
    Returns:
        dict: Dictionary mapping document types to lists of available documents
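
    Expected repository layout (assumed; file names are illustrative):
        <company_docs_dir>/
            Standard_Documents/      e.g. GST_Certificate.pdf, PAN_Card.pdf
            Experience_Documents/    e.g. WorkOrder_ClientX_2023.pdf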
    """
    company_docs = {
        DocumentType.STANDARD: [],
        DocumentType.EXPERIENCE: []
    }
    
    # Standard documents folder
    std_doc_path = os.path.join(company_docs_dir, "Standard_Documents")
    if os.path.exists(std_doc_path):
        for file in os.listdir(std_doc_path):
            if file.lower().endswith(('.pdf', '.docx', '.jpg', '.png', '.xlsx', '.csv')):
                company_docs[DocumentType.STANDARD].append({
                    "name": file,
                    "path": os.path.join(std_doc_path, file),
                    "description": get_document_description(os.path.join(std_doc_path, file))
                })
    
    # Experience documents folder
    exp_doc_path = os.path.join(company_docs_dir, "Experience_Documents")
    if os.path.exists(exp_doc_path):
        for file in os.listdir(exp_doc_path):
            if file.lower().endswith(('.pdf', '.docx', '.jpg', '.png', '.xlsx', '.csv')):
                company_docs[DocumentType.EXPERIENCE].append({
                    "name": file,
                    "path": os.path.join(exp_doc_path, file),
                    "description": get_document_description(os.path.join(exp_doc_path, file))
                })
    
    logger.info(f"Found {len(company_docs[DocumentType.STANDARD])} standard documents and "
                f"{len(company_docs[DocumentType.EXPERIENCE])} experience documents")
    
    return company_docs

def get_document_description(file_path: str) -> str:
    """
    Get description for a document based on filename or content
    
    Args:
        file_path (str): Path to the document
        
    Returns:
        str: Document description
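
    Example (hypothetical files): for "GST_Certificate.pdf", a sidecar file
    "GST_Certificate.pdf.desc" takes precedence if present; otherwise the
    description falls back to "GST Certificate" derived from the filename.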
    """
    # First check for a description file
    desc_path = file_path + ".desc"
    if os.path.exists(desc_path):
        with open(desc_path, 'r', encoding='utf-8') as f:
            return f.read().strip()
    
    # Otherwise use filename as description
    filename = os.path.basename(file_path)
    # Clean up filename
    description = os.path.splitext(filename)[0]
    description = description.replace('_', ' ').replace('-', ' ')
    
    return description

def write_list_of_docs_to_csv(data, filename):
    """Writes a list of dictionaries to a CSV file.

    Args:
        data: A list of dictionaries.
        filename: The name of the CSV file to write to.
    """
    if not data:
        print("❌ No required documents found in the RFP details.")
        return

    print(f"\n📄 {len(data)} documents identified to be submitted for this bid.")

    fieldnames = ['name', 'type', 'source_document', 'description']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        # extrasaction='ignore' drops any unexpected keys the LLM may have added
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(data)

def prepare_bid_documents(bid_dir: str, company_docs_dir: str, company_info: str) -> str:
    """
    Main function to prepare all bid documents
    
    Args:
        bid_dir (str): Path to the bid directory
        company_docs_dir (str): Path to the company document repository
        company_info (str): Text describing the bidder company

    Returns:
        str: Path to the final documents directory, or None if the tender
             analysis for this bid has not been generated yet
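
    Output layout (created under <bid_dir>/final_docs):
        standard/    copies of matching company documents
        experience/  copies of relevant past-experience documents
        custom/      LLM-generated documents (as .txt)
        annexure/    filled annexure formats (as .txt)
        document_index.txt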
    """
    extracted_info_path = os.path.join(bid_dir, "tender_analysis")
    extracted_info_file = os.path.join(extracted_info_path, "tender_analysis.json")
    
    if os.path.exists(extracted_info_file):
        with open(extracted_info_file, 'r', encoding='utf-8') as f:
            extracted_info = json.load(f)
    else:
        print("❌ Looks like this RFP has not been analysed by Minaions before.")
        print("Please check the RFP directory path or analyze the RFP through Minaions first.")
        return None

    # Get scope of work for context
    scope_of_work = extracted_info.get("Scope of work of the whole project", "")

    # Create final documents directory
    final_docs_dir = os.path.join(bid_dir, "final_docs")
    os.makedirs(final_docs_dir, exist_ok=True)
    
    # Create subdirectories for document types
    for doc_type in [DocumentType.STANDARD, DocumentType.EXPERIENCE, DocumentType.CUSTOM, DocumentType.ANNEXURE]:
        os.makedirs(os.path.join(final_docs_dir, doc_type), exist_ok=True)
    
    # Step 1: Identify required documents
    required_documents = identify_required_documents(extracted_info)
    write_list_of_docs_to_csv(required_documents, os.path.join(extracted_info_path, "required_documents.csv"))

    # Step 2: Get available company documents
    available_documents = get_available_company_documents(company_docs_dir)

    # print(f"Available company documents are: \n{available_documents}")

    # Step 3: Match and prepare documents
    document_status = {
        "prepared": [],
        "missing": [],
        "index": []
    }
    
    # Process each required document; normalize the LLM-provided type (e.g. "STANDARD")
    # to the lowercase DocumentType constants so type checks and folder grouping line up
    for req_doc in required_documents:
        document_path = None
        req_doc["type"] = str(req_doc.get("type", "")).strip().lower()

        # Process based on document type
        if req_doc["type"] == DocumentType.STANDARD:
            document_path = process_standard_document(req_doc, available_documents, final_docs_dir)
        elif req_doc["type"] == DocumentType.EXPERIENCE:
            document_path = process_experience_document(req_doc, available_documents, final_docs_dir, scope_of_work)
        elif req_doc["type"] == DocumentType.CUSTOM:
            document_path = generate_custom_document(req_doc, extracted_info, company_info, scope_of_work, final_docs_dir)
        elif req_doc["type"] == DocumentType.ANNEXURE:
            document_path = process_annexure_format(req_doc, extracted_info, company_info, final_docs_dir, bid_dir)

        # Update status
        if document_path:
            rel_path = os.path.relpath(document_path, final_docs_dir)
            document_status["prepared"].append({
                "name": req_doc["name"],
                "type": req_doc["type"],
                "path": rel_path
            })
            document_status["index"].append({
                "name": req_doc["name"],
                "type": req_doc["type"],
                "path": rel_path,
                "status": "Prepared"
            })
        else:
            document_status["missing"].append(req_doc["name"])
            document_status["index"].append({
                "name": req_doc["name"],
                "type": req_doc["type"],
                "path": "",
                "status": "Missing"
            })
    
    # Step 4: Create document index
    index_path = create_document_index(final_docs_dir, document_status["index"])
    
    # Log summary
    logger.info(f"Bid documents preparation completed")
    logger.info(f"Prepared documents: {len(document_status['prepared'])}")
    logger.info(f"Missing documents: {len(document_status['missing'])}")
    if document_status["missing"]:
        logger.warning(f"Missing documents: {', '.join(document_status['missing'])}")
    
    return final_docs_dir

def process_standard_document(req_doc: Dict, available_documents: Dict, final_docs_dir: str) -> str:
    """
    Process standard company document requirement
    
    Args:
        req_doc (dict): Required document information
        available_documents (dict): Dictionary of available company documents
        final_docs_dir (str): Directory for final documents
        
    Returns:
        str: Path to the processed document, or None if not found
    """
    logger.info(f"Processing standard document: {req_doc['name']}")
    
    # Search for matching standard document
    best_match = find_best_document_match(req_doc, available_documents[DocumentType.STANDARD])
    
    if best_match:
        # Copy document to final directory
        dest_dir = os.path.join(final_docs_dir, DocumentType.STANDARD)
        dest_path = os.path.join(dest_dir, os.path.basename(best_match["path"]))
        shutil.copy2(best_match["path"], dest_path)
        
        logger.info(f"Copied standard document: {os.path.basename(best_match['path'])}")
        return dest_path
    else:
        logger.warning(f"No matching standard document found for: {req_doc['name']}")
        return None

def process_experience_document(req_doc: Dict, available_documents: Dict, final_docs_dir: str, scope_of_work: str) -> str:
    """
    Process experience document requirement
    
    Args:
        req_doc (dict): Required document information
        available_documents (dict): Dictionary of available company documents
        final_docs_dir (str): Directory for final documents
        scope_of_work (str): Scope of work for context
        
    Returns:
        str: Path to the processed document, or None if not found
    """
    logger.info(f"Processing experience document: {req_doc['name']}")
    
    # Use scope of work to find relevant experience documents
    if scope_of_work:
        relevant_docs = select_relevant_experience_documents(
            req_doc,
            available_documents[DocumentType.EXPERIENCE],
            scope_of_work
        )
        
        if relevant_docs:
            # Copy most relevant document
            best_match = relevant_docs[0]  # Most relevant first
            dest_dir = os.path.join(final_docs_dir, DocumentType.EXPERIENCE)
            dest_path = os.path.join(dest_dir, os.path.basename(best_match["path"]))
            shutil.copy2(best_match["path"], dest_path)
            
            logger.info(f"Copied experience document: {os.path.basename(best_match['path'])}")
            return dest_path
    
    # Fallback: find best match without considering scope
    best_match = find_best_document_match(req_doc, available_documents[DocumentType.EXPERIENCE])
    
    if best_match:
        # Copy document to final directory
        dest_dir = os.path.join(final_docs_dir, DocumentType.EXPERIENCE)
        dest_path = os.path.join(dest_dir, os.path.basename(best_match["path"]))
        shutil.copy2(best_match["path"], dest_path)
        
        logger.info(f"Copied experience document: {os.path.basename(best_match['path'])}")
        return dest_path
    else:
        logger.warning(f"No matching experience document found for: {req_doc['name']}")
        return None

def find_best_document_match(req_doc: Dict, available_docs: List[Dict]) -> Dict:
    """
    Find the best matching document from available documents
    
    Args:
        req_doc (dict): Required document information
        available_docs (list): List of available documents
        
    Returns:
        dict: Best matching document, or None if no good match
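
    Example (illustrative; document entries are hypothetical):
        >>> req = {"name": "GST registration certificate"}
        >>> docs = [{"name": "GST_Certificate.pdf", "description": "GST Certificate"}]
        >>> find_best_document_match(req, docs)["name"]
        'GST_Certificate.pdf'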
    """
    if not available_docs:
        return None
    
    best_match = None
    best_score = 0
    
    req_terms = set(req_doc["name"].lower().split())
    
    for doc in available_docs:
        # Calculate simple term overlap score
        doc_terms = set(doc["name"].lower().split() + doc["description"].lower().split())
        common_terms = req_terms.intersection(doc_terms)
        
        score = len(common_terms) / max(len(req_terms), 1)
        
        if score > best_score:
            best_score = score
            best_match = doc
    
    # Only return if the match is good enough
    if best_score >= 0.3:  # At least 30% term overlap
        return best_match
    
    return None

def select_relevant_experience_documents(req_doc: Dict, experience_docs: List[Dict], scope_of_work: str) -> List[Dict]:
    """
    Select relevant experience documents based on scope of work
    
    Args:
        req_doc (dict): Required document information
        experience_docs (list): List of available experience documents
        scope_of_work (str): Scope of work for context
        
    Returns:
        list: Ranked list of relevant experience documents
    """
    if not experience_docs or not scope_of_work:
        return []
    
    # Create prompt for Claude to rank experience documents
    # (the scope of work is truncated below to avoid token limit issues)
    prompt = f"""
    You are an expert in government tender bidding. I need to select the most relevant past experience 
    documents for a tender with the following scope of work:
    
    SCOPE OF WORK:
    {scope_of_work[:2000]}
    
    The tender requires: {req_doc['name']} - {req_doc['description']}
    
    Here are the available experience documents (filename and description):
    """
    
    # Add document descriptions (limited to 20 to avoid token limit)
    for i, doc in enumerate(experience_docs[:20]):
        prompt += f"\n{i+1}. {doc['name']}: {doc['description']}"
    
    prompt += """
    
    Please rank the top 3 most relevant experience documents for this tender requirement, 
    considering relevance to the scope of work. Return only the numbers of the documents in order 
    of relevance, separated by commas. For example: "5,12,3"
    """
    
    try:
        response = claude_client.messages.create(
            model="claude-3-7-sonnet-latest",
            max_tokens=1000,
            temperature=0,
            system="You are an expert in analyzing tender documents and selecting relevant experience documents.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        response_text = response.content[0].text.strip()
        
        # Extract rankings - look for numbers separated by commas
        rankings = re.findall(r'\d+', response_text)
        
        # Get the ranked documents
        ranked_docs = []
        for rank in rankings:
            try:
                index = int(rank) - 1
                if 0 <= index < len(experience_docs):
                    ranked_docs.append(experience_docs[index])
            except ValueError:
                continue
        
        return ranked_docs
    
    except Exception as e:
        logger.error(f"Error selecting relevant experience documents: {str(e)}")
        return []

def generate_custom_document(req_doc: Dict, extracted_info: Dict, company_info: str, scope_of_work: str, final_docs_dir: str) -> str:
    """
    Generate a custom document using Claude
    
    Args:
        req_doc (dict): Required document information
        extracted_info (dict): Dictionary with extracted tender information
        company_info (str): Text describing the bidder company
        scope_of_work (str): Scope of work for context
        final_docs_dir (str): Directory for final documents
        
    Returns:
        str: Path to the generated document, or None if generation failed
    """
    logger.info(f"Generating custom document: {req_doc['name']}")
    
    # Get relevant sections from extracted info
    eligibility = extracted_info.get("Eligibility/Qualification Criteria or conditions for bidder", "")
    payment_terms = extracted_info.get("Payment terms", "")
    
    # Create prompt for Claude to generate document
    prompt = f"""
    You are an expert in government tender document preparation. I need you to create a {req_doc['name']} 
    for a tender bid. The document should be professional, complete, and follow standard formats.
    
    Document Required: {req_doc['name']}
    Description: {req_doc['description']}
    
    Relevant Tender Information:
    
    SCOPE OF WORK:
    {scope_of_work[:1500]}
    
    ELIGIBILITY CRITERIA:
    {eligibility[:1500]}
    
    PAYMENT TERMS:
    {payment_terms[:500]}
    
    Bidder Company Details:
    {company_info}
    
    Please generate the complete document text in a professional format. Include:
    - Appropriate header with company letterhead elements
    - Date and reference number
    - Professional salutation and closure
    - All necessary declarations or statements
    - Any legal language typically required for such a document
    - Proper formatting with bold, line breaks, paragraphs and tabs etc
    
    The document should be ready to print and sign without further modifications.
    """
    
    try:
        response = claude_client.messages.create(
            model="claude-3-7-sonnet-latest",
            max_tokens=4000,
            temperature=0.2,
            system="You are an expert in preparing professional tender documents.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        document_text = response.content[0].text
        
        # Create document file
        dest_dir = os.path.join(final_docs_dir, DocumentType.CUSTOM)
        safe_name = re.sub(r'[^\w\s-]', '', req_doc['name']).strip().replace(' ', '_')
        dest_path = os.path.join(dest_dir, f"{safe_name}.txt")
        
        with open(dest_path, 'w', encoding='utf-8') as f:
            f.write(document_text)
        
        logger.info(f"Generated custom document: {os.path.basename(dest_path)}")
        return dest_path
    
    except Exception as e:
        logger.error(f"Error generating custom document: {str(e)}")
        return None

def generate_annexure_fallback(annexure_name: str, file_desc: str, company_info: str, final_docs_dir: str) -> str:
    """
    Fallback function to generate an annexure document when the exact format can't be found

    Args:
        annexure_name (str): Name of the required annexure
        file_desc (str): Description of the annexure contents
        company_info (str): Text describing the bidder company
        final_docs_dir (str): Directory for final documents

    Returns:
        str: Path to the generated document
    """
    print(f"Using fallback method to generate annexure: {annexure_name}")

    # Create prompt for Claude to create a standard format

    annexure_desc = f"Annexure {annexure_name}" + ": " + file_desc

    prompt = f"""
    You are an expert in government tender document preparation. I need to create an annexure document
    for a bid submission, but I don't have the exact format for this document.

    Required Document: 
    
    {annexure_desc}

    Please create a standard professional format typically used for this type of annexure in government tenders.
    This should look like an authentic, professional government tender annexure.

    To create this document, please use the appropriate information from this Company Information:

    {company_info}

    The format should include:
    - Professional header with annexure title
    - All standard fields typically found in this type of annexure
    - Appropriate spaces for signatures, dates, and stamps
    - Any declarations or statements typically required

    Please create a complete and professional document ready for submission.
    """

    try:
        if llm_model == 'gemini':
            response = gemini_client.models.generate_content(
                model="gemini-2.5-flash-preview-04-17", #"gemini-2.0-flash",
                contents=[prompt],
                config=types.GenerateContentConfig(
                    max_output_tokens=4000,
                    temperature=0.1
                )
            )
            document_text = response.text
        elif llm_model == 'claude':
            response = claude_client.messages.create(
                model="claude-3-7-sonnet-latest",
                max_tokens=3000,
                temperature=0.1,
                system="You are an expert in creating professional tender annexure documents.",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            document_text = response.content[0].text

        # Create document file
        dest_dir = os.path.join(final_docs_dir, DocumentType.ANNEXURE)
        safe_name = re.sub(r'[^\w\s-]', '', annexure_name).strip().replace(' ', '_')
        dest_path = os.path.join(dest_dir, f"{safe_name}.txt")

        with open(dest_path, 'w', encoding='utf-8') as f:
            f.write(document_text)

        logger.info(f"Generated fallback annexure document: {os.path.basename(dest_path)}")
        return dest_path

    except Exception as e:
        logger.error(f"Error generating fallback annexure document: {str(e)}")
        return None

def generate_filled_annexure(annexure_name: str, file_desc: str, annexure_number: str, annexure_format: str, company_info: str, final_docs_dir: str) -> str:
    """
    Generate a filled annexure document using the extracted format

    Args:
        annexure_name (str): Name of the required annexure
        file_desc (str): Description of the annexure contents
        annexure_number (str): Annexure number if one was detected, else None
        annexure_format (str): Exact annexure format extracted from the RFP
        company_info (str): Text describing the bidder company
        final_docs_dir (str): Directory for final documents

    Returns:
        str: Path to the generated document
    """
    # Create prompt for LLM to fill the annexure
    annexure_desc = f"Annexure {annexure_number}" if annexure_number else annexure_name + ": " + file_desc

    prompt = f"""
    You are an expert in government tender document preparation. I need to fill out an annexure format
    for a bid submission. I've extracted the exact format from the tender document.

    Required Document: {annexure_desc}

    Here is the exact format from the RFP:

    {annexure_format}

    Please fill this format with appropriate information from this Company Information:
    
    {company_info}
    
    Follow EXACTLY the original layout, tables, and structure, but fill in all blank fields.
    If there are tables in the format, use characters like | or _ or - together with tabs and spaces to reproduce the exact table layout in the output.

    Fill in ALL fields with information appropriate for this type of tender, drawn from the company information provided above.
    Completely replace any placeholder text in the format with the actual information wherever it is available.
    If a field needs specific technical information not provided here, use the string '<fill_data_here>'.

    IMPORTANT: Maintain the EXACT formatting and layout of the original. Return ONLY the filled document.
    """

    print("prompt for generating filled annexure:\n\n", prompt)
    try:

        if llm_model == 'gemini':
            response = gemini_client.models.generate_content(
                model="gemini-2.5-flash-preview-04-17", #"gemini-2.0-flash",
                contents=[prompt],
                config=types.GenerateContentConfig(
                    max_output_tokens=4000,
                    temperature=0.1
                )
            )
            filled_document = response.text

        elif llm_model == 'claude':
            response = claude_client.messages.create(
                model="claude-3-7-sonnet-latest",
                max_tokens=4000,
                temperature=0.1,
                system="You are an expert in preparing professional tender annexure documents. Fill the exact format with appropriate information.",
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            filled_document = response.content[0].text

        # Create document file
        dest_dir = os.path.join(final_docs_dir, DocumentType.ANNEXURE)
        safe_name = re.sub(r'[^\w\s-]', '', annexure_name).strip().replace(' ', '_')
        dest_path = os.path.join(dest_dir, f"{safe_name}.txt")
        os.makedirs(dest_dir, exist_ok=True)

        with open(dest_path, 'w', encoding='utf-8') as f:
            f.write(filled_document)

        print(f"Generated filled annexure document: {os.path.basename(dest_path)}")
        return dest_path

    except Exception as e:
        print(f"Error generating filled annexure document: {str(e)}")
        return None

def extract_annexure_with_llm(pdf_path: Path, annexure_name: str, file_desc: str, annexure_number: str = None) -> str:
    """
    Use the configured LLM (Claude or Gemini) to extract an annexure format from a PDF

    Args:
        pdf_path (Path): Path to the PDF file
        annexure_name (str): Name of the annexure to search for
        file_desc (str): Description of the annexure contents
        annexure_number (str, optional): Annexure number if available

    Returns:
        str: Extracted annexure content or None if not found
    """
    try:
        # Read the PDF file as bytes
        with open(pdf_path, 'rb') as file:
            pdf_bytes = file.read()

        # Base64-encode the PDF for the Claude document API (Gemini uses raw bytes)
        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

        # Create prompt for Claude
        annexure_desc = f"Annexure {annexure_number}" if annexure_number else annexure_name + ": " + file_desc

        prompt = f"""
        You're helping extract an exact annexure format from a tender document PDF.

        I'm looking for: {annexure_desc}

        Please find this specific annexure in the attached PDF and extract its complete format.
        I need the EXACT format as it appears, including all fields, tables, and formatting.

        FORMAT YOUR RESPONSE LIKE THIS:

        1. First, tell me if you found the annexure or not.
        2. If found, mention the string 'yes found the annexure' and give me the page number(s) where it appears.
        3. Then provide the EXACT TEXT and format of the annexure, preserving all fields, tables and layout.
        4. If there are tables in the format, use characters like | or _ or - together with tabs and spaces to reproduce the exact table layout in the output.

        If you don't find the exact annexure, let me know and don't make up a format.
        """

        if llm_model == 'gemini':
            # Gemini accepts the raw PDF bytes directly
            filepath = Path(pdf_path)

            response = gemini_client.models.generate_content(
              model="gemini-2.5-flash-preview-04-17", #"gemini-2.0-flash",
              contents=[
                  types.Part.from_bytes(
                    data=filepath.read_bytes(),
                    mime_type='application/pdf',
                  ),
                  prompt])
            
            response_text = response.text

        elif llm_model == 'claude':
            # Use anthropic.Anthropic directly with media for PDF support
            # Note: This requires Claude 3 model family which supports PDF analysis
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "document",
                            "source": {
                                "type": "base64",
                                "media_type": "application/pdf",
                                "data": pdf_base64
                            }
                        }
                    ]
                }
            ]

            response = claude_client.messages.create(
                model="claude-3-7-sonnet-latest", #"claude-3-5-haiku-latest",
                max_tokens=4000,
                temperature=0,
                system="You are an expert in extracting annexure formats from tender documents. Extract ONLY the requested annexure format exactly as it appears.",
                messages=messages
            )

            response_text = response.content[0].text

        # Parse response to extract the annexure format
        found_indicator = re.search(r'yes found the annexure', response_text.lower())
        if not found_indicator:
            return None

        # Extract the format part - typically after "EXACT TEXT" or similar indicator
        format_section = re.split(r'(?:EXACT TEXT|format:|annexure format:|here is the annexure:)',
                                response_text, flags=re.IGNORECASE)

        if len(format_section) > 1:
            return format_section[1].strip()

        return response_text

    except Exception as e:
        print(f"Error using Claude to extract annexure from PDF {pdf_path.name}: {str(e)}")
        return None

def process_annexure_format(req_doc: Dict, extracted_info: Dict, company_info: str, final_docs_dir: str, bid_dir: str) -> str:
    """
    Process annexure format document by locating, extracting, and filling the exact format from RFP

    Args:
        req_doc (dict): Required document information
        extracted_info (dict): Dictionary with extracted tender information
        company_info (str): Information of available company
        final_docs_dir (str): Directory for final documents
        bid_dir (str): Path to the bid directory containing all RFP documents

    Returns:
        str: Path to the processed document, or None if processing failed
    """
    print(f"Processing annexure format: {req_doc['name']}")

    # Extract key information for searching
    annexure_name = req_doc['name']
    source_hint = req_doc.get('source_document', 'Unknown')
    file_desc = req_doc.get('description', 'No Description given')

    # Normalize annexure name/number for searching
    annexure_match = re.search(r'(?:annexure|format|form|proforma)\s*[-\s]*([\dIVXivx]+|[a-zA-Z])',
                             annexure_name.lower(), re.IGNORECASE)
    annexure_number = annexure_match.group(1) if annexure_match else None
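    # The name is lowercased before matching, so e.g. "Annexure 5: Company details form"
    # yields annexure_number "5" and "Format-II Technical Bid" yields "ii"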

    print("\nannexure_name and annexure_number are:", annexure_name, annexure_number)
    print(f"\nLooking for annexure {annexure_number if annexure_number else annexure_name}")

    # Step 1: Find all PDF files in the tender directory
    all_pdf_files = list(Path(bid_dir).glob("**/*.pdf"))

    # If we have a source document hint, prioritize that file
    pdf_files = sorted(all_pdf_files,
                      key=lambda x: 1 if source_hint != 'Unknown' and source_hint in x.name else 2)

    if not pdf_files:
        print(f"No PDF files found in tender directory for extracting annexure")
        return generate_annexure_fallback(annexure_name, file_desc, company_info, final_docs_dir)

    # Step 2: Search for annexure in PDF documents
    annexure_content = None
    source_file = None

    # Use LLM to analyze PDFs
    print("Trying LLM PDF analysis to find annexure")
    for pdf_file in pdf_files:
        annexure_content = extract_annexure_with_llm(pdf_file, annexure_name, file_desc, annexure_number)

        if annexure_content:
            source_file = pdf_file
            print(f"Found annexure content using Claude in {pdf_file.name}")
            break
        else:
            print(f"Annexure not found in document {pdf_file.name}")

    # Step 3: Generate the filled annexure document
    if annexure_content:
        return generate_filled_annexure(annexure_name, file_desc, annexure_number, annexure_content, company_info, final_docs_dir)
    else:
        print(f"Could not find annexure format in any tender document")
        return generate_annexure_fallback(annexure_name, file_desc, company_info, final_docs_dir)

def create_document_index(final_docs_dir: str, document_list: List[Dict]) -> str:
    """
    Create an index file for all prepared documents
    
    Args:
        final_docs_dir (str): Directory containing final documents
        document_list (list): List of document information
        
    Returns:
        str: Path to the index file
    """
    index_path = os.path.join(final_docs_dir, "document_index.txt")
    
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write("BID DOCUMENTS INDEX\n")
        f.write("===================\n\n")
        f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        
        # Group by document type
        for doc_type in [DocumentType.STANDARD, DocumentType.EXPERIENCE, DocumentType.CUSTOM, DocumentType.ANNEXURE]:
            type_docs = [doc for doc in document_list if doc["type"] == doc_type]
            
            if type_docs:
                f.write(f"\n{doc_type.upper()} DOCUMENTS\n")
                f.write("-" * 20 + "\n")
                
                for i, doc in enumerate(type_docs):
                    f.write(f"{i+1}. {doc['name']}")
                    if doc["status"] == "Missing":
                        f.write(" (MISSING)")
                    f.write("\n")
    
    logger.info(f"Created document index at {index_path}")
    return index_path

def main(bid_dir: str, extracted_info: Dict):
    """
    Main function to prepare bid documents
    
    Args:
        bid_dir (str): Path to the bid directory
        extracted_info (dict): Dictionary with extracted tender information
    """
    # Use the configured company document repository; the company profile text is
    # assumed to live in a company_info.txt file inside it (adjust to your setup)
    company_docs_dir = COMPANY_REPOSITORY
    company_info = ""
    company_info_path = os.path.join(company_docs_dir, "company_info.txt")
    if os.path.exists(company_info_path):
        with open(company_info_path, 'r', encoding='utf-8') as f:
            company_info = f.read()
    else:
        logger.warning("No company_info.txt found in the company repository; "
                       "generated documents will lack company details")

    # Prepare bid documents
    final_docs_dir = prepare_bid_documents(bid_dir, company_docs_dir, company_info)
    if not final_docs_dir:
        return
    
    print(f"\nBid documents preparation completed")
    print(f"Documents available at: {final_docs_dir}")
    print("\nNext steps:")
    print("1. Review generated documents")
    print("2. Print and sign physical documents")
    print("3. Scan signed documents")
    print("4. Prepare final submission package")

if __name__ == "__main__":
    # Example usage
    bid_dir = r"D:\Tenders\SampleBid123"
    
    # Load extracted info from a file (in a real scenario, this would come from previous step)
    extracted_info_path = os.path.join(bid_dir, "extracted_info.json")
    
    if os.path.exists(extracted_info_path):
        with open(extracted_info_path, 'r', encoding='utf-8') as f:
            extracted_info = json.load(f)
    else:
        # Sample extracted info for testing
        extracted_info = {
            "Documents needed to submit the bid": """
            The following documents are required:
            - Certificate of incorporation and PAN Card
            - GST registration certificate
            - Work experience certificates
            - Declaration of not being blacklisted
            - Technical proposal with methodology
            - Financial proposal
            - Annexure 5: Company details form
            - EMD payment proof
            """,
            "Scope of work of the whole project": "Development of a social media management platform",
            "Eligibility/Qualification Criteria or conditions for bidder": "Minimum 3 years experience in IT services"
        }
    
    main(bid_dir, extracted_info)