# Copyright © 2025 Intellisol LLC. All Rights Reserved.
#
# This file is part of the Intellisol Automation System.
#
# This software is a trade secret of Intellisol LLC. It is proprietary and
# confidential information. You may not disclose this software or any part of it
# to any third party, or use it in any way not expressly authorized by the
# accompanying End-User License Agreement (EULA).
#
# UNPUBLISHED. RIGHTS RESERVED.


# data_extraction_system/agents/data_extractor_agent.py
from google.adk.agents import LlmAgent
from google.adk.models import Gemini
from google.adk.tools import FunctionTool
from pydantic import BaseModel, Field
from typing import Dict
from google.cloud import documentai_v1 as documentai
import os

class ExtractionResult(BaseModel):
    # Structured result returned by `extract_data` and passed to the agent
    # below as its `output_schema`.
    #
    # `extracted_data` is an untyped dict, so pydantic does not validate its
    # keys or values. The description refers to a "MortgageData" schema that
    # is not defined in this file.
    # NOTE(review): confirm where MortgageData lives and whether this field
    # should be typed against it.
    #
    # Comments are used instead of a class docstring on purpose: a docstring
    # would alter the model's generated schema description.
    extracted_data: Dict = Field(
        description="Extracted data from the document chunk, conforming to the MortgageData schema.",
    )

def extract_data(document_chunk: str) -> ExtractionResult:
    """Extracts data from a document chunk using Google Cloud Document AI.

    Args:
        document_chunk: Text content of the document chunk to extract from.

    Returns:
        An ExtractionResult whose ``extracted_data`` contains zero or more of
        the keys ``borrower_name``, ``loan_amount`` and ``property_address``.

    Raises:
        ValueError: If GOOGLE_CLOUD_PROJECT_ID or DOCUMENT_AI_PROCESSOR_ID is
            unset or empty.
    """
    # NOTE(review): this function is registered as an agent tool, so the
    # docstring above is presumably surfaced to the model as the tool
    # description - keep it caller-facing and put implementation notes here.
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT_ID")
    location = os.getenv("DOCUMENT_AI_LOCATION", "us-central1")
    processor_id = os.getenv("DOCUMENT_AI_PROCESSOR_ID")

    if not all([project_id, processor_id]):
        raise ValueError("GOOGLE_CLOUD_PROJECT_ID and DOCUMENT_AI_PROCESSOR_ID must be set.")

    # The client and processor path are built but NOT used yet: no request is
    # sent to Document AI. They are kept so the real processing call can be
    # wired in without changing the configuration contract above.
    # TODO: replace the simulation below with an actual Document AI request;
    # that will likely require the original document bytes rather than a
    # text chunk.
    client = documentai.DocumentProcessorServiceClient()
    name = client.processor_path(project_id, location, processor_id)  # noqa: F841

    # Placeholder behaviour: report the first 100 characters of the chunk and
    # return fixed sample values for every field whose label appears in it.
    print(f"Simulating extraction for chunk: {document_chunk[:100]}...")

    # (trigger phrases, output key, placeholder value), in output order.
    # "borrower name" is accepted alongside the original "borrower_name" so
    # that human-readable labels such as "Borrower Name: ..." are matched the
    # same way "Loan Amount" and "Property Address" already are.
    simulated_fields = (
        (("borrower name", "borrower_name"), "borrower_name", "John Doe"),
        (("loan amount",), "loan_amount", 500000),
        (("property address",), "property_address", "123 Main St, Anytown, USA"),
    )

    # Lower-case once so every phrase check is case-insensitive.
    chunk_lower = document_chunk.lower()
    extracted_data = {
        field: value
        for phrases, field, value in simulated_fields
        if any(phrase in chunk_lower for phrase in phrases)
    }

    return ExtractionResult(extracted_data=extracted_data)

# Module-level agent instance: a Gemini-backed LlmAgent that exposes
# `extract_data` as its only tool and declares `ExtractionResult` as its
# output schema.
# NOTE(review): whether an LlmAgent may combine `tools` with `output_schema`
# depends on the installed ADK version - confirm against the pinned release.
data_extractor_agent = LlmAgent(
    name="DataExtractorAgent",
    description=(
        "Extracts relevant information from mortgage document chunks "
        "using Google Cloud Document AI."
    ),
    model=Gemini(model="gemini-2.5-flash-lite"),
    instruction=(
        "You are responsible for extracting key information from mortgage document chunks. "
        "Use Google Cloud Document AI to identify and extract data points such as "
        "borrower name, loan amount, property address, etc. "
        "Return the extracted data in the specified ExtractionResult schema."
    ),
    tools=[FunctionTool(func=extract_data)],
    output_schema=ExtractionResult,
)
