from crewai.tools import BaseTool
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Optional
from apify import Actor
from src.tools.base import RunApifyActor
class YouTubeScraperInput(BaseModel):
    """Input schema for YouTubeScraper tool."""
    # Every field is optional so a caller may supply search terms, direct
    # URLs, or both. The field names are camelCase because they are used
    # verbatim as keys of the actor run input.

    # An Optional[...] annotation alone does not make a pydantic v2 field
    # optional; without an explicit default the field is still required.
    # default=None lets URL-only requests pass validation.
    searchQueries: Optional[List[str]] = Field(
        default=None,
        description="Search terms just like you would enter in YouTube's search bar"
    )

    # 0 is falsy, so the tool omits this limit from the run input when unset.
    maxResultsShorts: Optional[int] = Field(
        default=0,
        description="Limit the number of Shorts videos to crawl"
    )

    # 0 is falsy, so the tool omits this limit from the run input when unset.
    maxResultStreams: Optional[int] = Field(
        default=0,
        description="Limit the number of Stream videos to crawl"
    )

    # pydantic copies mutable defaults per instance, so the list literal
    # here is not shared between model instances.
    startUrls: Optional[List[str]] = Field(
        default=[],
        description="Direct URLs to YouTube videos, channels, playlists, hashtags or search results"
    )

    # Additional parameters...
class YouTubeScraperTool(BaseTool):
    """CrewAI tool that passes YouTube scraping parameters to an Apify actor run.

    The arguments are described by ``YouTubeScraperInput``. The run itself is
    delegated to ``RunApifyActor``, constructed from the ``actor`` field.
    """

    name: str = "YouTube Scraper"
    description: str = "Tool for scraping YouTube videos, channels, playlists with configurable parameters"
    args_schema: type[BaseModel] = YouTubeScraperInput
    actor: Actor = Field(description="Apify Actor instance")
    # Actor is not a pydantic type, so arbitrary types must be allowed for
    # the ``actor`` field to be accepted.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _run(
        self,
        searchQueries: Optional[List[str]] = None,
        maxResultsShorts: Optional[int] = 0,
        maxResultStreams: Optional[int] = 0,
        startUrls: Optional[List[str]] = None,
        # Additional parameters...
    ) -> str:
        """Build the actor run input from the supplied arguments and run the actor.

        Only truthy arguments are forwarded: ``None``, ``0`` and empty lists
        are left out of the run input entirely.

        Args:
            searchQueries: Search terms to look up.
            maxResultsShorts: Limit on the number of Shorts videos to crawl.
            maxResultStreams: Limit on the number of Stream videos to crawl.
            startUrls: Direct URLs to scrape.

        Returns:
            Whatever ``RunApifyActor._run`` returns for the run, unmodified.
            NOTE(review): annotated as ``str``; confirm against the helper.
        """
        # Candidate values keyed by the exact names used in the run input.
        candidate_inputs = {
            "searchQueries": searchQueries,
            "maxResultsShorts": maxResultsShorts,
            "maxResultStreams": maxResultStreams,
            "startUrls": startUrls,
        }
        # Drop anything unset/empty so those keys are absent from the run input.
        run_inputs = {key: value for key, value in candidate_inputs.items() if value}
        # Set additional parameters...

        run_actor = RunApifyActor(self.actor)
        # NOTE(review): the actor identifier below looks like a placeholder;
        # confirm it names the real actor before relying on this tool.
        dataset = run_actor._run("youtube-scraper-actor-name", run_inputs)
        return dataset