from crewai.tools import BaseTool
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Optional
from apify import Actor
from src.tools.base import RunApifyActor
class YouTubeScraperInput(BaseModel):
    """Input schema for the YouTubeScraper tool.

    Every field is optional: each one carries an explicit default so the
    schema accepts a call that supplies only some of the parameters (for
    example only ``startUrls`` and no ``searchQueries``).
    """

    # An ``Optional[...]`` annotation alone does not make a pydantic v2 field
    # optional; without an explicit default the field is still required.
    searchQueries: Optional[List[str]] = Field(
        default=None,
        description="Search terms just like you would enter in YouTube's search bar",
    )
    maxResultsShorts: Optional[int] = Field(
        default=0,
        description="Limit the number of Shorts videos to crawl",
    )
    maxResultStreams: Optional[int] = Field(
        default=0,
        description="Limit the number of Stream videos to crawl",
    )
    startUrls: Optional[List[str]] = Field(
        default=[],
        description="Direct URLs to YouTube videos, channels, playlists, hashtags or search results",
    )
    # Additional parameters...
class YouTubeScraperTool(BaseTool):
    """CrewAI tool that forwards YouTube scraping parameters to an Apify actor run.

    The tool holds an Apify ``Actor`` instance and, when invoked, builds a
    run-input dictionary from the supplied arguments and hands it to
    ``RunApifyActor``.
    """

    name: str = "YouTube Scraper"
    description: str = "Tool for scraping YouTube videos, channels, playlists with configurable parameters"
    args_schema: type[BaseModel] = YouTubeScraperInput
    actor: Actor = Field(description="Apify Actor instance")
    # ``Actor`` is not a pydantic type, so arbitrary types must be allowed
    # for the ``actor`` field to be accepted.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _run(
        self,
        searchQueries: Optional[List[str]] = None,
        maxResultsShorts: Optional[int] = 0,
        maxResultStreams: Optional[int] = 0,
        startUrls: Optional[List[str]] = None,
        # Additional parameters...
    ) -> str:
        """Run the scraper actor with the provided parameters.

        Only truthy arguments are forwarded; ``None``, ``0`` and empty lists
        are left out of the run input entirely.

        Args:
            searchQueries: Search terms to look up.
            maxResultsShorts: Limit on the number of Shorts videos to crawl.
            maxResultStreams: Limit on the number of Stream videos to crawl.
            startUrls: Direct URLs to scrape. Defaults to ``None`` instead of
                a shared mutable list; both are treated as "not provided".

        Returns:
            Whatever ``RunApifyActor._run`` returns for the run.
            NOTE(review): annotated as ``str`` -- confirm against the helper.
        """
        candidates = {
            "searchQueries": searchQueries,
            "maxResultsShorts": maxResultsShorts,
            "maxResultStreams": maxResultStreams,
            "startUrls": startUrls,
        }
        # Drop unset/empty values so only explicitly provided parameters
        # end up in the run input.
        run_inputs = {key: value for key, value in candidates.items() if value}
        # Set additional parameters...
        run_actor = RunApifyActor(self.actor)
        # NOTE(review): the actor name looks like a placeholder -- confirm
        # the real actor identifier.
        return run_actor._run("youtube-scraper-actor-name", run_inputs)