from crewai.tools import BaseTool
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Optional, Literal
from apify import Actor
from src.tools.base import RunApifyActor
class RedditScraperInput(BaseModel):
    """Input schema for RedditScraper tool."""

    # NOTE: the one-line docstring above is kept verbatim on purpose; pydantic
    # exposes a model's docstring as its schema description.

    # Search queries; the only required field of this schema.
    searches: List[str] = Field(
        description=(
            "Here you can provide a search query which will be used to "
            "search Reddit's topics."
        ),
    )

    # Optional explicit page URLs; defaults to None when not supplied.
    startUrls: Optional[List[str]] = Field(
        default=None,
        description=(
            "If you already have URL(s) of page(s) you wish to scrape, "
            "you can set them here. If you want to use the search field "
            "below, remove all startUrls here."
        ),
    )

    # Optional flag; defaults to False.
    skipComments: Optional[bool] = Field(
        description="This will skip scrapping comments when going through posts",
        default=False,
    )

    # Additional parameters...
# crewAI tool that collects the caller-supplied scraping options into an input
# dict and hands it to an Apify actor through the project's RunApifyActor
# helper.
class RedditScraperTool(BaseTool):
    # Needed so pydantic accepts the non-pydantic `Actor` type as a field.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str = "Reddit Scraper"
    description: str = "Tool for scraping Reddit content with configurable parameters"
    # Schema describing the arguments accepted by `_run`.
    args_schema: type[BaseModel] = RedditScraperInput
    # Actor instance passed to RunApifyActor on every run.
    actor: Actor = Field(description="Apify Actor instance")

    def _run(
        self,
        searches: List[str],
        startUrls: Optional[List[str]] = None,
        skipComments: Optional[bool] = False,
        # Additional parameters...
    ) -> str:
        """Run the Reddit scraper actor with the options that were provided.

        Only truthy arguments are forwarded: an empty list, ``None`` or
        ``False`` is left out of the actor input entirely.

        Args:
            searches: Search queries, sent under the ``"searches"`` key.
            startUrls: Page URLs, sent under the ``"startUrls"`` key.
            skipComments: Flag sent under the ``"skipComments"`` key.

        Returns:
            Whatever ``RunApifyActor._run`` returns for this actor run.
            NOTE(review): the annotation says ``str``; confirm against the
            helper's actual return type.
        """
        # Pair each actor input key with its argument; tuple order fixes the
        # key order of the resulting dict.
        candidate_inputs = (
            ("searches", searches),
            ("startUrls", startUrls),
            ("skipComments", skipComments),
            # Set additional parameters...
        )
        actor_payload = {key: value for key, value in candidate_inputs if value}

        return RunApifyActor(self.actor)._run("reddit-scraper-actor-name", actor_payload)