Skip to content

Scrapegraph

ScrapegraphToolSpec #

Bases: BaseToolSpec

Scrapegraph tool specification for web scraping operations.

Source code in llama-index-integrations/tools/llama-index-tools-scrapegraph/llama_index/tools/scrapegraph/base.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
class ScrapegraphToolSpec(BaseToolSpec):
    """Tool spec exposing scrapegraph web-scraping operations.

    Every method named in ``spec_functions`` builds its own ``Client`` from the
    ``api_key`` it receives and forwards a single call to that client.
    """

    spec_functions = [
        "scrapegraph_smartscraper",
        "scrapegraph_markdownify",
        "scrapegraph_local_scrape",
    ]

    def scrapegraph_smartscraper(
        self,
        prompt: str,
        url: str,
        api_key: str,
        schema: Optional[List[BaseModel]] = None,
    ) -> List[Dict]:
        """Scrape a website synchronously, guided by a natural-language prompt.

        Args:
            prompt (str): Description of the scraping task, forwarded as ``user_prompt``
            url (str): Website to scrape, forwarded as ``website_url``
            api_key (str): scrapegraph API key used to construct the client
            schema (Optional[List[BaseModel]]): Pydantic models describing the desired
                output structure, forwarded as ``output_schema``; defaults to None

        Returns:
            List[Dict]: The value returned by ``Client.smartscraper``, passed through unchanged
        """
        # A fresh client is created per call; no client state is kept on the spec.
        sgai_client = Client(api_key=api_key)
        scraped = sgai_client.smartscraper(
            website_url=url,
            user_prompt=prompt,
            output_schema=schema,
        )
        return scraped

    def scrapegraph_markdownify(self, url: str, api_key: str) -> str:
        """Turn the content of a webpage into markdown via scrapegraph.

        Args:
            url (str): Website to convert, forwarded as ``website_url``
            api_key (str): scrapegraph API key used to construct the client

        Returns:
            str: The value returned by ``Client.markdownify``, passed through unchanged
        """
        sgai_client = Client(api_key=api_key)
        markdown = sgai_client.markdownify(website_url=url)
        return markdown

    def scrapegraph_local_scrape(self, text: str, api_key: str) -> str:
        """Pull structured data out of raw text via scrapegraph.

        Args:
            text (str): Raw text to process, forwarded as ``text``
            api_key (str): scrapegraph API key used to construct the client

        Returns:
            str: The value returned by ``Client.local_scrape``, passed through unchanged
        """
        sgai_client = Client(api_key=api_key)
        extracted = sgai_client.local_scrape(text=text)
        return extracted

scrapegraph_smartscraper #

scrapegraph_smartscraper(prompt: str, url: str, api_key: str, schema: Optional[List[BaseModel]] = None) -> List[Dict]

Perform synchronous web scraping using scrapegraph.

Parameters:

Name Type Description Default
prompt str

User prompt describing the scraping task

required
url str

Target website URL to scrape

required
api_key str

scrapegraph API key

required
schema Optional[List[BaseModel]]

Pydantic models defining the output structure

None

Returns:

Type Description
List[Dict]

Scraped data matching the provided schema

Source code in llama-index-integrations/tools/llama-index-tools-scrapegraph/llama_index/tools/scrapegraph/base.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def scrapegraph_smartscraper(
    self,
    prompt: str,
    url: str,
    api_key: str,
    schema: Optional[List[BaseModel]] = None,
) -> List[Dict]:
    """Scrape a website synchronously, guided by a natural-language prompt.

    Args:
        prompt (str): Description of the scraping task, forwarded as ``user_prompt``
        url (str): Website to scrape, forwarded as ``website_url``
        api_key (str): scrapegraph API key used to construct the client
        schema (Optional[List[BaseModel]]): Pydantic models describing the desired
            output structure, forwarded as ``output_schema``; defaults to None

    Returns:
        List[Dict]: The value returned by ``Client.smartscraper``, passed through unchanged
    """
    # A fresh client is created per call from the supplied key.
    sgai_client = Client(api_key=api_key)
    scraped = sgai_client.smartscraper(
        website_url=url,
        user_prompt=prompt,
        output_schema=schema,
    )
    return scraped

scrapegraph_markdownify #

scrapegraph_markdownify(url: str, api_key: str) -> str

Convert webpage content to markdown format using scrapegraph.

Parameters:

Name Type Description Default
url str

Target website URL to convert

required
api_key str

scrapegraph API key

required

Returns:

Name Type Description
str str

Markdown representation of the webpage content

Source code in llama-index-integrations/tools/llama-index-tools-scrapegraph/llama_index/tools/scrapegraph/base.py
45
46
47
48
49
50
51
52
53
54
55
56
57
def scrapegraph_markdownify(self, url: str, api_key: str) -> str:
    """Turn the content of a webpage into markdown via scrapegraph.

    Args:
        url (str): Website to convert, forwarded as ``website_url``
        api_key (str): scrapegraph API key used to construct the client

    Returns:
        str: The value returned by ``Client.markdownify``, passed through unchanged
    """
    sgai_client = Client(api_key=api_key)
    markdown = sgai_client.markdownify(website_url=url)
    return markdown

scrapegraph_local_scrape #

scrapegraph_local_scrape(text: str, api_key: str) -> str

Extract structured data from raw text using scrapegraph.

Parameters:

Name Type Description Default
text str

Raw text to process and extract data from

required
api_key str

scrapegraph API key

required

Returns:

Name Type Description
str str

Structured data extracted from the input text

Source code in llama-index-integrations/tools/llama-index-tools-scrapegraph/llama_index/tools/scrapegraph/base.py
59
60
61
62
63
64
65
66
67
68
69
70
71
def scrapegraph_local_scrape(self, text: str, api_key: str) -> str:
    """Pull structured data out of raw text via scrapegraph.

    Args:
        text (str): Raw text to process, forwarded as ``text``
        api_key (str): scrapegraph API key used to construct the client

    Returns:
        str: The value returned by ``Client.local_scrape``, passed through unchanged
    """
    sgai_client = Client(api_key=api_key)
    extracted = sgai_client.local_scrape(text=text)
    return extracted