Skip to content

Creating Custom BlendSQL Ingredients

All the built-in LLM ingredients inherit from the base classes QAIngredient, MapIngredient, and JoinIngredient.

These are intended to be helpful abstractions, so that the user can easily implement their own functions to run within a BlendSQL script.

The processing logic for a custom ingredient should go in a run() class function, and accept **kwargs in their signature.

QAIngredient

Source code in blendsql/ingredients/ingredient.py
422
423
424
@abstractmethod
def run(self, *args, **kwargs) -> Union[str, int, float]:
    ...

Example:

import pandas as pd
import outlines

from blendsql.models import Model
from blendsql.ingredients import QAIngredient
from blendsql._program import Program


class SummaryProgram(Program):
    """Program to call Model and return summary of the passed table.
    """

    def __call__(self, model: Model, serialized_db: str):
        prompt = f"Summarize the table below.\n\n{serialized_db}\n"
        # Below we follow the outlines pattern for unconstrained text generation
        # https://github.com/outlines-dev/outlines
        generator = outlines.generate.text(model.model_obj)
        # Finally, return (response, prompt) tuple
        # Returning the prompt here allows the underlying BlendSQL classes to track token usage
        return (generator(prompt), prompt)


class TableSummary(QAIngredient):
    def run(self, model: Model, context: pd.DataFrame, **kwargs) -> str:
        result = model.predict(program=SummaryProgram, serialized_db=context.to_string())
        return f"'{result}'"


if __name__ == "__main__":
    from blendsql import blend
    from blendsql.db import SQLite
    from blendsql.utils import fetch_from_hub
    from blendsql.models import TransformersLLM

    blendsql = """
    {{
        TableSummary(
            context=(SELECT * FROM transactions LIMIT 10)
        )
    }}
    """

    smoothie = blend(
        query=blendsql,
        default_model=TransformersLLM("Qwen/Qwen1.5-0.5B"),
        db=SQLite(fetch_from_hub("single_table.db")),
        ingredients={TableSummary}
    )
    # Now, we can get results
    print(smoothie.df)
    # ...and token usage
    print(smoothie.meta.prompt_tokens)
    print(smoothie.meta.completion_tokens)

Returns:

'The table shows a list of expenses made through Zelle, an online payment service. The expenses range from $175 to $2000. All transactions are categorized under "Cash/ATM" as the parent category. The dates of these transactions span throughout the year 2022.'

MapIngredient

Source code in blendsql/ingredients/ingredient.py
210
211
212
@abstractmethod
def run(self, *args, **kwargs) -> Iterable[Any]:
    ...

Example:

from typing import List
from blendsql.ingredients import MapIngredient
import requests


class GetQRCode(MapIngredient):
    """Calls API to generate QR code for a given URL.
    Saves bytes to file in qr_codes/ and returns list of paths.
    https://goqr.me/api/doc/create-qr-code/
    """

    def run(self, values: List[str], **kwargs) -> List[str]:
        imgs_as_bytes = []
        for value in values:
            qr_code_bytes = requests.get(
                "https://api.qrserver.com/v1/create-qr-code/?data=https://{}/&size=100x100".format(value)
            ).content
            imgs_as_bytes.append(qr_code_bytes)
        return imgs_as_bytes


if __name__ == "__main__":
    from blendsql import blend
    from blendsql.db import SQLite
    from blendsql.utils import fetch_from_hub

    blendsql = "SELECT genre, url, {{GetQRCode('QR Code as Bytes:', 'w::url')}} FROM w WHERE genre = 'social'"

    smoothie = blend(
        query=blendsql,
        default_model=None,
        db=SQLite(fetch_from_hub("urls.db")),
        ingredients={GetQRCode}
    )

Returns:

genre url QR Code as Bytes:
social facebook.com b'\x89PNG\r\n\x1a\n\x00\x00...
social instagram.com b'\x89PNG\r\n\x1a\n\x00\x00...
social tiktok.com b'\x89PNG\r\n\x1a\n\x00\x00...

JoinIngredient

Source code in blendsql/ingredients/ingredient.py
366
367
368
@abstractmethod
def run(self, *args, **kwargs) -> dict:
    ...