Negate Boolean
Transform boolean values to their negation (TRUE to FALSE, FALSE to TRUE).
Negate Boolean
Processing
This function transforms boolean columns within the input data structure (Pandas DataFrame, Polars DataFrame, or Arrow Table) by negating their values (TRUE becomes FALSE, and FALSE becomes TRUE). Users can specify columns using an explicit list, a regex pattern, or choose to negate all identified boolean columns.
Inputs
- data
- The input data structure (DataFrame or Table) containing the boolean columns to modify.
- columns (optional)
- A list of specific column names to apply the negation operation to. If this is empty and no regex is provided, the function attempts to negate all boolean columns found.
- regex pattern (optional)
- A regular expression string used to dynamically select columns for negation. If provided, this mode takes precedence over the explicit column list.
Inputs Types
| Input | Types |
|---|---|
| data | DataFrame, ArrowTable |
| columns | List |
| regex pattern | Str |
You can check the list of supported types here: Available Type Hints.
Outputs
- result
- The resulting data structure (DataFrame or Arrow Table) with the specified boolean columns negated, returned in the format defined by the Output Format option.
Outputs Types
| Output | Types |
|---|---|
| result | DataFrame, ArrowTable |
You can check the list of supported types here: Available Type Hints.
Options
The Negate Boolean brick contains some changeable options:
- Columns to Negate
- A list of specific column names that should have their boolean values negated.
- Regex Pattern
- A regular expression pattern used to identify and select columns for negation. If provided, this pattern overrides the explicit column list.
- Output Format
- Defines the data structure format for the output data. Available choices are `pandas`, `polars`, or `arrow`.
- Safe Mode
- If enabled, the function suppresses errors related to non-existent columns or attempting to negate non-boolean data, instead skipping those columns and logging a warning. If disabled, such inconsistencies will halt the flow.
- Verbose
- If enabled, detailed logging messages about the operation (mode detection, column matching, query execution) are displayed during processing.
import logging
import duckdb
import pandas as pd
import polars as pl
import pyarrow as pa
import re
from coded_flows.types import Union, List, DataFrame, ArrowTable, Str
# Module-level logging setup: emit INFO-and-above records; the named logger
# below is used by all helpers and the brick entry point in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _coalesce(*values):
return next((v for v in values if v is not None), None)
def _sanitize_identifier(identifier):
"""
Sanitize SQL identifier by escaping special characters.
Handles double quotes and other problematic characters.
"""
return identifier.replace('"', '""')
def _is_boolean_type(duckdb_type):
"""
Check if a DuckDB type is boolean.
"""
type_lower = duckdb_type.lower()
return "bool" in type_lower
def negate_boolean(
    data: Union[DataFrame, ArrowTable],
    columns: List = None,
    regex_pattern: Str = None,
    options=None,
) -> Union[DataFrame, ArrowTable]:
    """Negate boolean columns of a pandas/Polars DataFrame or Arrow Table.

    Columns are selected by an explicit ``columns`` list, by a
    ``regex_pattern`` (which takes precedence over the list), or — when
    neither is given — every boolean column found in ``data``.

    Args:
        data: Input pandas DataFrame, Polars DataFrame, or Arrow Table.
        columns: Optional list of column names to negate.
        regex_pattern: Optional regex used to select columns; overrides
            ``columns`` when non-empty.
        options: Optional dict with keys ``verbose`` (bool, default True),
            ``columns`` / ``regex_pattern`` (fallbacks for the arguments),
            ``output_format`` ("pandas" | "polars" | "arrow", default
            "pandas"), and ``safe_mode`` (bool, default False).

    Returns:
        The data with the selected boolean columns negated, converted to
        the requested output format. When no column is negated, the input
        object is returned unchanged (in its original format).

    Raises:
        ValueError: If ``columns`` is not a list of strings, the input or
            output format is unsupported, the regex is invalid, or (with
            ``safe_mode`` off) a requested column is missing/non-boolean.
    """
    brick_display_name = "Negate Boolean"
    options = options or {}
    verbose = options.get("verbose", True)
    columns = _coalesce(columns, options.get("columns", []))
    regex_pattern = _coalesce(regex_pattern, options.get("regex_pattern", ""))
    # Validate `columns` BEFORE taking its length: a non-sized value (e.g.
    # an int) would otherwise surface as a TypeError from len() instead of
    # the documented ValueError.
    if not isinstance(columns, list) or not all((isinstance(c, str) for c in columns)):
        verbose and logger.error(
            f"[{brick_display_name}] Invalid columns format! Expected a list."
        )
        raise ValueError("Columns must be provided as a list!")
    fill_all_columns = len(columns) == 0
    output_format = options.get("output_format", "pandas")
    safe_mode = options.get("safe_mode", False)
    result = None
    conn = None
    try:
        # Selection mode: regex takes precedence; then "negate everything"
        # when no explicit columns were given; else the explicit list.
        if regex_pattern:
            negate_mode = "regex_pattern"
        elif fill_all_columns:
            negate_mode = "all_columns"
        else:
            negate_mode = "column_list"
        verbose and logger.info(
            f"[{brick_display_name}] Detected mode: '{negate_mode}'. Starting boolean negation operation."
        )
        data_type = None
        if isinstance(data, pd.DataFrame):
            data_type = "pandas"
        elif isinstance(data, pl.DataFrame):
            data_type = "polars"
        elif isinstance(data, (pa.Table, pa.lib.Table)):
            data_type = "arrow"
        if data_type is None:
            verbose and logger.error(
                f"[{brick_display_name}] Input data must be a pandas DataFrame, Polars DataFrame, or Arrow Table"
            )
            raise ValueError(
                "Input data must be a pandas DataFrame, Polars DataFrame, or Arrow Table"
            )
        verbose and logger.info(
            f"[{brick_display_name}] Detected input format: {data_type}."
        )
        # Use an in-memory DuckDB connection so the negation is expressed
        # as a single SQL projection over the registered input table.
        conn = duckdb.connect(":memory:")
        conn.register("input_table", data)
        column_info = conn.execute("DESCRIBE input_table").fetchall()
        all_columns = {col[0]: col[1] for col in column_info}  # name -> DuckDB type
        verbose and logger.info(
            f"[{brick_display_name}] Total columns in data: {len(all_columns)}."
        )
        columns_to_negate = []
        if negate_mode == "column_list":
            if not safe_mode:
                # Strict mode: a missing requested column is a hard error.
                missing_columns = [col for col in columns if col not in all_columns]
                if missing_columns:
                    verbose and logger.error(
                        f"[{brick_display_name}] Columns not found in data: {missing_columns}"
                    )
                    raise ValueError(f"Columns not found in data: {missing_columns}")
            columns_to_negate = [col for col in columns if col in all_columns]
            skipped = len(columns) - len(columns_to_negate)
            if safe_mode and skipped > 0:
                skipped_cols = [col for col in columns if col not in all_columns]
                verbose and logger.warning(
                    f"[{brick_display_name}] Safe mode: Skipped {skipped} non-existent columns: {skipped_cols}"
                )
            verbose and logger.info(
                f"[{brick_display_name}] Negating {len(columns_to_negate)} column(s): {columns_to_negate}."
            )
        elif negate_mode == "regex_pattern":
            try:
                pattern = re.compile(regex_pattern)
                columns_to_negate = [
                    col for col in all_columns.keys() if pattern.search(col)
                ]
                if not columns_to_negate:
                    verbose and logger.warning(
                        f"[{brick_display_name}] No columns matched regex pattern '{regex_pattern}'. Returning data unchanged."
                    )
                verbose and logger.info(
                    f"[{brick_display_name}] Regex pattern '{regex_pattern}' matched {len(columns_to_negate)} columns: {columns_to_negate}."
                )
            except re.error as e:
                verbose and logger.error(
                    f"[{brick_display_name}] Invalid regex pattern."
                )
                # Chain the underlying re.error so the root cause (bad
                # pattern syntax and position) is preserved in tracebacks.
                raise ValueError("Invalid regex pattern!") from e
        elif negate_mode == "all_columns":
            columns_to_negate = [
                col for col in all_columns.keys() if _is_boolean_type(all_columns[col])
            ]
            verbose and logger.info(
                f"[{brick_display_name}] Negating all boolean columns: {len(columns_to_negate)} columns."
            )
        # Build one SELECT that negates the chosen columns and passes every
        # other column through untouched, preserving the original order.
        select_parts = []
        negated_count = 0
        for col in all_columns.keys():
            sanitized_col = _sanitize_identifier(col)
            if col in columns_to_negate:
                col_type = all_columns[col]
                if not _is_boolean_type(col_type):
                    if safe_mode:
                        verbose and logger.warning(
                            f"[{brick_display_name}] Safe mode: Skipping non-boolean column '{col}' (type: {col_type})."
                        )
                        select_parts.append(f'"{sanitized_col}"')
                        continue
                    else:
                        verbose and logger.error(
                            f"[{brick_display_name}] Column '{col}' is not boolean (type: {col_type})."
                        )
                        raise ValueError(
                            f"Column '{col}' is not boolean (type: {col_type}). Cannot apply negation."
                        )
                try:
                    negation_expr = f'NOT "{sanitized_col}"'
                    select_parts.append(f'{negation_expr} AS "{sanitized_col}"')
                    verbose and logger.info(
                        f"[{brick_display_name}] Column '{col}' (type: {col_type}): applying negation (NOT)."
                    )
                    negated_count += 1
                except Exception as e:
                    if safe_mode:
                        verbose and logger.warning(
                            f"[{brick_display_name}] Safe mode: Skipping column '{col}' due to error: {str(e)}"
                        )
                        select_parts.append(f'"{sanitized_col}"')
                    else:
                        verbose and logger.error(
                            f"[{brick_display_name}] Error processing column '{col}'."
                        )
                        raise
            else:
                select_parts.append(f'"{sanitized_col}"')
        if negated_count == 0:
            # Nothing to do: return the input object as-is. NOTE: in this
            # path the data keeps its ORIGINAL format; output_format is not
            # applied (pre-existing behavior, kept for compatibility).
            verbose and logger.warning(
                f"[{brick_display_name}] No columns were negated. Returning data unchanged."
            )
            result = data
        else:
            select_clause = ", ".join(select_parts)
            query = f"SELECT {select_clause} FROM input_table"
            verbose and logger.info(
                f"[{brick_display_name}] Executing query to negate boolean values."
            )
            # Materialize in the caller-requested output format.
            if output_format == "pandas":
                result = conn.execute(query).df()
                verbose and logger.info(
                    f"[{brick_display_name}] Converted result to pandas DataFrame."
                )
            elif output_format == "polars":
                result = conn.execute(query).pl()
                verbose and logger.info(
                    f"[{brick_display_name}] Converted result to Polars DataFrame."
                )
            elif output_format == "arrow":
                result = conn.execute(query).fetch_arrow_table()
                verbose and logger.info(
                    f"[{brick_display_name}] Converted result to Arrow Table."
                )
            else:
                verbose and logger.error(
                    f"[{brick_display_name}] Unsupported output format: {output_format}"
                )
                raise ValueError(f"Unsupported output format: {output_format}")
        verbose and logger.info(
            f"[{brick_display_name}] Boolean negation operation completed successfully. Negated {negated_count} column(s)."
        )
    except Exception as e:
        verbose and logger.error(
            f"[{brick_display_name}] Error during boolean negation operation: {str(e)}"
        )
        raise
    finally:
        # Always release the in-memory DuckDB connection, even on error.
        if conn is not None:
            conn.close()
    return result
Brick Info
version
v0.1.3
python
3.10,
3.11,
3.12,
3.13
requirements
- pandas
- polars[pyarrow]
- duckdb
- pyarrow