Version: 1.3.13

PandasDatasource

Signature

class great_expectations.datasource.fluent.PandasDatasource(
*,
type: Literal['pandas'] = 'pandas',
name: str,
id: Optional[uuid.UUID] = None,
assets: List[great_expectations.datasource.fluent.pandas_datasource._PandasDataAsset] = []
)

Adds a single-batch pandas datasource to the data context.

Parameters

name

The name of this datasource.

assets

An optional list of Pandas DataAsset objects to include in this datasource.
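
Example

A minimal sketch of registering this datasource on a data context, assuming an existing project context; the datasource name "my_pandas" is arbitrary:

import great_expectations as gx

context = gx.get_context()

# Register a pandas datasource on the data context under an arbitrary name.
datasource = context.data_sources.add_pandas(name="my_pandas")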

Methods

add_clipboard_asset

Signature

add_clipboard_asset(
name: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: str = '\s+',
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → pydantic.v1.main.ClipboardAsset

Add a clipboard data asset to the datasource.

Parameters

name

The name of the clipboard asset. This can be any arbitrary string.

**kwargs

Additional keyword arguments to pass to pandas.read_clipboard().

Returns

pydantic.v1.main.ClipboardAsset

The ClipboardAsset that has been added to this datasource.

add_csv_asset

Signature

add_csv_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: Optional[str] = None,
delimiter: Optional[str] = None,
header: Union[int, Sequence[int], None, Literal['infer']] = 'infer',
names: Union[Sequence[str], None] = None,
index_col: Union[IndexLabel, Literal[False], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[CSVEngine, None] = None,
true_values: Optional[List] = None,
false_values: Optional[List] = None,
skipinitialspace: bool = False,
skiprows: Optional[Union[Sequence[int], int]] = None,
skipfooter: int = 0,
nrows: Optional[int] = None,
na_values: Union[Sequence[str], None] = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
skip_blank_lines: bool = True,
parse_dates: Union[bool, Sequence[str], None] = None,
infer_datetime_format: bool = None,
keep_date_col: bool = False,
date_format: Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: Optional[str] = None,
decimal: str = '.',
lineterminator: Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: Optional[str] = None,
comment: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
dialect: Optional[str] = None,
on_bad_lines: str = 'error',
delim_whitespace: bool = False,
low_memory: bool = True,
memory_map: bool = False,
float_precision: Union[Literal['high', 'legacy'], None] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.CSVAsset

Add a CSV data asset to the datasource.

Parameters

name

The name of the CSV asset. This can be any arbitrary string.

filepath_or_buffer

The path to the CSV file or a URL pointing to the CSV file.

**kwargs

Additional keyword arguments to pass to pandas.read_csv().

Returns

pydantic.v1.main.CSVAsset

The CSVAsset that has been added to this datasource.
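
Example

For illustration, assuming the datasource created in the earlier example; the asset name and file path are hypothetical, and sep is forwarded to pandas.read_csv():

asset = datasource.add_csv_asset(
    name="taxi_csv",
    filepath_or_buffer="./data/taxi.csv",
    sep=",",
)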

add_dataframe_asset

Signature

add_dataframe_asset(
name: str,
batch_metadata: Optional[BatchMetadata] = None
) → DataFrameAsset

Adds a Dataframe DataAsset to this PandasDatasource object.

Parameters

name

The name of the Dataframe asset. This can be any arbitrary string.

batch_metadata

An arbitrary user defined dictionary with string keys which will get inherited by any batches created from the asset.

Returns

DataFrameAsset

The DataFrameAsset that has been added to this datasource.
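
Example

A sketch of adding a DataFrame asset and reading a whole-dataframe batch from it, assuming the datasource from the earlier example; the asset and batch definition names are arbitrary:

import pandas as pd

df = pd.DataFrame({"passenger_count": [1, 2, 3]})

asset = datasource.add_dataframe_asset(name="taxi_frame")
# DataFrame assets take the dataframe at batch-retrieval time,
# via a whole-dataframe batch definition.
batch_definition = asset.add_batch_definition_whole_dataframe("whole_df")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df})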

add_excel_asset

Signature

add_excel_asset(
name: str,
io: os.PathLike | str | bytes,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sheet_name: Optional[Union[str, int, List[Union[int, str]]]] = 0,
header: Union[int, Sequence[int], None] = 0,
names: Optional[List[str]] = None,
index_col: Union[int, Sequence[int], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb'], None] = None,
true_values: Union[Iterable[str], None] = None,
false_values: Union[Iterable[str], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
nrows: Optional[int] = None,
na_values: Any = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
parse_dates: Union[List, Dict, bool] = False,
date_format: Optional[str] = None,
thousands: Optional[str] = None,
decimal: str = '.',
comment: Optional[str] = None,
skipfooter: int = 0,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
engine_kwargs: Optional[Dict] = None,
**extra_data: Any
) → ExcelAsset

Add an Excel data asset to the datasource.

Parameters

name

The name of the Excel asset. This can be any arbitrary string.

io

The path to the Excel file or a URL pointing to the Excel file.

**kwargs

Additional keyword arguments to pass to pandas.read_excel().

Returns

ExcelAsset

The ExcelAsset that has been added to this datasource.
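
Example

For illustration; the asset name and workbook path are hypothetical, and sheet_name is forwarded to pandas.read_excel():

asset = datasource.add_excel_asset(
    name="sales_xlsx",
    io="./data/sales.xlsx",
    sheet_name=0,
)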

add_feather_asset

Signature

add_feather_asset(
name: str,
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
columns: Union[Sequence[str], None] = None,
use_threads: bool = True,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.FeatherAsset

Add a Feather data asset to the datasource.

Parameters

name

The name of the Feather asset. This can be any arbitrary string.

path

The path to the Feather file or a URL pointing to the Feather file.

**kwargs

Additional keyword arguments to pass to pandas.read_feather().

Returns

pydantic.v1.main.FeatherAsset

The FeatherAsset that has been added to this datasource.

add_fwf_asset

Signature

add_fwf_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
colspecs: Union[Sequence[Tuple[int, int]], str, None] = 'infer',
widths: Union[Sequence[int], None] = None,
infer_nrows: int = 100,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → pydantic.v1.main.FWFAsset

Adds a Fixed Width File DataAsset to the datasource.

Parameters

name

The name of the fixed width file asset. This can be any arbitrary string.

filepath_or_buffer

The path to the fixed width file or a URL pointing to the file.

**kwargs

Additional keyword arguments to pass to pandas.read_fwf().

Returns

pydantic.v1.main.FWFAsset

The FWFAsset that has been added to this datasource.

add_gbq_asset

Signature

add_gbq_asset(
name: str,
query: str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
project_id: Optional[str] = None,
index_col: Optional[str] = None,
col_order: Optional[List[str]] = None,
reauth: bool = False,
auth_local_webserver: bool = True,
dialect: Optional[str] = None,
location: Optional[str] = None,
configuration: Optional[Dict[str, Any]] = None,
use_bqstorage_api: Optional[bool] = None,
max_results: Optional[int] = None,
progress_bar_type: Optional[str] = None,
**extra_data: Any
) → pydantic.v1.main.GBQAsset

Add a GBQ data asset to the datasource.

Parameters

name

The name of the GBQ asset. This can be any arbitrary string.

query

The SQL query to send to Google BigQuery.

**kwargs

Additional keyword arguments to pass to pandas.read_gbq().

Returns

pydantic.v1.main.GBQAsset

The GBQAsset that has been added to this datasource.

add_hdf_asset

Signature

add_hdf_asset(
name: str,
path_or_buf: pd.HDFStore | os.PathLike | str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
key: Any = None,
mode: str = 'r',
errors: str = 'strict',
where: Optional[Union[str, List]] = None,
start: Optional[int] = None,
stop: Optional[int] = None,
columns: Optional[List[str]] = None,
iterator: bool = False,
chunksize: Optional[int] = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → HDFAsset

Add an HDF data asset to the datasource.

Parameters

name

The name of the HDF asset. This can be any arbitrary string.

path_or_buf

The path to the HDF file or a URL pointing to the HDF file.

**kwargs

Additional keyword arguments to pass to pandas.read_hdf().

Returns

HDFAsset

The HDFAsset that has been added to this datasource.

add_html_asset

Signature

add_html_asset(
name: str,
io: os.PathLike | str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
match: Union[str, Pattern] = '.+',
flavor: Optional[str] = None,
header: Union[int, Sequence[int], None] = None,
index_col: Union[int, Sequence[int], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
attrs: Optional[Dict[str, str]] = None,
parse_dates: bool = False,
thousands: Optional[str] = ',',
encoding: Optional[str] = None,
decimal: str = '.',
converters: Optional[Dict] = None,
na_values: Union[Iterable[object], None] = None,
keep_default_na: bool = True,
displayed_only: bool = True,
extract_links: Literal[None, 'header', 'footer', 'body', 'all'] = None,
dtype_backend: DtypeBackend = None,
storage_options: StorageOptions = None,
**extra_data: Any
) → HTMLAsset

Add an HTML data asset to the datasource.

Parameters

name

The name of the HTML asset. This can be any arbitrary string.

io

The path to the HTML file or a URL pointing to the HTML file.

**kwargs

Additional keyword arguments to pass to pandas.read_html().

Returns

HTMLAsset

The HTMLAsset that has been added to this datasource.

add_json_asset

Signature

add_json_asset(
name: str,
path_or_buf: pydantic.v1.types.Json | pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
orient: Optional[str] = None,
typ: Literal['frame', 'series'] = 'frame',
dtype: Optional[dict] = None,
convert_axes: Optional[bool] = None,
convert_dates: Union[bool, List[str]] = True,
keep_default_dates: bool = True,
precise_float: bool = False,
date_unit: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
lines: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
nrows: Optional[int] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.JSONAsset

Add a JSON data asset to the datasource.

Parameters

name

The name of the JSON asset. This can be any arbitrary string.

path_or_buf

The path to the JSON file or a URL pointing to the JSON file.

**kwargs

Additional keyword arguments to pass to pandas.read_json().

Returns

pydantic.v1.main.JSONAsset

The JSONAsset that has been added to this datasource.
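
Example

A sketch of adding a newline-delimited JSON asset; the asset name and path are hypothetical, and lines is forwarded to pandas.read_json():

asset = datasource.add_json_asset(
    name="events_json",
    path_or_buf="./data/events.jsonl",
    lines=True,
)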

add_orc_asset

Signature

add_orc_asset(
name: str,
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
columns: Optional[List[str]] = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → pydantic.v1.main.ORCAsset

Add an ORC file as a DataAsset to this PandasDatasource object.

Parameters

name

The name to use for the ORC asset. This can be any arbitrary string.

path

The path to the ORC file.

**kwargs

Additional kwargs to pass to the ORC reader.

Returns

pydantic.v1.main.ORCAsset

The ORCAsset that has been added to this datasource.

add_parquet_asset

Signature

add_parquet_asset(
name: str,
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
engine: str = 'auto',
columns: Optional[List[str]] = None,
storage_options: Union[StorageOptions, None] = None,
use_nullable_dtypes: bool = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → pydantic.v1.main.ParquetAsset

Add a parquet file as a DataAsset to this PandasDatasource object.

Parameters

name

The name to use for the parquet asset. This can be any arbitrary string.

path

The path to the parquet file.

**kwargs

Additional kwargs to pass to the parquet reader.

Returns

pydantic.v1.main.ParquetAsset

The ParquetAsset that has been added to this datasource.
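
Example

For illustration; the asset name, file path, and column list are hypothetical:

asset = datasource.add_parquet_asset(
    name="trips_parquet",
    path="./data/trips.parquet",
    columns=["pickup_datetime", "dropoff_datetime"],
)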

add_pickle_asset

Signature

add_pickle_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
**extra_data: Any
) → pydantic.v1.main.PickleAsset

Add a pickle file as a DataAsset to this PandasDatasource object.

Parameters

name

The name to use for the pickle asset. This can be any arbitrary string.

filepath_or_buffer

The path to the pickle file.

**kwargs

Additional kwargs to pass to the pickle reader.

Returns

pydantic.v1.main.PickleAsset

The PickleAsset that has been added to this datasource.

add_sas_asset

Signature

add_sas_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
format: Optional[str] = None,
index: Optional[str] = None,
encoding: Optional[str] = None,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
**extra_data: Any
) → pydantic.v1.main.SASAsset

Add a SAS data asset to the datasource.

Parameters

name

The name of the SAS asset. This can be any arbitrary string.

filepath_or_buffer

The path to the SAS file or a URL pointing to the SAS file.

**kwargs

Additional keyword arguments to pass to pandas.read_sas().

Returns

pydantic.v1.main.SASAsset

The SASAsset that has been added to this datasource.

add_spss_asset

Signature

add_spss_asset(
name: str,
path: pydantic.v1.types.FilePath,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
convert_categoricals: bool = True,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.SPSSAsset

Add an SPSS data asset to the datasource.

Parameters

name

The name of the SPSS asset. This can be any arbitrary string.

path

The path to the SPSS file.

**kwargs

Additional keyword arguments to pass to pandas.read_spss().

Returns

pydantic.v1.main.SPSSAsset

The SPSSAsset that has been added to this datasource.

add_sql_asset

Signature

add_sql_asset(
name: str,
sql: sa.select | sa.text | str,
con: sqlalchemy.Engine | sqlite3.Connection | str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
params: Any = None,
parse_dates: Any = None,
columns: Optional[List[str]] = None,
chunksize: Optional[int] = None,
dtype_backend: DtypeBackend = None,
dtype: Optional[dict] = None,
**extra_data: Any
) → SQLAsset

Add a SQL data asset to the datasource.

Parameters

name

The name of the SQL asset. This can be any arbitrary string.

sql

The SQL query to send to the database.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

**kwargs

Additional keyword arguments to pass to pandas.read_sql().

Returns

SQLAsset

The SQLAsset that has been added to this datasource.

add_sql_query_asset

Signature

add_sql_query_asset(
name: str,
sql: sa.select | sa.text | str,
con: sqlalchemy.Engine | sqlite3.Connection | str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
parse_dates: Optional[Union[List[str], Dict[str, str]]] = None,
chunksize: Optional[int] = None,
dtype: Optional[dict] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → SQLQueryAsset

Add a SQL query data asset to the datasource.

Parameters

name

The name of the SQL query asset. This can be any arbitrary string.

sql

The SQL query to send to the database.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

**kwargs

Additional keyword arguments to pass to pandas.read_sql_query().

Returns

SQLQueryAsset

The SQLQueryAsset that has been added to this datasource.

add_sql_table_asset

Signature

add_sql_table_asset(
name: str,
table_name: str,
con: sqlalchemy.engine.base.Engine | str,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
schema: Optional[str] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
parse_dates: Optional[Union[List[str], Dict[str, str]]] = None,
columns: Optional[List[str]] = None,
chunksize: Optional[int] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.SQLTableAsset

Add a SQL table data asset to the datasource.

Parameters

name

The name of the SQL table asset. This can be any arbitrary string.

table_name

The name of the SQL table to read.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

**kwargs

Additional keyword arguments to pass to pandas.read_sql_table().

Returns

pydantic.v1.main.SQLTableAsset

The SQLTableAsset that has been added to this datasource.
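
Example

A sketch using a local SQLite database; the asset name, table name, and connection URL are hypothetical:

asset = datasource.add_sql_table_asset(
    name="users_table",
    table_name="users",
    con="sqlite:///example.db",
)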

add_stata_asset

Signature

add_stata_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
convert_dates: bool = True,
convert_categoricals: bool = True,
index_col: Optional[str] = None,
convert_missing: bool = False,
preserve_dtypes: bool = True,
columns: Union[Sequence[str], None] = None,
order_categoricals: bool = True,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
**extra_data: Any
) → pydantic.v1.main.StataAsset

Add a Stata data asset to the datasource.

Parameters

name

The name of the Stata asset. This can be any arbitrary string.

filepath_or_buffer

The path to the Stata file or a URL pointing to the Stata file.

**kwargs

Additional keyword arguments to pass to pandas.read_stata().

Returns

pydantic.v1.main.StataAsset

The StataAsset that has been added to this datasource.

add_table_asset

Signature

add_table_asset(
name: str,
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: Optional[str] = None,
delimiter: Optional[str] = None,
header: Union[int, Sequence[int], None, Literal['infer']] = 'infer',
names: Union[Sequence[str], None] = None,
index_col: Union[IndexLabel, Literal[False], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[CSVEngine, None] = None,
true_values: Optional[List] = None,
false_values: Optional[List] = None,
skipinitialspace: bool = False,
skiprows: Optional[Union[Sequence[int], int]] = None,
skipfooter: int = 0,
nrows: Optional[int] = None,
na_values: Union[Sequence[str], None] = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
skip_blank_lines: bool = True,
parse_dates: Union[bool, Sequence[str]] = False,
infer_datetime_format: bool = None,
keep_date_col: bool = False,
date_format: Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: Optional[str] = None,
decimal: str = '.',
lineterminator: Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: Optional[str] = None,
comment: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
dialect: Optional[str] = None,
on_bad_lines: str = 'error',
delim_whitespace: bool = False,
low_memory: bool = True,
memory_map: bool = False,
float_precision: Optional[str] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.TableAsset

Add a Table data asset to the datasource.

Parameters

name

The name of the Table asset. This can be any arbitrary string.

filepath_or_buffer

The path to the Table file or a URL pointing to the Table file.

**kwargs

Additional keyword arguments to pass to pandas.read_table().

Returns

pydantic.v1.main.TableAsset

The TableAsset that has been added to this datasource.

add_xml_asset

Signature

add_xml_asset(
name: str,
path_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
*,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
xpath: str = './*',
namespaces: Optional[Dict[str, str]] = None,
elems_only: bool = False,
attrs_only: bool = False,
names: Union[Sequence[str], None] = None,
dtype: Optional[dict] = None,
encoding: Optional[str] = 'utf-8',
stylesheet: Union[FilePath, None] = None,
iterparse: Optional[Dict[str, List[str]]] = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → pydantic.v1.main.XMLAsset

Add an XML data asset to the datasource.

Parameters

name

The name of the XML asset. This can be any arbitrary string.

path_or_buffer

The path to the XML file or a URL pointing to the XML file.

**kwargs

Additional keyword arguments to pass to pandas.read_xml().

Returns

pydantic.v1.main.XMLAsset

The XMLAsset that has been added to this datasource.

delete_asset

Signature

delete_asset(
name: str
) → None

Removes the named DataAsset from this datasource's internal list of available DataAsset objects.

Parameters

name

name of DataAsset to be deleted.

get_asset

Signature

get_asset(
name: str
) → great_expectations.datasource.fluent.interfaces._DataAssetT

Returns the DataAsset referred to by name.

Parameters

name

name of DataAsset sought.

Returns

great_expectations.datasource.fluent.interfaces._DataAssetT

The named DataAsset, if it exists; otherwise, an exception is raised.

read_clipboard

Signature

read_clipboard(
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: str = '\s+',
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a clipboard and return a Batch containing the data.

Parameters

asset_name

The name of the clipboard asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_clipboard().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral ClipboardAsset.

read_csv

Signature

read_csv(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: Optional[str] = None,
delimiter: Optional[str] = None,
header: Union[int, Sequence[int], None, Literal['infer']] = 'infer',
names: Union[Sequence[str], None] = None,
index_col: Union[IndexLabel, Literal[False], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[CSVEngine, None] = None,
true_values: Optional[List] = None,
false_values: Optional[List] = None,
skipinitialspace: bool = False,
skiprows: Optional[Union[Sequence[int], int]] = None,
skipfooter: int = 0,
nrows: Optional[int] = None,
na_values: Union[Sequence[str], None] = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
skip_blank_lines: bool = True,
parse_dates: Union[bool, Sequence[str], None] = None,
infer_datetime_format: bool = None,
keep_date_col: bool = False,
date_format: Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: Optional[str] = None,
decimal: str = '.',
lineterminator: Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: Optional[str] = None,
comment: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
dialect: Optional[str] = None,
on_bad_lines: str = 'error',
delim_whitespace: bool = False,
low_memory: bool = True,
memory_map: bool = False,
float_precision: Union[Literal['high', 'legacy'], None] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a CSV file and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the CSV file or a URL pointing to the CSV file.

asset_name

The name of the CSV asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_csv().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral CSVAsset.
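
Example

A sketch of reading a CSV directly into a Batch and validating it; the file path and column name are hypothetical:

import great_expectations.expectations as gxe

batch = datasource.read_csv("./data/taxi.csv")
# Validate a single Expectation directly against the returned Batch.
result = batch.validate(gxe.ExpectColumnValuesToNotBeNull(column="vendor_id"))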

read_dataframe

Signature

read_dataframe(
dataframe: pd.DataFrame,
asset_name: Optional[str] = None,
batch_metadata: Optional[BatchMetadata] = None
) → Batch

Reads a Dataframe and returns a Batch containing the data.

Parameters

dataframe

The Dataframe containing the data for this data asset.

asset_name

The name of the Dataframe asset, should you wish to use it again.

batch_metadata

An arbitrary user defined dictionary with string keys which will get inherited by any batches created from the asset.

Returns

Batch

A Batch using an ephemeral DataFrameAsset.
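
Example

A minimal sketch of wrapping an in-memory DataFrame in a Batch; the asset name is arbitrary:

import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3]})
batch = datasource.read_dataframe(df, asset_name="in_memory_frame")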

read_excel

Signature

read_excel(
io: os.PathLike | str | bytes,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sheet_name: Optional[Union[str, int, List[Union[int, str]]]] = 0,
header: Union[int, Sequence[int], None] = 0,
names: Optional[List[str]] = None,
index_col: Union[int, Sequence[int], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb'], None] = None,
true_values: Union[Iterable[str], None] = None,
false_values: Union[Iterable[str], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
nrows: Optional[int] = None,
na_values: Any = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
parse_dates: Union[List, Dict, bool] = False,
date_format: Optional[str] = None,
thousands: Optional[str] = None,
decimal: str = '.',
comment: Optional[str] = None,
skipfooter: int = 0,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
engine_kwargs: Optional[Dict] = None,
**extra_data: Any
) → Batch

Read an Excel file and return a Batch containing the data.

Parameters

io

The path to the Excel file or a URL pointing to the Excel file.

asset_name

The name of the Excel asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_excel().

Returns

Batch

A Batch using an ephemeral ExcelAsset.

read_feather

Signature

read_feather(
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
columns: Union[Sequence[str], None] = None,
use_threads: bool = True,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a Feather file and return a Batch containing the data.

Parameters

path

The path to the Feather file or a URL pointing to the Feather file.

asset_name

The name of the Feather asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_feather().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral FeatherAsset.

read_fwf

Signature

read_fwf(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
colspecs: Union[Sequence[Tuple[int, int]], str, None] = 'infer',
widths: Union[Sequence[int], None] = None,
infer_nrows: int = 100,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a Fixed Width File and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the fixed width file or a URL pointing to the file.

asset_name

The name of the asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_fwf().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral FWFAsset.

read_gbq

Signature

read_gbq(
query: str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
project_id: Optional[str] = None,
index_col: Optional[str] = None,
col_order: Optional[List[str]] = None,
reauth: bool = False,
auth_local_webserver: bool = True,
dialect: Optional[str] = None,
location: Optional[str] = None,
configuration: Optional[Dict[str, Any]] = None,
use_bqstorage_api: Optional[bool] = None,
max_results: Optional[int] = None,
progress_bar_type: Optional[str] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a Google BigQuery query and return a Batch containing the data.

Parameters

query

The SQL query to send to Google BigQuery.

asset_name

The name of the GBQ asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_gbq().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral GBQAsset.

read_hdf

Signature

read_hdf(
path_or_buf: pd.HDFStore | os.PathLike | str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
key: Any = None,
mode: str = 'r',
errors: str = 'strict',
where: Optional[Union[str, List]] = None,
start: Optional[int] = None,
stop: Optional[int] = None,
columns: Optional[List[str]] = None,
iterator: bool = False,
chunksize: Optional[int] = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → Batch

Read an HDF file and return a Batch containing the data.

Parameters

path_or_buf

The path to the HDF file or a URL pointing to the HDF file.

asset_name

The name of the HDF asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_hdf().

Returns

Batch

A Batch using an ephemeral HDFAsset.

read_html

Signature

read_html(
io: os.PathLike | str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
match: Union[str, Pattern] = '.+',
flavor: Optional[str] = None,
header: Union[int, Sequence[int], None] = None,
index_col: Union[int, Sequence[int], None] = None,
skiprows: Optional[Union[Sequence[int], int]] = None,
attrs: Optional[Dict[str, str]] = None,
parse_dates: bool = False,
thousands: Optional[str] = ',',
encoding: Optional[str] = None,
decimal: str = '.',
converters: Optional[Dict] = None,
na_values: Union[Iterable[object], None] = None,
keep_default_na: bool = True,
displayed_only: bool = True,
extract_links: Literal[None, 'header', 'footer', 'body', 'all'] = None,
dtype_backend: DtypeBackend = None,
storage_options: StorageOptions = None,
**extra_data: Any
) → Batch

Read an HTML file and return a Batch containing the data.

Parameters

io

The path to the HTML file or a URL pointing to the HTML file.

asset_name

The name of the HTML asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_html().

Returns

Batch

A Batch using an ephemeral HTMLAsset.

read_json

Signature

read_json(
path_or_buf: pydantic.v1.types.Json | pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
orient: Optional[str] = None,
typ: Literal['frame', 'series'] = 'frame',
dtype: Optional[dict] = None,
convert_axes: Optional[bool] = None,
convert_dates: Union[bool, List[str]] = True,
keep_default_dates: bool = True,
precise_float: bool = False,
date_unit: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
lines: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
nrows: Optional[int] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a JSON file and return a Batch containing the data.

Parameters

path_or_buf

The path to the JSON file or a URL pointing to the JSON file.

asset_name

The name of the JSON asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_json().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral JSONAsset.

read_orc

Signature

read_orc(
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
columns: Optional[List[str]] = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read an ORC file and return a Batch containing the data.

Parameters

path

The path to the ORC file.

asset_name (optional)

The asset name to use for the ORC file, should you wish to use or refer to it again.

**kwargs

Additional kwargs to pass to the ORC reader.

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral ORCAsset.

read_parquet

Signature

read_parquet(
path: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
engine: str = 'auto',
columns: Optional[List[str]] = None,
storage_options: Union[StorageOptions, None] = None,
use_nullable_dtypes: bool = None,
dtype_backend: DtypeBackend = None,
kwargs: Optional[dict] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a parquet file and return a Batch containing the data.

Parameters

path

The path to the parquet file.

asset_name (optional)

The asset name to use for the parquet file, should you wish to use or refer to it again.

**kwargs

Additional kwargs to pass to the parquet reader.

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral ParquetAsset.

read_pickle

Signature

read_pickle(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a pickle file and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the pickle file.

asset_name (optional)

The asset name to use for the pickle file, should you wish to use or refer to it again.

**kwargs

Additional kwargs to pass to the pickle reader.

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral PickleAsset.

read_sas

Signature

read_sas(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
format: Optional[str] = None,
index: Optional[str] = None,
encoding: Optional[str] = None,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a SAS file and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the SAS file or a URL pointing to the SAS file.

asset_name

The name of the SAS asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_sas().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral SASAsset.

read_spss

Signature

read_spss(
path: pydantic.v1.types.FilePath,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
convert_categoricals: bool = True,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read an SPSS file and return a Batch containing the data.

Parameters

path

The path to the SPSS file.

asset_name

The name of the SPSS asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_spss().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral SPSSAsset.

read_sql

Signature

read_sql(
sql: sa.select | sa.text | str,
con: sqlalchemy.Engine | sqlite3.Connection | str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
params: Any = None,
parse_dates: Any = None,
columns: Optional[List[str]] = None,
chunksize: Optional[int] = None,
dtype_backend: DtypeBackend = None,
dtype: Optional[dict] = None,
**extra_data: Any
) → Batch

Read a SQL query and return a Batch containing the data.

Parameters

sql

The SQL query to send to the database.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

asset_name

The name of the SQL asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_sql().

Returns

Batch

A Batch using an ephemeral SQLAsset.
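
Example

A sketch of reading a query result into a Batch; the query and SQLite connection URL are hypothetical:

batch = datasource.read_sql(
    "SELECT * FROM users",
    con="sqlite:///example.db",
)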

read_sql_query

Signature

read_sql_query(
sql: sa.select | sa.text | str,
con: sqlalchemy.Engine | sqlite3.Connection | str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
parse_dates: Optional[Union[List[str], Dict[str, str]]] = None,
chunksize: Optional[int] = None,
dtype: Optional[dict] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → Batch

Read a SQL query and return a Batch containing the data.

Parameters

sql

The SQL query to send to the database.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

asset_name

The name of the SQL query asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_sql_query().

Returns

Batch

A Batch using an ephemeral SQLQueryAsset.

read_sql_table

Signature

read_sql_table(
table_name: str,
con: sqlalchemy.engine.base.Engine | str,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
schema: Optional[str] = None,
index_col: Optional[Union[str, List[str]]] = None,
coerce_float: bool = True,
parse_dates: Optional[Union[List[str], Dict[str, str]]] = None,
columns: Optional[List[str]] = None,
chunksize: Optional[int] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a SQL table and return a Batch containing the data.

Parameters

table_name

The name of the SQL table to read.

con

The SQLAlchemy connection engine or a string URL to connect to the database.

asset_name

The name of the SQL table asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_sql_table().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral SQLTableAsset.

read_stata

Signature

read_stata(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
convert_dates: bool = True,
convert_categoricals: bool = True,
index_col: Optional[str] = None,
convert_missing: bool = False,
preserve_dtypes: bool = True,
columns: Union[Sequence[str], None] = None,
order_categoricals: bool = True,
chunksize: Optional[int] = None,
iterator: bool = False,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a Stata file and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the Stata file or a URL pointing to the Stata file.

asset_name

The name of the Stata asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_stata().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral StataAsset.

read_table

Signature

read_table(
filepath_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
sep: Optional[str] = None,
delimiter: Optional[str] = None,
header: Union[int, Sequence[int], None, Literal['infer']] = 'infer',
names: Union[Sequence[str], None] = None,
index_col: Union[IndexLabel, Literal[False], None] = None,
usecols: Optional[Union[int, str, Sequence[int]]] = None,
dtype: Optional[dict] = None,
engine: Union[CSVEngine, None] = None,
true_values: Optional[List] = None,
false_values: Optional[List] = None,
skipinitialspace: bool = False,
skiprows: Optional[Union[Sequence[int], int]] = None,
skipfooter: int = 0,
nrows: Optional[int] = None,
na_values: Union[Sequence[str], None] = None,
keep_default_na: bool = True,
na_filter: bool = True,
verbose: bool = False,
skip_blank_lines: bool = True,
parse_dates: Union[bool, Sequence[str]] = False,
infer_datetime_format: bool = None,
keep_date_col: bool = False,
date_format: Optional[str] = None,
dayfirst: bool = False,
cache_dates: bool = True,
iterator: bool = False,
chunksize: Optional[int] = None,
compression: CompressionOptions = 'infer',
thousands: Optional[str] = None,
decimal: str = '.',
lineterminator: Optional[str] = None,
quotechar: str = '"',
quoting: int = 0,
doublequote: bool = True,
escapechar: Optional[str] = None,
comment: Optional[str] = None,
encoding: Optional[str] = None,
encoding_errors: Optional[str] = 'strict',
dialect: Optional[str] = None,
on_bad_lines: str = 'error',
delim_whitespace: bool = False,
low_memory: bool = True,
memory_map: bool = False,
float_precision: Optional[str] = None,
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read a Table file and return a Batch containing the data.

Parameters

filepath_or_buffer

The path to the Table file or a URL pointing to the Table file.

asset_name

The name of the Table asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_table().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral TableAsset.

read_xml

Signature

read_xml(
path_or_buffer: pydantic.v1.types.FilePath | pydantic.v1.networks.AnyUrl,
asset_name: Optional[str] = None,
*,
name: str,
id: Optional[uuid.UUID] = None,
order_by: List[great_expectations.datasource.fluent.interfaces.Sorter] = None,
batch_metadata: Dict[str, Any] = None,
batch_definitions: List[great_expectations.core.batch_definition.BatchDefinition] = None,
xpath: str = './*',
namespaces: Optional[Dict[str, str]] = None,
elems_only: bool = False,
attrs_only: bool = False,
names: Union[Sequence[str], None] = None,
dtype: Optional[dict] = None,
encoding: Optional[str] = 'utf-8',
stylesheet: Union[FilePath, None] = None,
iterparse: Optional[Dict[str, List[str]]] = None,
compression: CompressionOptions = 'infer',
storage_options: Union[StorageOptions, None] = None,
dtype_backend: DtypeBackend = None,
**extra_data: Any
) → great_expectations.datasource.fluent.interfaces.Batch

Read an XML file and return a Batch containing the data.

Parameters

path_or_buffer

The path to the XML file or a URL pointing to the XML file.

asset_name

The name of the XML asset, should you wish to use it again.

**kwargs

Additional keyword arguments to pass to pandas.read_xml().

Returns

great_expectations.datasource.fluent.interfaces.Batch

A Batch using an ephemeral XMLAsset.