Skip to main content
Version: 1.18.0

SparkDBFSDatasource

Signature

class great_expectations.datasource.fluent.SparkDBFSDatasource(
*,
type: Literal['spark_dbfs'] = 'spark_dbfs',
name: str,
id: Optional[uuid.UUID] = None,
assets: List[Union[great_expectations.datasource.fluent.data_asset.path.spark.csv_asset.CSVAsset,
great_expectations.datasource.fluent.data_asset.path.spark.csv_asset.DirectoryCSVAsset,
great_expectations.datasource.fluent.data_asset.path.spark.parquet_asset.ParquetAsset,
great_expectations.datasource.fluent.data_asset.path.spark.parquet_asset.DirectoryParquetAsset,
great_expectations.datasource.fluent.data_asset.path.spark.orc_asset.ORCAsset,
great_expectations.datasource.fluent.data_asset.path.spark.orc_asset.DirectoryORCAsset,
great_expectations.datasource.fluent.data_asset.path.spark.json_asset.JSONAsset,
great_expectations.datasource.fluent.data_asset.path.spark.json_asset.DirectoryJSONAsset,
great_expectations.datasource.fluent.data_asset.path.spark.text_asset.TextAsset,
great_expectations.datasource.fluent.data_asset.path.spark.text_asset.DirectoryTextAsset,
great_expectations.datasource.fluent.data_asset.path.spark.delta_asset.DeltaAsset,
great_expectations.datasource.fluent.data_asset.path.spark.delta_asset.DirectoryDeltaAsset]] = [],
spark_config: Optional[Dict[pydantic.v1.types.StrictStr,
Union[pydantic.v1.types.StrictStr,
pydantic.v1.types.StrictInt,
pydantic.v1.types.StrictFloat,
pydantic.v1.types.StrictBool]]] = None,
force_reuse_spark_context: bool = True,
persist: bool = True,
base_directory: pathlib.Path,
data_context_root_directory: Optional[pathlib.Path] = None
)

Spark-based Datasource for data assets stored in the Databricks File System (DBFS).

Deprecated since version 1.16.0: DBFS is deprecated by Databricks. Use Unity Catalog volumes, external locations, or workspace files with SparkFilesystemDatasource instead.

Methods

add_csv_asset

Signature

add_csv_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4e2ed20> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4e2ede0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4e2ef30> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4e2f0e0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4e2f1a0> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType,
str]] = None,
sep: typing.Optional[str] = None,
encoding: typing.Optional[str] = None,
quote: typing.Optional[str] = None,
escape: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
header: typing.Optional[typing.Union[bool,
str]] = None,
inferSchema: typing.Optional[typing.Union[bool,
str]] = None,
ignoreLeadingWhiteSpace: typing.Optional[typing.Union[bool,
str]] = None,
ignoreTrailingWhiteSpace: typing.Optional[typing.Union[bool,
str]] = None,
nullValue: typing.Optional[str] = None,
nanValue: typing.Optional[str] = None,
positiveInf: typing.Optional[str] = None,
negativeInf: typing.Optional[str] = None,
dateFormat: typing.Optional[str] = None,
timestampFormat: typing.Optional[str] = None,
maxColumns: typing.Optional[typing.Union[int,
str]] = None,
maxCharsPerColumn: typing.Optional[typing.Union[int,
str]] = None,
maxMalformedLogPerPartition: typing.Optional[typing.Union[int,
str]] = None,
mode: typing.Optional[typing.Literal['PERMISSIVE',
'DROPMALFORMED',
'FAILFAST']] = None,
columnNameOfCorruptRecord: typing.Optional[str] = None,
multiLine: typing.Optional[typing.Union[bool,
str]] = None,
charToEscapeQuoteEscaping: typing.Optional[str] = None,
samplingRatio: typing.Optional[typing.Union[float,
str]] = None,
enforceSchema: typing.Optional[typing.Union[bool,
str]] = None,
emptyValue: typing.Optional[str] = None,
locale: typing.Optional[str] = None,
lineSep: typing.Optional[str] = None,
unescapedQuoteHandling: typing.Optional[typing.Literal['STOP_AT_CLOSING_QUOTE',
'BACK_TO_DELIMITER',
'STOP_AT_DELIMITER',
'SKIP_VALUE',
'RAISE_ERROR']] = None
) → pydantic.BaseModel

Add a csv asset to the datasource.

add_delta_asset

Signature

add_delta_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ceac90> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4cead50> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ceaea0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ceb050> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ceb110> = None,
timestampAsOf: typing.Optional[str] = None,
versionAsOf: typing.Optional[str] = None
) → pydantic.BaseModel

Add a delta asset to the datasource.

add_directory_csv_asset

Signature

add_directory_csv_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ce9490> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ce9550> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ce96a0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ce9850> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4ce9910> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType,
str]] = None,
sep: typing.Optional[str] = None,
encoding: typing.Optional[str] = None,
quote: typing.Optional[str] = None,
escape: typing.Optional[str] = None,
comment: typing.Optional[str] = None,
header: typing.Optional[typing.Union[bool,
str]] = None,
inferSchema: typing.Optional[typing.Union[bool,
str]] = None,
ignoreLeadingWhiteSpace: typing.Optional[typing.Union[bool,
str]] = None,
ignoreTrailingWhiteSpace: typing.Optional[typing.Union[bool,
str]] = None,
nullValue: typing.Optional[str] = None,
nanValue: typing.Optional[str] = None,
positiveInf: typing.Optional[str] = None,
negativeInf: typing.Optional[str] = None,
dateFormat: typing.Optional[str] = None,
timestampFormat: typing.Optional[str] = None,
maxColumns: typing.Optional[typing.Union[int,
str]] = None,
maxCharsPerColumn: typing.Optional[typing.Union[int,
str]] = None,
maxMalformedLogPerPartition: typing.Optional[typing.Union[int,
str]] = None,
mode: typing.Optional[typing.Literal['PERMISSIVE',
'DROPMALFORMED',
'FAILFAST']] = None,
columnNameOfCorruptRecord: typing.Optional[str] = None,
multiLine: typing.Optional[typing.Union[bool,
str]] = None,
charToEscapeQuoteEscaping: typing.Optional[str] = None,
samplingRatio: typing.Optional[typing.Union[float,
str]] = None,
enforceSchema: typing.Optional[typing.Union[bool,
str]] = None,
emptyValue: typing.Optional[str] = None,
locale: typing.Optional[str] = None,
lineSep: typing.Optional[str] = None,
unescapedQuoteHandling: typing.Optional[typing.Literal['STOP_AT_CLOSING_QUOTE',
'BACK_TO_DELIMITER',
'STOP_AT_DELIMITER',
'SKIP_VALUE',
'RAISE_ERROR']] = None,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_csv asset to the datasource.

add_directory_delta_asset

Signature

add_directory_delta_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4cebf20> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4cebfe0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d08170> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d08320> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d083e0> = None,
timestampAsOf: typing.Optional[str] = None,
versionAsOf: typing.Optional[str] = None,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_delta asset to the datasource.

add_directory_json_asset

Signature

add_directory_json_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d35e80> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d35f40> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d36090> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d36240> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d36300> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType,
str]] = None,
primitivesAsString: typing.Optional[typing.Union[bool,
str]] = None,
prefersDecimal: typing.Optional[typing.Union[bool,
str]] = None,
allowComments: typing.Optional[typing.Union[bool,
str]] = None,
allowUnquotedFieldNames: typing.Optional[typing.Union[bool,
str]] = None,
allowSingleQuotes: typing.Optional[typing.Union[bool,
str]] = None,
allowNumericLeadingZero: typing.Optional[typing.Union[bool,
str]] = None,
allowBackslashEscapingAnyCharacter: typing.Optional[typing.Union[bool,
str]] = None,
mode: typing.Optional[typing.Literal['PERMISSIVE',
'DROPMALFORMED',
'FAILFAST']] = None,
columnNameOfCorruptRecord: typing.Optional[str] = None,
dateFormat: typing.Optional[str] = None,
timestampFormat: typing.Optional[str] = None,
multiLine: typing.Optional[typing.Union[bool,
str]] = None,
allowUnquotedControlChars: typing.Optional[typing.Union[bool,
str]] = None,
lineSep: typing.Optional[str] = None,
samplingRatio: typing.Optional[typing.Union[float,
str]] = None,
dropFieldIfAllNull: typing.Optional[typing.Union[bool,
str]] = None,
encoding: typing.Optional[str] = None,
locale: typing.Optional[str] = None,
allowNonNumericNumbers: typing.Optional[typing.Union[bool,
str]] = None,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_json asset to the datasource.

add_directory_orc_asset

Signature

add_directory_orc_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b616d0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b61790> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b618e0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b61a90> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b61b50> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
mergeSchema: typing.Optional[typing.Union[bool,
str]] = False,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_orc asset to the datasource.

add_directory_parquet_asset

Signature

add_directory_parquet_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b81250> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b81310> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b81460> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b81610> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b816d0> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
mergeSchema: typing.Optional[typing.Union[bool,
str]] = None,
datetimeRebaseMode: typing.Optional[typing.Literal['EXCEPTION',
'CORRECTED',
'LEGACY']] = None,
int96RebaseMode: typing.Optional[typing.Literal['EXCEPTION',
'CORRECTED',
'LEGACY']] = None,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_parquet asset to the datasource.

add_directory_text_asset

Signature

add_directory_text_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b9ca70> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b9cb30> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b9cc80> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b9ce30> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b9cef0> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
wholetext: bool = False,
lineSep: typing.Optional[str] = None,
data_directory: pathlib.Path
) → pydantic.BaseModel

Add a directory_text asset to the datasource.

add_json_asset

Signature

add_json_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d0b6e0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d0b9b0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d0bb00> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d0bcb0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4d0bd70> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
schema: typing.Optional[typing.Union[great_expectations.datasource.fluent.serializable_types.pyspark.SerializableStructType,
str]] = None,
primitivesAsString: typing.Optional[typing.Union[bool,
str]] = None,
prefersDecimal: typing.Optional[typing.Union[bool,
str]] = None,
allowComments: typing.Optional[typing.Union[bool,
str]] = None,
allowUnquotedFieldNames: typing.Optional[typing.Union[bool,
str]] = None,
allowSingleQuotes: typing.Optional[typing.Union[bool,
str]] = None,
allowNumericLeadingZero: typing.Optional[typing.Union[bool,
str]] = None,
allowBackslashEscapingAnyCharacter: typing.Optional[typing.Union[bool,
str]] = None,
mode: typing.Optional[typing.Literal['PERMISSIVE',
'DROPMALFORMED',
'FAILFAST']] = None,
columnNameOfCorruptRecord: typing.Optional[str] = None,
dateFormat: typing.Optional[str] = None,
timestampFormat: typing.Optional[str] = None,
multiLine: typing.Optional[typing.Union[bool,
str]] = None,
allowUnquotedControlChars: typing.Optional[typing.Union[bool,
str]] = None,
lineSep: typing.Optional[str] = None,
samplingRatio: typing.Optional[typing.Union[float,
str]] = None,
dropFieldIfAllNull: typing.Optional[typing.Union[bool,
str]] = None,
encoding: typing.Optional[str] = None,
locale: typing.Optional[str] = None,
allowNonNumericNumbers: typing.Optional[typing.Union[bool,
str]] = None
) → pydantic.BaseModel

Add a json asset to the datasource.

add_orc_asset

Signature

add_orc_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b60200> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b602c0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b60410> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b605c0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b60680> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
mergeSchema: typing.Optional[typing.Union[bool,
str]] = False
) → pydantic.BaseModel

Add an orc asset to the datasource.

add_parquet_asset

Signature

add_parquet_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b63c20> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b63ce0> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b63e30> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b63fe0> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b800e0> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
mergeSchema: typing.Optional[typing.Union[bool,
str]] = None,
datetimeRebaseMode: typing.Optional[typing.Literal['EXCEPTION',
'CORRECTED',
'LEGACY']] = None,
int96RebaseMode: typing.Optional[typing.Literal['EXCEPTION',
'CORRECTED',
'LEGACY']] = None
) → pydantic.BaseModel

Add a parquet asset to the datasource.

add_text_asset

Signature

add_text_asset(
name: str,
*,
id: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b834d0> = None,
order_by: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b83590> = None,
batch_metadata: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b836e0> = None,
batch_definitions: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b83890> = None,
connect_options: <pydantic.v1.fields.DeferredType object at 0x7fd1c4b83950> = None,
pathGlobFilter: typing.Optional[typing.Union[bool,
str]] = None,
recursiveFileLookup: typing.Optional[typing.Union[bool,
str]] = None,
modifiedBefore: typing.Optional[typing.Union[bool,
str]] = None,
modifiedAfter: typing.Optional[typing.Union[bool,
str]] = None,
wholetext: bool = False,
lineSep: typing.Optional[str] = None
) → pydantic.BaseModel

Add a text asset to the datasource.

delete_asset

Signature

delete_asset(
name: str
) → None

Removes the DataAsset referred to by name from the internal list of available DataAsset objects.

Parameters

Name | Description

name

name of DataAsset to be deleted.

get_asset

Signature

get_asset(
name: str
) → great_expectations.datasource.fluent.interfaces._DataAssetT

Returns the DataAsset referred to by name.

Parameters

Name | Description

name

name of DataAsset sought.

Returns

Type | Description

great_expectations.datasource.fluent.interfaces._DataAssetT

The DataAsset with the given name, if it exists; otherwise, an exception is raised.