public class IcebergSource
extends java.lang.Object
implements org.apache.spark.sql.sources.DataSourceRegister, org.apache.spark.sql.connector.catalog.SupportsCatalogOptions
The following describes how paths/tables are loaded when using spark.read().format("iceberg").load(table):
table = "file:///path/to/table" -> loads a HadoopTable at the given path.
table = "tablename" -> loads currentCatalog.currentNamespace.tablename.
table = "catalog.tablename" -> loads "tablename" from the specified catalog.
table = "namespace.tablename" -> loads "namespace.tablename" from the current catalog.
table = "catalog.namespace.tablename" -> loads "namespace.tablename" from the specified catalog.
table = "namespace1.namespace2.tablename" -> loads "namespace1.namespace2.tablename" from the current catalog.
The above list is in order of priority. For example, a matching catalog will take priority over any namespace resolution.
Constructor and Description |
---|
IcebergSource() |
Modifier and Type | Method and Description |
---|---|
java.lang.String |
extractCatalog(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.connector.catalog.Identifier |
extractIdentifier(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.util.Optional<java.lang.String> |
extractTimeTravelTimestamp(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.util.Optional<java.lang.String> |
extractTimeTravelVersion(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.connector.catalog.Table |
getTable(org.apache.spark.sql.types.StructType schema,
org.apache.spark.sql.connector.expressions.Transform[] partitioning,
java.util.Map<java.lang.String,java.lang.String> options) |
org.apache.spark.sql.connector.expressions.Transform[] |
inferPartitioning(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
org.apache.spark.sql.types.StructType |
inferSchema(org.apache.spark.sql.util.CaseInsensitiveStringMap options) |
java.lang.String |
shortName() |
boolean |
supportsExternalMetadata() |
public java.lang.String shortName()
shortName
in interface org.apache.spark.sql.sources.DataSourceRegister
public org.apache.spark.sql.types.StructType inferSchema(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
inferSchema
in interface org.apache.spark.sql.connector.catalog.TableProvider
public org.apache.spark.sql.connector.expressions.Transform[] inferPartitioning(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
inferPartitioning
in interface org.apache.spark.sql.connector.catalog.TableProvider
public boolean supportsExternalMetadata()
supportsExternalMetadata
in interface org.apache.spark.sql.connector.catalog.TableProvider
public org.apache.spark.sql.connector.catalog.Table getTable(org.apache.spark.sql.types.StructType schema, org.apache.spark.sql.connector.expressions.Transform[] partitioning, java.util.Map<java.lang.String,java.lang.String> options)
getTable
in interface org.apache.spark.sql.connector.catalog.TableProvider
public org.apache.spark.sql.connector.catalog.Identifier extractIdentifier(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
extractIdentifier
in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions
public java.lang.String extractCatalog(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
extractCatalog
in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions
public java.util.Optional<java.lang.String> extractTimeTravelVersion(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
extractTimeTravelVersion
in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions
public java.util.Optional<java.lang.String> extractTimeTravelTimestamp(org.apache.spark.sql.util.CaseInsensitiveStringMap options)
extractTimeTravelTimestamp
in interface org.apache.spark.sql.connector.catalog.SupportsCatalogOptions