class PartitionedParquetFileSource(CodableBatchDataSource, ColumnFeatureMappable, DataFileReference, WritableFeatureSource, Deletable):

A source pointing to a partitioned Parquet file.
| Kind | Name | Summary |
| --- | --- | --- |
| Class Method | `multi` | Undocumented |
| Method | `__hash__` | Undocumented |
| Method | `all` | Undocumented |
| Method | `all` | Undocumented |
| Async Method | `delete` | Undocumented |
| Async Method | `feature` | Setup the code needed to represent the data source as a feature view |
| Async Method | `insert` | Undocumented |
| Method | `job` | A key defining which sources can be grouped together in one request. |
| Async Method | `overwrite` | Undocumented |
| Async Method | `schema` | Returns the schema for the data source |
| Async Method | `to` | Undocumented |
| Async Method | `to` | Undocumented |
| Async Method | `upsert` | Undocumented |
| Method | `with` | Undocumented |
| Async Method | `write` | Undocumented |
| Class Variable | `config` | Undocumented |
| Class Variable | `date` | Undocumented |
| Class Variable | `directory` | Undocumented |
| Class Variable | `mapping` | Undocumented |
| Class Variable | `partition` | Undocumented |
| Class Variable | `type` | Undocumented |
| Property | `to` | Undocumented |
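The `partition` and `directory` class variables above suggest a directory-partitioned on-disk layout. As a concept illustration only, the stdlib-only sketch below shows how hive-style partition keys (e.g. `year=2024`) could be recovered from a file path; the helper name and the assumed layout are not part of the library's API.

```python
from pathlib import PurePosixPath


def partition_values(path: str) -> dict[str, str]:
    """Extract hive-style partition key/value pairs from a file path.

    Illustrative only: the actual layout used by
    PartitionedParquetFileSource may differ.
    """
    values = {}
    for part in PurePosixPath(path).parts:
        # Directory components like "year=2024" encode a partition column.
        if "=" in part:
            key, _, value = part.partition("=")
            values[key] = value
    return values


print(partition_values("data/year=2024/month=05/part-0.parquet"))
# {'year': '2024', 'month': '05'}
```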
Inherited from CodableBatchDataSource:

| Kind | Name | Summary |
| --- | --- | --- |
| Class Method | `_deserialize` | Undocumented |
| Method | `_serialize` | Undocumented |

Inherited from BatchDataSource (via CodableBatchDataSource):

| Kind | Name | Summary |
| --- | --- | --- |
| Method | `all` | Undocumented |
| Method | `all` | Undocumented |
| Method | `depends` | Undocumented |
| Method | `features` | Undocumented |
| Method | `filter` | Undocumented |
| Async Method | `freshness` | `.table("my_table").freshness()` |
| Method | `location` | Undocumented |
| Method | `source` | An id that identifies a source from others. |
| Method | `tags` | Undocumented |
| Method | `transform` | Undocumented |
| Method | `with` | Undocumented |

Inherited from ColumnFeatureMappable (via CodableBatchDataSource, BatchDataSource):

| Kind | Name | Summary |
| --- | --- | --- |
| Method | `columns` | Undocumented |
| Method | `feature` | Undocumented |
| Method | `with` | Undocumented |

Inherited from DataFileReference (via CodableBatchDataSource, BatchDataSource, ColumnFeatureMappable):

| Kind | Name | Summary |
| --- | --- | --- |
| Async Method | `read` | Undocumented |
| Async Method | `to` | Undocumented |
| Async Method | `write` | Undocumented |
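The inherited async `freshness` helper, shown above only as the fragment `.table("my_table").freshness()`, conceptually answers "how recent is the newest event in this source?". The stdlib-only sketch below illustrates that idea; the function and column names are assumptions, not the library's implementation.

```python
from datetime import datetime, timezone


def freshness(rows, event_column):
    """Return the newest event timestamp in the rows, or None if empty.

    A conceptual stand-in for a source-level freshness check; this is
    not the aligned library's implementation.
    """
    timestamps = [
        row[event_column] for row in rows if row.get(event_column) is not None
    ]
    # max() over timestamps gives the most recent event; default=None
    # covers an empty source.
    return max(timestamps, default=None)


rows = [
    {"id": 1, "updated_at": datetime(2024, 1, 1, tzinfo=timezone.utc)},
    {"id": 2, "updated_at": datetime(2024, 3, 5, tzinfo=timezone.utc)},
]
print(freshness(rows, "updated_at"))
# 2024-03-05 00:00:00+00:00
```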
```python
def multi_source_features_for(
    cls,
    facts: RetrivalJob,
    requests: list[tuple[ParquetFileSource, RetrivalRequest]],
) -> RetrivalJob:
```

Undocumented

```python
…: RetrivalRequest, start_date: datetime, end_date: datetime) -> RetrivalJob:
```

Undocumented
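The truncated signature above takes `start_date` and `end_date` parameters; conceptually, such a method restricts the source to rows whose event time falls in a date range. A hypothetical sketch of that filtering, assuming a half-open range `[start_date, end_date)` (the library's actual range semantics are not documented here):

```python
from datetime import datetime


def rows_between(rows, date_column, start_date, end_date):
    """Keep rows whose event time falls in [start_date, end_date).

    Hypothetical helper illustrating the start_date/end_date parameters;
    not the aligned library's implementation.
    """
    return [row for row in rows if start_date <= row[date_column] < end_date]


rows = [
    {"id": 1, "event_at": datetime(2024, 1, 15)},
    {"id": 2, "event_at": datetime(2024, 2, 20)},
    {"id": 3, "event_at": datetime(2024, 4, 1)},
]
selected = rows_between(rows, "event_at", datetime(2024, 1, 1), datetime(2024, 3, 1))
print([row["id"] for row in selected])
# [1, 2]
```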
Setup the code needed to represent the data source as a feature view.

```python
FileSource.parquet("my_path.parquet").feature_view_code(view_name="my_view")
>>> """from aligned import FeatureView, String, Int64, Float

class MyView(FeatureView):

    metadata = FeatureView.metadata_with(
        name="Embarked",
        description="some description",
        batch_source=FileSource.parquet("my_path.parquet"),
        stream_source=None,
    )

    Passenger_id = Int64()
    Survived = Int64()
    Pclass = Int64()
    Name = String()
    Sex = String()
    Age = Float()
    Sibsp = Int64()
    Parch = Int64()
    Ticket = String()
    Fare = Float()
    Cabin = String()
    Embarked = String()"""
```

Returns:
    str: The code needed to set up a basic feature view.
Returns the schema for the data source.

```python
source = FileSource.parquet_at('test_data/titanic.parquet')
schema = await source.schema()
>>> {'passenger_id': FeatureType(name='int64'), ...}
```

Returns:
    dict[str, FeatureType]: A dictionary containing the column name and the feature type.