aligned.sources.random_source.RandomDataSource

class documentation

class RandomDataSource(CodableBatchDataSource, DataFileReference, WritableFeatureSource): (source)

Constructors: RandomDataSource.with_values(values, seed), RandomDataSource(default_data_size, seed, partial_data, fill_mode)

View In Hierarchy

The RandomDataSource is a data source that generates random data for a given request. This can be useful for testing and development purposes.

It will use the data types and constraints defined on a feature to generate the data.

```python
from aligned import feature_view, Bool, Float, Int64, String
from aligned.sources.random_source import RandomDataSource

@feature_view(
    source=RandomDataSource(),
)
class MyView:
    passenger_id = Int64().as_entity()
    survived = Bool()
    age = Float().lower_bound(0).upper_bound(100)
    name = String()
    sex = String().accepted_values(["male", "female"])
```

Class Method	`multi_source_features_for`	Undocumented
Static Method	`with_values`	Undocumented
Method	`__init__`	Undocumented
Method	`all_between_dates`	Undocumented
Method	`all_data`	Undocumented
Method	`depends_on`	Undocumented
Async Method	`insert`	Undocumented
Method	`job_group_key`	A key defining which sources can be grouped together in one request.
Async Method	`overwrite`	Undocumented
Async Method	`schema`	Returns the schema for the data source
Async Method	`upsert`	Undocumented
Async Method	`write_polars`	Undocumented
Class Variable	`type_name`	Undocumented
Instance Variable	`default_data_size`	Undocumented
Instance Variable	`fill_mode`	Undocumented
Instance Variable	`partial_data`	Undocumented
Instance Variable	`seed`	Undocumented

Inherited from CodableBatchDataSource:

Property	`as_markdown`	Undocumented
Class Method	`_deserialize`	Undocumented
Method	`_serialize`	Undocumented

Inherited from BatchDataSource (via CodableBatchDataSource):

Method	`__hash__`	Undocumented
Method	`all`	Undocumented
Method	`all_columns`	Undocumented
Async Method	`feature_view_code`	Setup the code needed to represent the data source as a feature view
Method	`features_for`	Undocumented
Method	`filter`	Undocumented
Async Method	`freshness`	.table("my_table") .freshness()
Method	`location_id`	Undocumented
Method	`needed_configs`	Undocumented
Method	`source_id`	An id that identifies a source from others.
Method	`tags`	Undocumented
Method	`transform_with_polars`	Undocumented
Method	`with_loaded_at`	Undocumented
Method	`with_view`	Undocumented

Inherited from DataFileReference (via CodableBatchDataSource, BatchDataSource):

Async Method	`read_pandas`	Undocumented
Async Method	`to_lazy_polars`	Undocumented
Async Method	`to_pandas`	Undocumented
Async Method	`to_polars`	Undocumented
Async Method	`write_pandas`	Undocumented

@classmethod

def multi_source_features_for(cls: type[RandomDataSource], facts: RetrievalJob, requests: list[tuple[RandomDataSource, RetrievalRequest]]) -> RetrievalJob: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.multi_source_features_for

Undocumented

@staticmethod

def with_values(values: dict[str, object], seed: int | None = None) -> RandomDataSource: (source) ¶

Undocumented

def __init__(self, default_data_size: int = 10000, seed: int | None = None, partial_data: pl.DataFrame | None = None, fill_mode: FillMode = 'duplicate'): (source) ¶

Undocumented

def all_between_dates(self, request: RetrievalRequest, start_date: datetime, end_date: datetime) -> RetrievalJob: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.all_between_dates

Undocumented

def all_data(self, request: RetrievalRequest, limit: int | None = None) -> RetrievalJob: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.all_data

Undocumented

def depends_on(self) -> set[FeatureLocation]: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.depends_on

Undocumented

async def insert(self, job: RetrievalJob, request: RetrievalRequest): (source) ¶

overrides aligned.feature_source.WritableFeatureSource.insert

Undocumented

def job_group_key(self) -> str: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.job_group_key

A key defining which sources can be grouped together in one request.

async def overwrite(self, job: RetrievalJob, request: RetrievalRequest): (source) ¶

overrides aligned.feature_source.WritableFeatureSource.overwrite

Undocumented

async def schema(self) -> dict[str, FeatureType]: (source) ¶

overrides aligned.data_source.batch_data_source.BatchDataSource.schema

Returns the schema for the data source

```python
source = FileSource.parquet_at('test_data/titanic.parquet')
schema = await source.schema()
# {'passenger_id': FeatureType(name='int64'), ...}
```

Returns: dict[str, FeatureType]: A dictionary mapping each column name to its feature type

async def upsert(self, job: RetrievalJob, request: RetrievalRequest): (source) ¶

overrides aligned.feature_source.WritableFeatureSource.upsert

Undocumented

async def write_polars(self, df: pl.LazyFrame): (source) ¶

overrides aligned.data_file.DataFileReference.write_polars

Undocumented

type_name: str = (source) ¶

overrides aligned.data_source.batch_data_source.CodableBatchDataSource.type_name

Undocumented

default_data_size: int = (source) ¶

Undocumented

fill_mode: FillMode = (source) ¶

Undocumented

partial_data = (source) ¶

Undocumented

seed: int | None = (source) ¶

Undocumented