Hi,
We use the create_train_test_split method of the FeatureView class to create train/test data from Hopsworks Feature View, but we get the following error.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-19-019cb4911756> in <module>
7 test_end = "20220331"
8
----> 9 td_version, td_job = feature_view.create_train_test_split(
10 train_start=train_start,
11 train_end=train_end,
/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/feature_view.py in create_train_test_split(self, test_size, train_start, train_end, test_start, test_end, storage_connector, location, description, data_format, coalesce, seed, statistics_config, write_options)
512 test_size=test_size, train_end=train_end, test_start=test_start
513 )
--> 514 td = training_dataset.TrainingDataset(
515 name=self.name,
516 version=None,
/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in __init__(self, name, version, data_format, featurestore_id, location, event_start_time, event_end_time, coalesce, description, storage_connector, splits, validation_size, test_size, train_start, train_end, validation_start, validation_end, test_start, test_end, seed, created, creator, features, statistics_config, featurestore_name, id, inode_id, training_dataset_type, from_query, querydto, label, transformation_functions, train_split)
145 TrainingDatasetSplit.TEST: test_size,
146 }
--> 147 self._set_time_splits(
148 train_start,
149 train_end,
/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in _set_time_splits(self, train_start, train_end, validation_start, validation_end, test_start, test_end)
195
196 time_splits = list()
--> 197 self._append_time_split(
198 time_splits,
199 split_name=TrainingDatasetSplit.TRAIN,
/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in _append_time_split(self, time_splits, split_name, start_time, end_time)
230 TrainingDatasetSplit(
231 name=split_name,
--> 232 split_type=TrainingDatasetSplit.TIME_SERIES_SPLIT,
233 start_time=util.convert_event_time_to_timestamp(start_time),
234 end_time=util.convert_event_time_to_timestamp(end_time),
AttributeError: type object 'TrainingDatasetSplit' has no attribute 'TIME_SERIES_SPLIT'
Below is the code we use.
# Fetch version 1 of the "fraud_fv" feature view from the feature store handle `fs`.
feature_view = fs.get_feature_view(name="fraud_fv", version=1)

# Training/test splits and dataset creation, using time-range arguments.
# NOTE(review): the date strings look like YYYYMMDD -- confirm the accepted format.
train_start = "20220101"
train_end = "20220310"
test_start = "20220310"
test_end = "20220331"

# Create the train/test training dataset; the call's result is unpacked into
# the dataset version and the job handle.
td_version, td_job = feature_view.create_train_test_split(
    train_start=train_start,
    train_end=train_end,
    test_start=test_start,
    test_end=test_end,
    data_format="csv",
    coalesce=True,
    # Presumably blocks until the materialization job finishes -- confirm.
    write_options={'wait_for_job': True},
)

# Read back the splits of the dataset version created above. The original
# snippet passed `version`, a name that is never defined here; `td_version`
# is the value returned by create_train_test_split.
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(td_version)

# Order the features by "datetime", then realign the labels to the same row order.
X_train = X_train.sort_values("datetime")
y_train = y_train.reindex(X_train.index)
X_test = X_test.sort_values("datetime")
y_test = y_test.reindex(X_test.index)

# Drop the "cc_num" and "datetime" columns from both feature frames.
X_train.drop(["cc_num", "datetime"], axis=1, inplace=True)
X_test.drop(["cc_num", "datetime"], axis=1, inplace=True)
Platform: Hopsworks 3.0.0
Hopsworks API: 3.0.5
Feature Store API: 3.0.5
MLOps API: 3.0.3
How can we fix this error?
Best regards