AttributeError: type object 'TrainingDatasetSplit' has no attribute 'TIME_SERIES_SPLIT'

Hi,

We use the create_train_test_split method of the FeatureView class to create train/test data from a Hopsworks feature view, but we get the following error.

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-19-019cb4911756> in <module>
      7 test_end = "20220331"
      8 
----> 9 td_version, td_job = feature_view.create_train_test_split(
     10     train_start=train_start,
     11     train_end=train_end,

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/feature_view.py in create_train_test_split(self, test_size, train_start, train_end, test_start, test_end, storage_connector, location, description, data_format, coalesce, seed, statistics_config, write_options)
    512             test_size=test_size, train_end=train_end, test_start=test_start
    513         )
--> 514         td = training_dataset.TrainingDataset(
    515             name=self.name,
    516             version=None,

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in __init__(self, name, version, data_format, featurestore_id, location, event_start_time, event_end_time, coalesce, description, storage_connector, splits, validation_size, test_size, train_start, train_end, validation_start, validation_end, test_start, test_end, seed, created, creator, features, statistics_config, featurestore_name, id, inode_id, training_dataset_type, from_query, querydto, label, transformation_functions, train_split)
    145                     TrainingDatasetSplit.TEST: test_size,
    146                 }
--> 147             self._set_time_splits(
    148                 train_start,
    149                 train_end,

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in _set_time_splits(self, train_start, train_end, validation_start, validation_end, test_start, test_end)
    195 
    196         time_splits = list()
--> 197         self._append_time_split(
    198             time_splits,
    199             split_name=TrainingDatasetSplit.TRAIN,

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/training_dataset.py in _append_time_split(self, time_splits, split_name, start_time, end_time)
    230                 TrainingDatasetSplit(
    231                     name=split_name,
--> 232                     split_type=TrainingDatasetSplit.TIME_SERIES_SPLIT,
    233                     start_time=util.convert_event_time_to_timestamp(start_time),
    234                     end_time=util.convert_event_time_to_timestamp(end_time),

AttributeError: type object 'TrainingDatasetSplit' has no attribute 'TIME_SERIES_SPLIT'

Below is the code we use.

feature_view = fs.get_feature_view(name="fraud_fv", version=1)

# Training/Test splits, datasets creation. Using timerange arguments.
train_start = "20220101"
train_end = "20220310"
test_start = "20220310"
test_end = "20220331"

td_version, td_job = feature_view.create_train_test_split(
    train_start=train_start,
    train_end=train_end,
    test_start=test_start,
    test_end=test_end,
    data_format = "csv",
    coalesce = True,
    write_options = {'wait_for_job': True},
    )

X_train, X_test, y_train, y_test = feature_view.get_train_test_split(version)

X_train = X_train.sort_values("datetime")
y_train = y_train.reindex(X_train.index)

X_test = X_test.sort_values("datetime")
y_test = y_test.reindex(X_test.index)

X_train.drop(["cc_num", "datetime"], axis=1, inplace=True)
X_test.drop(["cc_num","datetime"], axis=1, inplace=True)

Platform: Hopsworks 3.0.0
Hopsworks API: 3.0.5
Feature Store API: 3.0.5
MLOps API: 3.0.3

How can we fix this error?

Best regards

Hi @cmlops,

Time-based splits are properly supported from Hopsworks 3.1 onwards, so you need to update both your Hopsworks instance and the API libraries.

Regards,
Fabio

Hi,

Thank you for your help. As you suggested, I updated the libraries via the Hopsworks UI as follows:

Hopsworks API: 3.1.1
Feature Store API: 3.1.2
MLOps API: 3.1.0

However, I am still getting errors.

---------------------------------------------------------------------------
RestAPIError                              Traceback (most recent call last)
<ipython-input-14-019cb4911756> in <module>
      7 test_end = "20220331"
      8 
----> 9 td_version, td_job = feature_view.create_train_test_split(
     10     train_start=train_start,
     11     train_end=train_end,

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/feature_view.py in create_train_test_split(self, test_size, train_start, train_end, test_start, test_end, storage_connector, location, description, extra_filter, data_format, coalesce, seed, statistics_config, write_options)
   1030         )
   1031         # td_job is used only if the python engine is used
-> 1032         td, td_job = self._feature_view_engine.create_training_dataset(
   1033             self, td, write_options
   1034         )

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/core/feature_view_engine.py in create_training_dataset(self, feature_view_obj, training_dataset_obj, user_write_options)
    232     ):
    233         self._set_event_time(feature_view_obj, training_dataset_obj)
--> 234         updated_instance = self._create_training_data_metadata(
    235             feature_view_obj, training_dataset_obj
    236         )

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/core/feature_view_engine.py in _create_training_data_metadata(self, feature_view_obj, training_dataset_obj)
    494 
    495     def _create_training_data_metadata(self, feature_view_obj, training_dataset_obj):
--> 496         td = self._feature_view_api.create_training_dataset(
    497             feature_view_obj.name, feature_view_obj.version, training_dataset_obj
    498         )

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/core/feature_view_api.py in create_training_dataset(self, name, version, training_dataset_obj)
    175         headers = {"content-type": "application/json"}
    176         return training_dataset_obj.update_from_response_json(
--> 177             self._client._send_request(
    178                 "POST", path, headers=headers, data=training_dataset_obj.json()
    179             )

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/decorators.py in if_connected(inst, *args, **kwargs)
     33         if not inst._connected:
     34             raise NoHopsworksConnectionError
---> 35         return fn(inst, *args, **kwargs)
     36 
     37     return if_connected

/srv/hops/anaconda/envs/theenv/lib/python3.8/site-packages/hsfs/client/base.py in _send_request(self, method, path_params, query_params, headers, data, stream, files)
    169 
    170         if response.status_code // 100 != 2:
--> 171             raise exceptions.RestAPIError(url, response)
    172 
    173         if stream:

RestAPIError: Metadata operation error: (url: https://hopsworks.glassfish.service.consul:8182/hopsworks-api/api/project/124/featurestores/70/featureview/a_bank_fraud_poc_fv/version/1/trainingdatasets). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"type":"restApiJsonResponse","errorCode":270099,"errorMsg":"Illegal training dataset split percentage","usrMsg":", the provided training dataset split percentage is invalid. Percentages can only be numeric. Weights will be normalized if they don\xe2\x80\x99t sum up to 1.0."}', error code: 270099, error msg: Illegal training dataset split percentage, user msg: , the provided training dataset split percentage is invalid. Percentages can only be numeric. Weights will be normalized if they don’t sum up to 1.0.

Since the 3.0 example projects you previously published have been removed, we have no working examples to use as a reference for this. We would appreciate your help with this issue.

Kind regards

Hi @cmlops

Is your Hopsworks platform itself also running version 3.1, or did you only upgrade the client libraries?

Thanks,
Kenneth