Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit fa88c24

Browse files
committed
feat(validation): add day of the month add assertions and error handling
1 parent cfbd38b commit fa88c24

File tree

5 files changed

+49
-16
lines changed

5 files changed

+49
-16
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,6 @@ cython_debug/
161161
.DS_Store
162162
kdp/data/fake_data.csv
163163
*.json
164+
165+
# Ignore all contents of my_tests folder
166+
my_tests/*

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ features_specs = {
6161
# INSTANTIATE THE PREPROCESSING MODEL with your data
6262
ppr = PreprocessingModel(
6363
path_data="data/my_data.csv",
64-
features_specs=features_spec,
64+
features_specs=features_specs,
6565
)
6666
# construct the preprocessing pipelines
6767
ppr.build_preprocessor()

docs/contributing.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ If you wan't to request a new feature or you have detected an issue, please use
5959

6060
- [x] Create your first Merge Request (MR) as soon as possible.
6161

62-
> Merge requests will be responsible for semantic-release storytelling and so use them wisely! The changelog report generated automatically will be based on your commits merged into main branch and should cover all the thins you did for the project, as an example:
62+
> Merge requests will be responsible for semantic-release storytelling and so use them wisely! The changelog report generated automatically will be based on your commits merged into main branch and should cover all the things you did for the project, as an example:
6363
6464
- [x] Separate your merge requests based on LABEL or functionality if you are working on `feat` label
6565

@@ -75,7 +75,7 @@ If you wan't to request a new feature or you have detected an issue, please use
7575
7676
`{LABEL}(KDP): {message}`
7777

78-
- [x] Use small Merge Requests but do them more ofthen < 400 ligns for quicker and simple review and not the whole project !
78+
- [x] Use small Merge Requests but do them more often (< 400 lines) for a quicker and simpler review, not the whole project!
7979

8080
- [x] Ask for a Code Review !
8181

docs/features.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ Example cross feature between INTEGER_CATEGORICAL and STRING_CATEGORICAL:
205205

206206
## 📆 Date Features
207207

208-
You can even process string encoded date features (format: 'YYYY-MM-DD'):
208+
You can even process string encoded date features (format: 'YYYY-MM-DD' or 'YYYY/MM/DD'):
209209

210210
=== "🔧 Using FeatureType"
211211

kdp/custom_layers.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(self, date_format: str = "YYYY-MM-DD", **kwargs) -> None:
9090
9191
Args:
9292
date_format (str): format of the string encoded date to parse.
93+
Supported formats: YYYY-MM-DD, YYYY/MM/DD
9394
kwargs (dict): other params to pass to the class.
9495
"""
9596
super().__init__(**kwargs)
@@ -103,31 +104,55 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
103104
inputs (tf.Tensor): Tensor with input data.
104105
105106
Returns:
106-
tf.Tensor: processed date tensor with all cyclic components.
107+
tf.Tensor: processed date tensor with all components [year, month, day_of_month, day_of_week].
107108
"""
108109

109110
def parse_date(date_str: str) -> tf.Tensor:
111+
# Handle missing/invalid dates
112+
is_valid = tf.strings.regex_full_match(date_str, r"^\d{1,4}[-/]\d{1,2}[-/]\d{1,2}$")
113+
tf.debugging.assert_equal(
114+
is_valid,
115+
True,
116+
message="Invalid date format. Expected YYYY-MM-DD or YYYY/MM/DD",
117+
)
118+
119+
# First, standardize the separator to '-' in case of YYYY/MM/DD format
120+
date_str = tf.strings.regex_replace(date_str, "/", "-")
121+
110122
parts = tf.strings.split(date_str, "-")
111123
year = tf.strings.to_number(parts[0], out_type=tf.int32)
112124
month = tf.strings.to_number(parts[1], out_type=tf.int32)
113-
day = tf.strings.to_number(parts[2], out_type=tf.int32)
125+
day_of_month = tf.strings.to_number(parts[2], out_type=tf.int32)
126+
127+
# Validate date components
128+
# Validate year is in reasonable range
129+
tf.debugging.assert_greater_equal(year, 1000, message="Year must be >= 1000")
130+
tf.debugging.assert_less_equal(year, 2200, message="Year must be <= 2200")
131+
132+
# Validate month is between 1-12
133+
tf.debugging.assert_greater_equal(month, 1, message="Month must be >= 1")
134+
tf.debugging.assert_less_equal(month, 12, message="Month must be <= 12")
135+
136+
# Validate day is between 1-31
137+
tf.debugging.assert_greater_equal(day_of_month, 1, message="Day must be >= 1")
138+
tf.debugging.assert_less_equal(day_of_month, 31, message="Day must be <= 31")
114139

115140
# Calculate day of week using Zeller's congruence
116141
y = tf.where(month < 3, year - 1, year)
117142
m = tf.where(month < 3, month + 12, month)
118143
k = y % 100
119144
j = y // 100
120-
h = (day + ((13 * (m + 1)) // 5) + k + (k // 4) + (j // 4) - (2 * j)) % 7
145+
h = (day_of_month + ((13 * (m + 1)) // 5) + k + (k // 4) + (j // 4) - (2 * j)) % 7
121146
day_of_week = tf.where(h == 0, 6, h - 1) # Adjust to 0-6 range where 0 is Monday
122147

123-
return tf.stack([year, month, day_of_week])
148+
return tf.stack([year, month, day_of_month, day_of_week])
124149

125150
parsed_dates = tf.map_fn(parse_date, tf.squeeze(inputs), fn_output_signature=tf.int32)
126151
return parsed_dates
127152

128153
def compute_output_shape(self, input_shape: int) -> int:
129154
"""Getting output shape."""
130-
return tf.TensorShape([input_shape[0], 3])
155+
return tf.TensorShape([input_shape[0], 4]) # Changed to 4 components
131156

132157
def get_config(self) -> dict:
133158
"""Saving configuration."""
@@ -171,14 +196,14 @@ def cyclic_encoding(self, value: tf.Tensor, period: float) -> tuple[tf.Tensor, t
171196

172197
@tf.function
173198
def call(self, inputs: tf.Tensor) -> tf.Tensor:
174-
"""Splits the date into 3 components: year, month and day and
199+
"""Splits the date into 4 components: year, month, day and day of the week and
175200
encodes it into sin and cos cyclical projections.
176201
177202
Args:
178-
inputs (tf.Tensor): input data.
203+
inputs (tf.Tensor): input data [year, month, day_of_month, day_of_week].
179204
180205
Returns:
181-
(tf.Tensor): cyclically encoded data (sin and cos).
206+
tf.Tensor: cyclically encoded data (sin and cos components).
182207
"""
183208
# Reshape input if necessary
184209
input_shape = tf.shape(inputs)
@@ -188,19 +213,22 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
188213
# Extract features
189214
year = inputs[:, 0]
190215
month = inputs[:, 1]
191-
day_of_week = inputs[:, 2]
216+
day_of_month = inputs[:, 2] # New: day of month
217+
day_of_week = inputs[:, 3] # Now at index 3
192218

193-
# Cyclical encoding
219+
# Convert to float
194220
year_float = tf.cast(year, tf.float32)
195221
month_float = tf.cast(month, tf.float32)
222+
day_of_month_float = tf.cast(day_of_month, tf.float32)
196223
day_of_week_float = tf.cast(day_of_week, tf.float32)
197224

198225
# Ensure inputs are in the correct range
199226
year_float = self.normalize_year(year_float)
200227

201-
# Encode each feature
228+
# Encode each feature in cyclic projections
202229
year_sin, year_cos = self.cyclic_encoding(year_float, period=1.0)
203230
month_sin, month_cos = self.cyclic_encoding(month_float, period=12.0)
231+
day_of_month_sin, day_of_month_cos = self.cyclic_encoding(day_of_month_float, period=31.0)
204232
day_of_week_sin, day_of_week_cos = self.cyclic_encoding(day_of_week_float, period=7.0)
205233

206234
encoded = tf.stack(
@@ -209,6 +237,8 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
209237
year_cos,
210238
month_sin,
211239
month_cos,
240+
day_of_month_sin, # New
241+
day_of_month_cos, # New
212242
day_of_week_sin,
213243
day_of_week_cos,
214244
],
@@ -219,7 +249,7 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor:
219249

220250
def compute_output_shape(self, input_shape: int) -> int:
221251
"""Getting output shape."""
222-
return tf.TensorShape([input_shape[0], 6])
252+
return tf.TensorShape([input_shape[0], 8]) # Changed to 8 for 4 features * 2 components each
223253

224254
def get_config(self) -> dict:
225255
"""Returns the configuration of the layer as a dictionary."""

0 commit comments

Comments
 (0)