22from collections import namedtuple
33from io import StringIO
44from textwrap import dedent
5+ from typing import Any , Literal
56
67import numpy
78import numpy .testing as nptest
89import pandas
910import pandas .testing as pdtest
1011import pytest
1112import statsmodels .api as sm
13+ from numpy ._typing ._array_like import NDArray
14+ from pandas import DataFrame
1215from scipy import stats
1316
1417from wqio .tests import helpers
@@ -92,7 +95,7 @@ def test_process_p_vals(fxn, pval, expected, error_to_raise):
9295 (1.01 , (None , None ), ValueError ),
9396 ],
9497)
95- def test_translate_p_vals (pval , expected , as_emoji , error_to_raise ):
98+ def test_translate_p_vals (pval , expected , as_emoji : bool , error_to_raise ):
9699 with helpers .raises (error_to_raise ):
97100 result = numutils .translate_p_vals (pval , as_emoji = as_emoji )
98101 assert result == expected [as_emoji ]
@@ -125,7 +128,7 @@ def test_anderson_darling():
125128
126129
127130@pytest .mark .parametrize ("which" , ["good" , "bad" ])
128- def test_processAndersonDarlingResults (which ):
131+ def test_processAndersonDarlingResults (which : Literal [ "good" ] | Literal [ "bad" ] ):
129132 fieldnames = ["statistic" , "critical_values" , "significance_level" ]
130133 AndersonResult = namedtuple ("AndersonResult" , fieldnames )
131134 ARs = {
@@ -215,7 +218,7 @@ def units_norm_data():
215218 return raw , expected
216219
217220
218- def test_normalize_units (units_norm_data ):
221+ def test_normalize_units (units_norm_data : tuple [ DataFrame , DataFrame ] ):
219222 unitsmap = {"ug/L" : 1e-6 , "mg/L" : 1e-3 , "g/L" : 1e0 }
220223
221224 targetunits = {"Lead, Total" : "ug/L" , "Cadmium, Total" : "mg/L" }
@@ -226,7 +229,7 @@ def test_normalize_units(units_norm_data):
226229 pdtest .assert_frame_equal (result , expected )
227230
228231
229- def test_normalize_units_bad_targetunits (units_norm_data ):
232+ def test_normalize_units_bad_targetunits (units_norm_data : tuple [ DataFrame , DataFrame ] ):
230233 unitsmap = {"ug/L" : 1e-6 , "mg/L" : 1e-3 , "g/L" : 1e0 }
231234
232235 targetunits = {"Lead, Total" : "ug/L" }
@@ -243,7 +246,7 @@ def test_normalize_units_bad_targetunits(units_norm_data):
243246 )
244247
245248
246- def test_normalize_units_bad_normalization (units_norm_data ):
249+ def test_normalize_units_bad_normalization (units_norm_data : tuple [ DataFrame , DataFrame ] ):
247250 unitsmap = {"mg/L" : 1e-3 , "g/L" : 1e0 }
248251
249252 targetunits = {"Lead, Total" : "ug/L" , "Cadmium, Total" : "mg/L" }
@@ -260,7 +263,7 @@ def test_normalize_units_bad_normalization(units_norm_data):
260263 )
261264
262265
263- def test_normalize_units_bad_conversion (units_norm_data ):
266+ def test_normalize_units_bad_conversion (units_norm_data : tuple [ DataFrame , DataFrame ] ):
264267 unitsmap = {"ug/L" : 1e-6 , "mg/L" : 1e-3 , "g/L" : 1e0 }
265268
266269 targetunits = {"Lead, Total" : "ng/L" , "Cadmium, Total" : "mg/L" }
@@ -292,7 +295,7 @@ def test_test_pH2concentration(pH, expected, error):
292295
293296@helpers .seed
294297@pytest .mark .parametrize ("error" , [None , ValueError ])
295- def test_compute_theilslope_default (error ):
298+ def test_compute_theilslope_default (error : types . NoneType | type [ ValueError ] ):
296299 with helpers .raises (error ):
297300 y = helpers .getTestROSData ()["res" ].values
298301 x = numpy .arange (len (y ) - 1 ) if error else None
@@ -443,7 +446,7 @@ def fit_data():
443446 (None , "junk" , ValueError ),
444447 ],
445448)
446- def test_fit_line (fit_data , fitlogs , fitprobs , error ):
449+ def test_fit_line (fit_data : dict [ str , NDArray [ Any ]] , fitlogs , fitprobs , error ):
447450 xy = {
448451 (None , None ): (fit_data ["zscores" ], fit_data ["data" ]),
449452 ("y" , None ): (fit_data ["zscores" ], fit_data ["data" ]),
@@ -483,13 +486,13 @@ def test_fit_line(fit_data, fitlogs, fitprobs, error):
483486 assert isinstance (res , sm .regression .linear_model .RegressionResultsWrapper )
484487
485488
486- def test_fit_line_through_origin (fit_data ):
def test_fit_line_through_origin(fit_data: dict[str, NDArray[Any]]):
    """Fit with ``through_origin=True`` and check the first parameter is zero.

    Parameters
    ----------
    fit_data : dict
        Fixture mapping; the "zscores" entry is passed as ``x`` and the
        "data" entry as ``y`` to ``numutils.fit_line``.
    """
    zscores = fit_data["zscores"]
    values = fit_data["data"]

    # Only the third element of the returned triple (the fit result) is
    # inspected here; the first two are ignored.
    _, _, fit_result = numutils.fit_line(zscores, values, through_origin=True)
    assert fit_result.params[0] == 0
490493
491494
492- def test_fit_line_with_xhat (fit_data ):
495+ def test_fit_line_with_xhat (fit_data : dict [ str , NDArray [ Any ]] ):
493496 x , y = fit_data ["zscores" ], fit_data ["data" ]
494497 x_ , y_ , res = numutils .fit_line (x , y , xhat = [- 2 , - 1 , 0 , 1 , 2 ])
495498 expected = [- 0.566018 , 4.774419 , 10.114857 , 15.455295 , 20.795733 ]
@@ -799,3 +802,93 @@ def test_remove_outliers():
799802 x = numpy .random .normal (0 , 4 , size = 37 )
800803
801804 assert numutils .remove_outliers (x ).shape == expected_shape
805+
806+
def test_tukey_hsd_functions():
    """Run ``tukey_hsd`` then ``process_tukey_hsd_scores`` on the pickled data.

    The ``wq.pkl`` test dataset is loaded, passed to ``numutils.tukey_hsd``
    with the "res", "location" and "chemical_name" arguments, and the result
    is handed to ``numutils.process_tukey_hsd_scores``.  The processed frame
    must equal the hard-coded table below (index/column names not checked).
    """
    # One column per location label, in the order the expected frame lists them.
    location_columns = [f"Loc_{position}" for position in range(7)]

    # Expected score per chemical; each list lines up with ``location_columns``.
    scores_by_chemical = {
        "Copper": [-2.0, 6.0, -2.0, -4.0, 3.0, -1.0, 0.0],
        "Di(2-ethylhexyl)phthalate": [3.0, 5.0, -2.0, -2.0, -2.0, -1.0, -1.0],
        "Indeno(1,2,3-cd)pyrene": [2.0, 0.0, 6.0, -2.0, -2.0, -4.0, 0.0],
        "Lead": [0.0, 6.0, -2.0, -3.0, 4.0, -3.0, -2.0],
        "Phenanthrene": [1.0, 0.0, 1.0, -3.0, 0.0, 0.0, 1.0],
        "Pyrene": [0.0, -1.0, 3.0, -2.0, -2.0, -2.0, 4.0],
        "Total Suspended Solids": [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 6.0],
        "Zinc": [0.0, 1.0, -1.0, -6.0, -1.0, 4.0, 3.0],
    }

    # Expand the compact table into one record per chemical so the frame is
    # built from a list of dicts and then indexed by chemical name.
    expected_records = [
        {"chemical_name": chemical, **dict(zip(location_columns, scores))}
        for chemical, scores in scores_by_chemical.items()
    ]
    expected = pandas.DataFrame(expected_records).set_index("chemical_name")

    water_quality = pandas.read_pickle(helpers.test_data_path("wq.pkl"))
    hsd_scores = numutils.tukey_hsd(water_quality, "res", "location", "chemical_name")
    result = numutils.process_tukey_hsd_scores(hsd_scores, "location", "chemical_name")

    pdtest.assert_frame_equal(result, expected, check_names=False)
0 commit comments