Thanks to visit codestin.com
Credit goes to github.com

Skip to content

adapt ODV files "load" function for WOD data[👆] #136

@qcazenave

Description

@qcazenave

Dear DIVAnd developers,

I am looking for duplicates between EMODNet Chemistry and the WOD.
To do so, I use the DIVAnd.Quadtrees.checkduplicates function as a first guess but then refine the duplicate detection focusing on consistent profiles instead of looking at each observation as if it were independent from the others.
For this, I decided to use the information given by the "id" output variable of the function "load" (from NCODV and/or from PhysOcean.WorldOceanDatabase) but there is a problem with the measurements from the WOD : they do not have information for the "LOCAL_CDI_ID" metadata. The "id" output variable is thus an empty variable.
Would it be possible to add an option in the "load" function so that if the "LOCAL_CDI_ID" is not available, then another parameter can be used (like the "Station" parameter for example, in the case of the WOD) ?
I created locally a development package for DIVAnd to adapt the NCODV.load function (see code below). I was wondering whether it would be possible to modify the "official" packages accordingly (or maybe in a smarter way) ?

Thanks
Quitterie

Following is the way I adapted the function:

function load_WOD(T, fname, long_name; qv_flags = ["good_value", "probably_good_value"],
nchunk = 10)

accepted_status_flags = qv_flags

Dataset(fname) do ds
    nstations = Int(ds.dim["N_STATIONS"])
    LOCAL_CDI_ID = fill("",nstations)

    if length(varbyattrib(ds, long_name = "LOCAL_CDI_ID")) == 0
        @warn "No variable with the long_name attribute \'LOCAL_CDI_ID\' in $fname found. We use the station name instead."
		
		ncvar_LOCAL_CDI_ID = varbyattrib_first(ds, long_name = "Station")

        if ndims(ncvar_LOCAL_CDI_ID) == 2
            LOCAL_CDI_ID = chararray2strings(ncvar_LOCAL_CDI_ID.var[:])
        else
            @warn """The variable with the long_name attribute \'Station\' is expected to have two dimensions. For example the output of 'ncdump -h' of $fname should contain:

[...]
char metavar4(N_STATIONS, STRING20) ;
metavar4:long_name = "Station" ;
[...]

We use the empty string for LOCAL_CDI_ID instead.
"""
end

    else
        ncvar_LOCAL_CDI_ID = varbyattrib_first(ds, long_name = "LOCAL_CDI_ID")

        if ndims(ncvar_LOCAL_CDI_ID) == 2
            LOCAL_CDI_ID = chararray2strings(ncvar_LOCAL_CDI_ID.var[:])
        else
            @warn """The variable with the long_name attribute \'LOCAL_CDI_ID\' is expected to have two dimensions. For example the output of 'ncdump -h' of $fname should contain:

[...]
char metavar4(N_STATIONS, STRING36) ;
metavar4:long_name = "LOCAL_CDI_ID" ;
[...]

We use the station name instead.
"""
ncvar_LOCAL_CDI_ID = varbyattrib_first(ds, long_name = "Station")
if ndims(ncvar_LOCAL_CDI_ID) == 2
LOCAL_CDI_ID = chararray2strings(ncvar_LOCAL_CDI_ID.var[:])
else
@warn """The variable with the long_name attribute 'Station' is expected to have two dimensions. For example the output of 'ncdump -h' of $fname should contain:
[...]
char metavar4(N_STATIONS, STRING20) ;
metavar4:long_name = "Station" ;
[...]

We use the empty string for LOCAL_CDI_ID instead.
	"""
			end
        end
    end

    EDMO_CODE = if length(varbyattrib(ds; long_name = "EDMO_code")) > 0
        varbyattrib_first(ds, long_name = "EDMO_code")[:]
    else
        varbyattrib_first(ds, long_name = "EDMO_CODE")[:]
    end

    obsproflon = varbyattrib_first(ds, standard_name = "longitude")[:]
    obsproflat = varbyattrib_first(ds, standard_name = "latitude")[:]

    # time for time series
    ncvar_time = nothing
    ncv_ancillary_time = nothing
    fillval_time = nothing
    accepted_status_flag_values_time = nothing
    vars_time_ISO8601 = varbyattrib(ds, long_name = "time_ISO8601")

    if length(vars_time_ISO8601) == 1
        # time series
        ncvar_time = vars_time_ISO8601[1]
        @assert ndims(ncvar_time) == 2
    else
        # profile
        obsproftime = varbyattrib_first(ds, standard_name = "time")[:]
        @assert ndims(obsproftime) == 1
    end

    ncvar = varbyattrib_first(ds, long_name = long_name)
    ncvar_z = varbyattrib_first(ds, long_name = "Depth")

    @debug "variable: $(name(ncvar))"
    @debug "variable z: $(name(ncvar_z))"

    ncv_ancillary, accepted_status_flag_values = statusflags(ncvar,accepted_status_flags)
    ncv_ancillary_z, accepted_status_flag_values_z = statusflags(ncvar_z,accepted_status_flags)

    if ncvar_time !== nothing
        ncv_ancillary_time, accepted_status_flag_values_time = statusflags(ncvar_time,accepted_status_flags)
        fillval_time = get(ncvar_time.attrib, "_FillValue", nothing)
    end

    fillval = ncvar.attrib["_FillValue"]
    fillval_z = get(ncvar_z.attrib, "_FillValue", nothing)

    data, data_z, data_time = loadprof(
        ncvar.var,
        ncv_ancillary,
        fillval,
        accepted_status_flag_values,

        ncvar_z.var,
        ncv_ancillary_z,
        fillval_z,
        accepted_status_flag_values_z,

        #= these 4 variables are nothing for profiles =#
        (ncvar_time == nothing ? nothing : ncvar_time.var),
        ncv_ancillary_time,
        fillval_time,
        accepted_status_flag_values_time,
        nchunk = nchunk
    )


    if ncvar_time !== nothing
        time_units = ncvar_time.attrib["units"]
        @debug "time_units: $time_units (decode as fractional years)"
        @assert time_units == "years since 0000-01-01"
        obsproftime = decode_odv_years.(data_time,fillval_time)
    end

    obsvalue, obslon, obslat, obsdepth, obstime, obsids = flatten_data(
        T,
        obsproflon,
        obsproflat,
        obsproftime,
        EDMO_CODE,
        LOCAL_CDI_ID,
        data,
        data_z,
    )
    return obsvalue, obslon, obslat, obsdepth, obstime, obsids
end

end

Metadata

Metadata

Assignees

Type

No type

Projects

Status

Done

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions