From 90b5c10a088fb4d0e24ef9b33144fd877ec5b38d Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Fri, 1 Apr 2022 11:34:10 +0300 Subject: [PATCH 01/46] Refactor rest - part 2 (#322) * reactivate redis queue * rename variable * make queue type configurable * enhance dev setup for redis queue * lint * move user_auth * move user auth * move base_login * move map_layer_base * move renderer_base * move resource_base * fix relative imports * lint * first splitup: raster_colors * splitup download_cache_management * splitup ephemeral_custom_processing * splitup ephemeral_processing_with_export * splitup ephemeral_processing * splitup renderer_base * splitup persistent_processing * splitup location_management * splitup map_layer_management * splitup mapset_management * splitup persistent_mapset_merger * splitup process_validation * splitup raster_export * splitup raster_layer * splitup raster_legend * lint * splitup raster_renderer * splitup resource_storage_management * splitup strds_management * splitup strds_raster_management * splitup strds_renderer * splitup vector_layer * splitup vector_renderer * lint * make inheritance more clear * add readme * fix import --- .../ephemeral/persistent_processing.py | 644 ++++++++++++++++++ 1 file changed, 644 insertions(+) create mode 100644 src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py new file mode 100644 index 0000000..1f2a36b --- /dev/null +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -0,0 +1,644 @@ +# -*- coding: utf-8 -*- +####### +# actinia-core - an open source REST API for scalable, distributed, high +# performance processing of geographical data that uses GRASS GIS for +# computational tasks. 
For details, see https://actinia.mundialis.de/ +# +# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. KG +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +####### + +""" +Asynchronous computation in specific temporary generated and then copied +or original mapsets +""" +import fileinput +import os +import shutil +import sqlite3 +import subprocess + +from actinia_core.processing.actinia_processing.ephemeral_processing \ + import EphemeralProcessing +from actinia_core.core.common.exceptions import AsyncProcessError + +__license__ = "GPLv3" +__author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann" +__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__maintainer__ = "mundialis" + + +class PersistentProcessing(EphemeralProcessing): + """Processing of grass modules in a temporary or original mapset. + + This class is designed to run GRASS modules that are specified in a process chain + in a temporary mapset that later on is copied into the original location or merged + into an existing mapset. 
+ + Locking concept: + + - Check if the target mapset exists + - Lock the target mapset + - Create a temporary mapset lock (name is generated in constructor) + - Create the temporary mapset in the local storage + - Process + + If target mapset exists: + + - Move the temporary mapset after processing to the user group database + - Merge the temporary mapset into the target mapset + - Delete the temporary mapset + - Unlock the two mapsets after processing is finished, terminated or + raised an error + + If target mapset does not exists: + + - After processing finished successfully, copy the + temporary mapset to the original user group specific location using + the target mapset name + - Unlock the two mapsets after processing is finished, terminated or + raised an error + + """ + + def __init__(self, rdc): + """Constructor + + Args: + rdc (ResourceDataContainer): The data container that contains all + required variables for processing + + """ + + EphemeralProcessing.__init__(self, rdc) + self.target_mapset_name = self.mapset_name + self.target_mapset_exists = False # By default the target mapset + # does not exists + # Set True if this process was successful in setting the lock + self.target_mapset_lock_set = False + self.orig_mapset_path = None + + # We have two mapset lock ids. The target mapset and the temporary mapset + self.target_mapset_lock_id = self._generate_mapset_lock_id( + self.user_group, self.location_name, self.target_mapset_name) + + self.temp_mapset_lock_id = self._generate_mapset_lock_id( + self.user_group, self.location_name, self.temp_mapset_name) + self.temp_mapset_lock_set = False + + def _generate_mapset_lock_id(self, user_group, location_name, mapset_name): + """Generate a unique id to lock a mapset in the redis database + + Locations are user group specific. Hence different user groups may have + locations with the same names and with equal mapset names. 
+ + In the same user group, a location/mapset must be locked to grant exclusive + access rights. + + Args: + user_group: The user group used for locking + location_name: The location name in which the mapset is located + for locking + mapset_name: The mapset name that should be locked + + Returns: + The lock id + + """ + return "%s/%s/%s" % (user_group, location_name, mapset_name) + + def _lock_temp_mapset(self): + """Lock the temporary mapset + + This method sets in case of success: self.tmp_mapset_lock_set = True + """ + # Lock the temporary mapset for the time that the user can allocate at maximum + ret = self.lock_interface.lock( + resource_id=self.temp_mapset_lock_id, + expiration=self.process_time_limit * self.process_num_limit) + + if ret == 0: + raise AsyncProcessError( + "Unable to lock temporary mapset <%s>, " + "resource is already locked" % self.target_mapset_name) + self.message_logger.info("Mapset <%s> locked" % self.target_mapset_name) + + # if we manage to come here, the lock was correctly set + self.temp_mapset_lock_set = True + + def _check_mapset(self, mapset): + """Check if the target mapset exists + + This method will check if the target mapset exists in the global and + user group locations. + If the mapset is in the global database, then an AsyncProcessError + will be raised, since global location/mapsets can not be modified. 
+ + This method sets in case of success: + + self.target_mapset_lock_set = True + self.tmp_mapset_lock_set = True + + Raises: + AsyncProcessError + + """ + mapset_exists = False + + # Check if the global location is accessible and that the target mapset + # does not exist + if self.is_global_database is True: + # Break if the target mapset exists in the global database + if os.path.exists(self.global_location_path) and \ + os.path.isdir(self.global_location_path) and \ + os.access( + self.global_location_path, + os.R_OK | os.X_OK | os.W_OK) is True: + self.orig_mapset_path = os.path.join(self.global_location_path, mapset) + + if os.path.exists(self.orig_mapset_path) is True: + if os.access( + self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK) is True: + raise AsyncProcessError( + "Mapset <%s> exists in the global " + "dataset and can not be modified." % mapset) + else: + raise AsyncProcessError( + "Unable to access global location <%s>" % self.location_name) + + # Always check if the target mapset already exists and set the flag accordingly + if (os.path.exists(self.user_location_path) + and os.path.isdir(self.user_location_path) + and os.access( + self.user_location_path, os.R_OK | os.X_OK | os.W_OK) is True): + + self.orig_mapset_path = os.path.join(self.user_location_path, mapset) + + if os.path.exists(self.orig_mapset_path) is True: + if os.access( + self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK) is True: + mapset_exists = True + # Add the existing mapset to the required ones for mapset + # search path settings + self.required_mapsets.append(mapset) + else: + raise AsyncProcessError("Unable to access mapset <%s> " + "path %s" % (mapset, + self.orig_mapset_path)) + else: + mapset_exists = False + else: + raise AsyncProcessError( + "Unable to access user location <%s>" % self.location_name) + + return mapset_exists + + def _check_target_mapset_exists(self): + """Check if the target mapset exists + + This method will check if the target mapset exists in the 
global and user + location. + If the mapset is in the global database, then an AsyncProcessError will + be raised, since global mapsets can not be modified. + + This method sets in case of success: + + self.target_mapset_exists = True/False + + Raises: + AsyncProcessError + """ + self.target_mapset_exists = self._check_mapset(self.target_mapset_name) + + def _check_lock_target_mapset(self): + """Check if the target mapset exists and lock it, then lock the temporary mapset + + This method will check if the target mapset exists in the global and user + location. + If the mapset is in the global database, then an AsyncProcessError will + be raised, since global mapsets can not be modified. + + This method sets in case of success: + + self.target_mapset_lock_set = True + self.tmp_mapset_lock_set = True + + Raises: + AsyncProcessError + + """ + self._check_target_mapset_exists() + self._lock_target_mapset() + + def _lock_target_mapset(self): + """Lock the target mapset + + Raises: + AsyncProcessError + + """ + + # Lock the mapset for the time that the user can allocate at maximum + ret = self.lock_interface.lock( + resource_id=self.target_mapset_lock_id, + expiration=self.process_time_limit * self.process_num_limit) + + if ret == 0: + raise AsyncProcessError( + "Unable to lock location/mapset <%s/%s>, " + "resource is already locked" % (self.location_name, + self.target_mapset_name)) + self.message_logger.info( + "location/mapset <%s/%s> locked" % (self.location_name, + self.target_mapset_name)) + + # if we manage to come here, the lock was correctly set + self.target_mapset_lock_set = True + + def _change_mapsetname_in_group(self, group_path, source_mapset, target_mapset): + """Replaces the mapset name in the group file + + Args: + group_path(str): path of the group folder in the source mapset + source_mapset(str): name of source mapset + target_mapset(str): name of target mapset + + Raises: + This method will raise an AsyncProcessError if a group has no REF file + """ 
+ group_dirs = os.listdir(group_path) + for group_dir in group_dirs: + group_file = os.path.join(group_path, group_dir, "REF") + if os.path.isfile(group_file): + for line in fileinput.input(group_file, inplace=True): + print(line.replace( + source_mapset, target_mapset), end='') + else: + raise AsyncProcessError("group %s has no REF file" + % (group_dir)) + + def _update_views_in_tgis(self, tgis_db_path): + """Update views in tgis sqlite.db + + Args: + tgis_db_path(str): Path to the tgis sqlite.db file where the views + should be updated + """ + con = sqlite3.connect(tgis_db_path) + cur = con.cursor() + + sql_script_folder = os.path.join(os.getenv("GISBASE"), "etc", "sql") + drop_view_sql = os.path.join(sql_script_folder, 'drop_views.sql') + with open(drop_view_sql, 'r') as sql: + sql_drop_str = sql.read() + cur.executescript(sql_drop_str) + + view_sql_file_names = [ + "raster_views.sql", + "raster3d_views.sql", + "vector_views.sql", + "strds_views.sql", + "str3ds_views.sql", + "stvds_views.sql" + ] + for view_sql_file_name in view_sql_file_names: + view_sql_file = os.path.join(sql_script_folder, view_sql_file_name) + with open(view_sql_file, 'r') as sql: + sql_view_str = sql.read() + cur.executescript(sql_view_str) + con.commit() + if con: + con.close() + del cur + + def _merge_tgis_dbs(self, tgis_db_path_1, tgis_db_path_2): + """Merge two tgis sqlite.db files + + Args: + tgis_db_path_1(str): path of a tgis sqlite.db file in which the + other should be merged + tgis_db_path_2(str): path of a tgis sqlite.db file which should be + merged in tgis_db_path_1 + """ + con = sqlite3.connect(tgis_db_path_1) + con.execute(f"ATTACH '{tgis_db_path_2}' as dba") + con.execute("BEGIN") + + table_names1 = [row[1] for row in con.execute( + "SELECT * FROM sqlite_master where type='table'")] + table_names2 = [row[1] for row in con.execute( + "SELECT * FROM dba.sqlite_master where type='table'")] + + # merge databases + for table in table_names2: + if table == 'tgis_metadata': + 
con.execute(f"DROP TABLE {table}") + con.execute(f"CREATE TABLE {table} AS " + f"SELECT * FROM dba.{table}") + continue + # for example raster_register_xxx tables are not in both dbs + if table not in table_names1: + con.execute(f"CREATE TABLE {table} AS " + f"SELECT * FROM dba.{table}") + continue + combine = f"INSERT OR IGNORE INTO {table} SELECT * FROM dba.{table}" + con.execute(combine) + con.commit() + con.execute("detach database dba") + if con: + con.close() + + def _change_mapsetname_in_tgis(self, tgis_path, source_mapset, + target_mapset, target_tgis_db): + """Replaces the mapset name in the tgis sqlite.db + + Args: + tgis_path(str): path of the tgis folder in the source mapset + source_mapset(str): name of source mapset + target_mapset(str): name of target mapset + target_tgis_db(str): path to existing tgis sqlite.db of target + mapset. None if nonexistent. + """ + + tgis_db_path = os.path.join(tgis_path, 'sqlite.db') + + # tables + con = sqlite3.connect(tgis_db_path) + cur = con.cursor() + table_names = [row[1] for row in cur.execute( + "SELECT * FROM sqlite_master where type='table'")] + for table_name in table_names: + columns = [row[0] for row in cur.execute( + f"SELECT * FROM {table_name}").description] + for col in columns: + cur.execute(f"UPDATE {table_name} SET {col} = REPLACE({col}, " + f"'{source_mapset}', '{target_mapset}')") + con.commit() + if con: + con.close() + del cur + + # if there already exists a sqlite.db file then merge it + if target_tgis_db is not None: + self._merge_tgis_dbs(tgis_db_path, target_tgis_db) + + # update views + self._update_views_in_tgis(tgis_db_path) + + def _merge_mapset_into_target(self, source_mapset, target_mapset): + """Link the source mapset content into the target mapset + + Attention: Not all directories and files in the mapset are copied. + See list directories. 
+ """ + self.message_logger.info( + "Copy source mapset <%s> content " + "into the target mapset <%s>" % (source_mapset, target_mapset)) + + # Raster, vector, group and space time data set directories/files + directories = ["cell", "misc", "fcell", + "cats", "cellhd", + "cell_misc", "colr", "colr2", + "hist", "vector", "group", "tgis", "VAR"] + + for directory in directories: + source_path = os.path.join( + self.user_location_path, source_mapset, directory) + target_path = os.path.join(self.user_location_path, target_mapset) + + if os.path.exists(source_path) is True: + if directory == "group": + self._change_mapsetname_in_group( + source_path, source_mapset, target_mapset) + if directory == "tgis": + target_tgis_db = None + if os.path.isdir(os.path.join(target_path, 'tgis')): + target_tgis_db = os.path.join(target_path, 'tgis', 'sqlite.db') + self._change_mapsetname_in_tgis( + source_path, source_mapset, target_mapset, + target_tgis_db) + + if os.path.exists(source_path) is True: + # Hardlink the sources into the target + stdout = subprocess.PIPE + stderr = subprocess.PIPE + + p = subprocess.Popen(["/bin/cp", "-flr", + "%s" % source_path, + "%s/." % target_path], + stdout=stdout, + stderr=stderr) + (stdout_buff, stderr_buff) = p.communicate() + if p.returncode != 0: + raise AsyncProcessError( + "Unable to merge mapsets. 
Error in linking:" + " stdout: %s stderr: %s" % (stdout_buff, stderr_buff)) + + def _copy_merge_tmp_mapset_to_target_mapset(self): + """Copy the temporary mapset into the original location + + In case the mapset does not exists, then use the target mapset name, + otherwise use the temporary mapset name for copying which is later on + merged into the target mapset and then removed + """ + + # Extent the mapset lock for an hour, since copying can take long + if self.target_mapset_lock_set is True: + ret = self.lock_interface.extend(resource_id=self.target_mapset_lock_id, + expiration=3600) + if ret == 0: + raise AsyncProcessError("Unable to extend lock for mapset " + "<%s>" % self.target_mapset_name) + + if self.temp_mapset_lock_set is True: + ret = self.lock_interface.extend(resource_id=self.temp_mapset_lock_id, + expiration=3600) + if ret == 0: + raise AsyncProcessError("Unable to extend lock for " + "temporary mapset <%s>" % self.temp_mapset_name) + + self.message_logger.info( + "Copy temporary mapset from %s to %s" % ( + self.temp_mapset_path, os.path.join( + self.user_location_path, self.target_mapset_name))) + + source_path = self.temp_mapset_path + + # In case the mapset does not exists, then use the target mapset name, + # otherwise use the temporary mapset name for copying which is later + # on merged into the target mapset and then removed + if self.target_mapset_exists is True: + target_path = self.user_location_path + "/." 
+ message = "Copy temporary mapset <%s> to target location " \ + "<%s>" % (self.temp_mapset_name, self.location_name) + else: + target_path = os.path.join(self.user_location_path, self.target_mapset_name) + message = "Copy temporary mapset <%s> to target location " \ + "<%s>" % (self.target_mapset_name, self.location_name) + + self._send_resource_update(message) + + try: + stdout = subprocess.PIPE + stderr = subprocess.PIPE + p = subprocess.Popen(["/bin/cp", "-fr", + "%s" % source_path, + "%s" % target_path], + stdout=stdout, + stderr=stderr) + (stdout_buff, stderr_buff) = p.communicate() + if p.returncode != 0: + raise AsyncProcessError( + "Unable to copy temporary mapset to " + "original location. Copy error " + "stdout: %s stderr: %s returncode: %i" % (stdout_buff, + stderr_buff, + p.returncode)) + except Exception as e: + raise AsyncProcessError("Unable to copy temporary mapset to " + "original location. Exception %s" % str(e)) + + # Merge the temp mapset into the target mapset in case the target already exists + if self.target_mapset_exists is True: + self._merge_mapset_into_target( + self.temp_mapset_name, self.target_mapset_name) + shutil.rmtree(os.path.join(self.user_location_path, self.temp_mapset_name)) + # remove interim results + if self.interim_result.saving_interim_results is True: + interim_dir = os.path.join( + self.interim_result.user_resource_interim_storage_path, + self.resource_id) + self.message_logger.info( + "Remove interim results %s" % interim_dir) + if os.path.isdir(interim_dir): + shutil.rmtree(interim_dir) + + def _execute_process_list(self, process_list): + """Extend the mapset lock and execute the provided process list + + Args: + process_list: The process list to execute + + Raises: + This method will raise an AsyncProcessError or AsyncProcessTermination + """ + for process in process_list: + # Extent the lock for each process by max processing time * 2 + if self.target_mapset_lock_set is True: + ret = 
self.lock_interface.extend(resource_id=self.target_mapset_lock_id, + expiration=self.process_time_limit * 2) + if ret == 0: + raise AsyncProcessError( + "Unable to extend lock for mapset <%s>" + % self.target_mapset_name) + + if self.temp_mapset_lock_set is True: + # Extent the lock for each process by max processing time * 2 + ret = self.lock_interface.extend(resource_id=self.temp_mapset_lock_id, + expiration=self.process_time_limit * 2) + if ret == 0: + raise AsyncProcessError( + "Unable to extend lock for " + "temporary mapset <%s>" % self.temp_mapset_name) + + if process.exec_type == "grass": + self._run_module(process) + elif process.exec_type == "exec": + self._run_process(process) + elif process.exec_type == "python": + eval(process.executable) + + def _execute(self, skip_permission_check=False): + """Overwrite this function in subclasses + + - Call self._setup() + - Analyse the process chain + - Check the target mapset and lock it for the maximum time + a user can consume -> process_num_limit*process_time_limit + - Initialize and create the temporal database and mapset + or use the original mapset + - Run the modules and extend the lock each run + - Copy the mapset if it has not already exist + - Cleanup and unlock the mapset + + """ + + # Setup the user credentials and logger + self._setup() + # check if this is a job resumption + if self.rdc.iteration is not None: + # Create the process chain + pc_step, old_process_chain_list = \ + self._get_previous_iteration_process_chain() + self.interim_result.set_old_pc_step(pc_step) + process_list = self._validate_process_chain( + process_chain=self.request_data, + old_process_chain=old_process_chain_list, + pc_step=pc_step) + # check iterim results + interim_result_mapset, interim_result_file_path = \ + self.interim_result.check_interim_result_mapset( + pc_step, self.rdc.iteration - 1) + else: + # Create the process chain + process_list = self._validate_process_chain() + interim_result_mapset = None + 
interim_result_file_path = None + + # Check and lock the target and temp mapsets + self._check_lock_target_mapset() + + if self.target_mapset_exists is False: + # Create the temp database and link the + # required mapsets into it + self._create_temp_database(self.required_mapsets) + + # Initialize the GRASS environment and switch into PERMANENT + # mapset, which is always linked + self._create_grass_environment(grass_data_base=self.temp_grass_data_base, + mapset_name="PERMANENT") + + # Create the temporary mapset with the same name as the target + # mapset and switch into it + self._create_temporary_mapset( + temp_mapset_name=self.target_mapset_name, + interim_result_mapset=interim_result_mapset, + interim_result_file_path=interim_result_file_path) + self.temp_mapset_name = self.target_mapset_name + else: + # Init GRASS environment and create the temporary mapset + self._create_temporary_grass_environment( + source_mapset_name=self.target_mapset_name) + self._lock_temp_mapset() + + # Execute the process list + self._execute_process_list(process_list) + # Copy local mapset to original location, merge mapsets + # if necessary + self._copy_merge_tmp_mapset_to_target_mapset() + # Parse the module sdtout outputs and create the results + self._parse_module_outputs() + + def _final_cleanup(self): + """Final cleanup called in the run function at the very end of processing + """ + # Clean up and remove the temporary gisdbase + self._cleanup() + # Unlock the mapsets + if self.target_mapset_lock_set is True: + self.lock_interface.unlock(self.target_mapset_lock_id) + if self.temp_mapset_lock_set is True: + self.lock_interface.unlock(self.temp_mapset_lock_id) From 2a7c35495010f08aab53dce3fb4752719bcf3b93 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 22 Sep 2022 15:19:34 +0200 Subject: [PATCH 02/46] Black (#378) * linting tests + black workflow * linting * black * further black * fix unittests * fix test 
Co-authored-by: anikaweinmann --- .../ephemeral/persistent_processing.py | 437 ++++++++++++------ 1 file changed, 284 insertions(+), 153 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py index 1f2a36b..d367ac3 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -31,22 +31,25 @@ import sqlite3 import subprocess -from actinia_core.processing.actinia_processing.ephemeral_processing \ - import EphemeralProcessing +from actinia_core.processing.actinia_processing.ephemeral_processing import ( + EphemeralProcessing, +) from actinia_core.core.common.exceptions import AsyncProcessError __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann" -__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__copyright__ = ( + "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +) __maintainer__ = "mundialis" class PersistentProcessing(EphemeralProcessing): """Processing of grass modules in a temporary or original mapset. - This class is designed to run GRASS modules that are specified in a process chain - in a temporary mapset that later on is copied into the original location or merged - into an existing mapset. + This class is designed to run GRASS modules that are specified in a + process chain in a temporary mapset that later on is copied into the + original location or merged into an existing mapset. Locking concept: @@ -91,12 +94,15 @@ def __init__(self, rdc): self.target_mapset_lock_set = False self.orig_mapset_path = None - # We have two mapset lock ids. The target mapset and the temporary mapset + # We have two mapset lock ids. 
The target mapset and the temporary + # mapset self.target_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.location_name, self.target_mapset_name) + self.user_group, self.location_name, self.target_mapset_name + ) self.temp_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.location_name, self.temp_mapset_name) + self.user_group, self.location_name, self.temp_mapset_name + ) self.temp_mapset_lock_set = False def _generate_mapset_lock_id(self, user_group, location_name, mapset_name): @@ -105,8 +111,8 @@ def _generate_mapset_lock_id(self, user_group, location_name, mapset_name): Locations are user group specific. Hence different user groups may have locations with the same names and with equal mapset names. - In the same user group, a location/mapset must be locked to grant exclusive - access rights. + In the same user group, a location/mapset must be locked to grant + exclusive access rights. Args: user_group: The user group used for locking @@ -125,16 +131,21 @@ def _lock_temp_mapset(self): This method sets in case of success: self.tmp_mapset_lock_set = True """ - # Lock the temporary mapset for the time that the user can allocate at maximum + # Lock the temporary mapset for the time that the user can allocate at + # maximum ret = self.lock_interface.lock( resource_id=self.temp_mapset_lock_id, - expiration=self.process_time_limit * self.process_num_limit) + expiration=self.process_time_limit * self.process_num_limit, + ) if ret == 0: raise AsyncProcessError( "Unable to lock temporary mapset <%s>, " - "resource is already locked" % self.target_mapset_name) - self.message_logger.info("Mapset <%s> locked" % self.target_mapset_name) + "resource is already locked" % self.target_mapset_name + ) + self.message_logger.info( + "Mapset <%s> locked" % self.target_mapset_name + ) # if we manage to come here, the lock was correctly set self.temp_mapset_lock_set = True @@ -162,55 +173,78 @@ def _check_mapset(self, mapset): # does not 
exist if self.is_global_database is True: # Break if the target mapset exists in the global database - if os.path.exists(self.global_location_path) and \ - os.path.isdir(self.global_location_path) and \ - os.access( - self.global_location_path, - os.R_OK | os.X_OK | os.W_OK) is True: - self.orig_mapset_path = os.path.join(self.global_location_path, mapset) + if ( + os.path.exists(self.global_location_path) + and os.path.isdir(self.global_location_path) + and os.access( + self.global_location_path, os.R_OK | os.X_OK | os.W_OK + ) + is True + ): + self.orig_mapset_path = os.path.join( + self.global_location_path, mapset + ) if os.path.exists(self.orig_mapset_path) is True: - if os.access( - self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK) is True: + if ( + os.access( + self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK + ) + is True + ): raise AsyncProcessError( "Mapset <%s> exists in the global " - "dataset and can not be modified." % mapset) + "dataset and can not be modified." % mapset + ) else: raise AsyncProcessError( - "Unable to access global location <%s>" % self.location_name) - - # Always check if the target mapset already exists and set the flag accordingly - if (os.path.exists(self.user_location_path) - and os.path.isdir(self.user_location_path) - and os.access( - self.user_location_path, os.R_OK | os.X_OK | os.W_OK) is True): - - self.orig_mapset_path = os.path.join(self.user_location_path, mapset) + "Unable to access global location <%s>" + % self.location_name + ) + + # Always check if the target mapset already exists and set the flag + # accordingly + if ( + os.path.exists(self.user_location_path) + and os.path.isdir(self.user_location_path) + and os.access(self.user_location_path, os.R_OK | os.X_OK | os.W_OK) + is True + ): + + self.orig_mapset_path = os.path.join( + self.user_location_path, mapset + ) if os.path.exists(self.orig_mapset_path) is True: - if os.access( - self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK) is True: + if ( + 
os.access( + self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK + ) + is True + ): mapset_exists = True # Add the existing mapset to the required ones for mapset # search path settings self.required_mapsets.append(mapset) else: - raise AsyncProcessError("Unable to access mapset <%s> " - "path %s" % (mapset, - self.orig_mapset_path)) + raise AsyncProcessError( + "Unable to access mapset <%s> " + "path %s" % (mapset, self.orig_mapset_path) + ) else: mapset_exists = False else: raise AsyncProcessError( - "Unable to access user location <%s>" % self.location_name) + "Unable to access user location <%s>" % self.location_name + ) return mapset_exists def _check_target_mapset_exists(self): """Check if the target mapset exists - This method will check if the target mapset exists in the global and user - location. + This method will check if the target mapset exists in the global and + user location. If the mapset is in the global database, then an AsyncProcessError will be raised, since global mapsets can not be modified. @@ -224,10 +258,12 @@ def _check_target_mapset_exists(self): self.target_mapset_exists = self._check_mapset(self.target_mapset_name) def _check_lock_target_mapset(self): - """Check if the target mapset exists and lock it, then lock the temporary mapset + """ + Check if the target mapset exists and lock it, then lock the temporary + mapset - This method will check if the target mapset exists in the global and user - location. + This method will check if the target mapset exists in the global and + user location. If the mapset is in the global database, then an AsyncProcessError will be raised, since global mapsets can not be modified. 
@@ -254,21 +290,26 @@ def _lock_target_mapset(self): # Lock the mapset for the time that the user can allocate at maximum ret = self.lock_interface.lock( resource_id=self.target_mapset_lock_id, - expiration=self.process_time_limit * self.process_num_limit) + expiration=self.process_time_limit * self.process_num_limit, + ) if ret == 0: raise AsyncProcessError( "Unable to lock location/mapset <%s/%s>, " - "resource is already locked" % (self.location_name, - self.target_mapset_name)) + "resource is already locked" + % (self.location_name, self.target_mapset_name) + ) self.message_logger.info( - "location/mapset <%s/%s> locked" % (self.location_name, - self.target_mapset_name)) + "location/mapset <%s/%s> locked" + % (self.location_name, self.target_mapset_name) + ) # if we manage to come here, the lock was correctly set self.target_mapset_lock_set = True - def _change_mapsetname_in_group(self, group_path, source_mapset, target_mapset): + def _change_mapsetname_in_group( + self, group_path, source_mapset, target_mapset + ): """Replaces the mapset name in the group file Args: @@ -277,18 +318,19 @@ def _change_mapsetname_in_group(self, group_path, source_mapset, target_mapset): target_mapset(str): name of target mapset Raises: - This method will raise an AsyncProcessError if a group has no REF file + This method will raise an AsyncProcessError if a group has no REF + file """ group_dirs = os.listdir(group_path) for group_dir in group_dirs: group_file = os.path.join(group_path, group_dir, "REF") if os.path.isfile(group_file): for line in fileinput.input(group_file, inplace=True): - print(line.replace( - source_mapset, target_mapset), end='') + print(line.replace(source_mapset, target_mapset), end="") else: - raise AsyncProcessError("group %s has no REF file" - % (group_dir)) + raise AsyncProcessError( + "group %s has no REF file" % (group_dir) + ) def _update_views_in_tgis(self, tgis_db_path): """Update views in tgis sqlite.db @@ -301,8 +343,8 @@ def 
_update_views_in_tgis(self, tgis_db_path): cur = con.cursor() sql_script_folder = os.path.join(os.getenv("GISBASE"), "etc", "sql") - drop_view_sql = os.path.join(sql_script_folder, 'drop_views.sql') - with open(drop_view_sql, 'r') as sql: + drop_view_sql = os.path.join(sql_script_folder, "drop_views.sql") + with open(drop_view_sql, "r") as sql: sql_drop_str = sql.read() cur.executescript(sql_drop_str) @@ -312,11 +354,11 @@ def _update_views_in_tgis(self, tgis_db_path): "vector_views.sql", "strds_views.sql", "str3ds_views.sql", - "stvds_views.sql" + "stvds_views.sql", ] for view_sql_file_name in view_sql_file_names: view_sql_file = os.path.join(sql_script_folder, view_sql_file_name) - with open(view_sql_file, 'r') as sql: + with open(view_sql_file, "r") as sql: sql_view_str = sql.read() cur.executescript(sql_view_str) con.commit() @@ -337,32 +379,45 @@ def _merge_tgis_dbs(self, tgis_db_path_1, tgis_db_path_2): con.execute(f"ATTACH '{tgis_db_path_2}' as dba") con.execute("BEGIN") - table_names1 = [row[1] for row in con.execute( - "SELECT * FROM sqlite_master where type='table'")] - table_names2 = [row[1] for row in con.execute( - "SELECT * FROM dba.sqlite_master where type='table'")] + table_names1 = [ + row[1] + for row in con.execute( + "SELECT * FROM sqlite_master where type='table'" + ) + ] + table_names2 = [ + row[1] + for row in con.execute( + "SELECT * FROM dba.sqlite_master where type='table'" + ) + ] # merge databases for table in table_names2: - if table == 'tgis_metadata': + if table == "tgis_metadata": con.execute(f"DROP TABLE {table}") - con.execute(f"CREATE TABLE {table} AS " - f"SELECT * FROM dba.{table}") + con.execute( + f"CREATE TABLE {table} AS " f"SELECT * FROM dba.{table}" + ) continue # for example raster_register_xxx tables are not in both dbs if table not in table_names1: - con.execute(f"CREATE TABLE {table} AS " - f"SELECT * FROM dba.{table}") + con.execute( + f"CREATE TABLE {table} AS " f"SELECT * FROM dba.{table}" + ) continue - combine = 
f"INSERT OR IGNORE INTO {table} SELECT * FROM dba.{table}" + combine = ( + f"INSERT OR IGNORE INTO {table} SELECT * FROM dba.{table}" + ) con.execute(combine) con.commit() con.execute("detach database dba") if con: con.close() - def _change_mapsetname_in_tgis(self, tgis_path, source_mapset, - target_mapset, target_tgis_db): + def _change_mapsetname_in_tgis( + self, tgis_path, source_mapset, target_mapset, target_tgis_db + ): """Replaces the mapset name in the tgis sqlite.db Args: @@ -373,19 +428,29 @@ def _change_mapsetname_in_tgis(self, tgis_path, source_mapset, mapset. None if nonexistent. """ - tgis_db_path = os.path.join(tgis_path, 'sqlite.db') + tgis_db_path = os.path.join(tgis_path, "sqlite.db") # tables con = sqlite3.connect(tgis_db_path) cur = con.cursor() - table_names = [row[1] for row in cur.execute( - "SELECT * FROM sqlite_master where type='table'")] + table_names = [ + row[1] + for row in cur.execute( + "SELECT * FROM sqlite_master where type='table'" + ) + ] for table_name in table_names: - columns = [row[0] for row in cur.execute( - f"SELECT * FROM {table_name}").description] + columns = [ + row[0] + for row in cur.execute( + f"SELECT * FROM {table_name}" + ).description + ] for col in columns: - cur.execute(f"UPDATE {table_name} SET {col} = REPLACE({col}, " - f"'{source_mapset}', '{target_mapset}')") + cur.execute( + f"UPDATE {table_name} SET {col} = REPLACE({col}, " + f"'{source_mapset}', '{target_mapset}')" + ) con.commit() if con: con.close() @@ -406,46 +471,71 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): """ self.message_logger.info( "Copy source mapset <%s> content " - "into the target mapset <%s>" % (source_mapset, target_mapset)) + "into the target mapset <%s>" % (source_mapset, target_mapset) + ) # Raster, vector, group and space time data set directories/files - directories = ["cell", "misc", "fcell", - "cats", "cellhd", - "cell_misc", "colr", "colr2", - "hist", "vector", "group", "tgis", "VAR"] + directories = [ + 
"cell", + "misc", + "fcell", + "cats", + "cellhd", + "cell_misc", + "colr", + "colr2", + "hist", + "vector", + "group", + "tgis", + "VAR", + ] for directory in directories: source_path = os.path.join( - self.user_location_path, source_mapset, directory) + self.user_location_path, source_mapset, directory + ) target_path = os.path.join(self.user_location_path, target_mapset) if os.path.exists(source_path) is True: if directory == "group": self._change_mapsetname_in_group( - source_path, source_mapset, target_mapset) + source_path, source_mapset, target_mapset + ) if directory == "tgis": target_tgis_db = None - if os.path.isdir(os.path.join(target_path, 'tgis')): - target_tgis_db = os.path.join(target_path, 'tgis', 'sqlite.db') + if os.path.isdir(os.path.join(target_path, "tgis")): + target_tgis_db = os.path.join( + target_path, "tgis", "sqlite.db" + ) self._change_mapsetname_in_tgis( - source_path, source_mapset, target_mapset, - target_tgis_db) + source_path, + source_mapset, + target_mapset, + target_tgis_db, + ) if os.path.exists(source_path) is True: # Hardlink the sources into the target stdout = subprocess.PIPE stderr = subprocess.PIPE - p = subprocess.Popen(["/bin/cp", "-flr", - "%s" % source_path, - "%s/." % target_path], - stdout=stdout, - stderr=stderr) + p = subprocess.Popen( + [ + "/bin/cp", + "-flr", + "%s" % source_path, + "%s/." % target_path, + ], + stdout=stdout, + stderr=stderr, + ) (stdout_buff, stderr_buff) = p.communicate() if p.returncode != 0: raise AsyncProcessError( "Unable to merge mapsets. 
Error in linking:" - " stdout: %s stderr: %s" % (stdout_buff, stderr_buff)) + " stdout: %s stderr: %s" % (stdout_buff, stderr_buff) + ) def _copy_merge_tmp_mapset_to_target_mapset(self): """Copy the temporary mapset into the original location @@ -457,23 +547,32 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): # Extent the mapset lock for an hour, since copying can take long if self.target_mapset_lock_set is True: - ret = self.lock_interface.extend(resource_id=self.target_mapset_lock_id, - expiration=3600) + ret = self.lock_interface.extend( + resource_id=self.target_mapset_lock_id, expiration=3600 + ) if ret == 0: - raise AsyncProcessError("Unable to extend lock for mapset " - "<%s>" % self.target_mapset_name) + raise AsyncProcessError( + "Unable to extend lock for mapset " + "<%s>" % self.target_mapset_name + ) if self.temp_mapset_lock_set is True: - ret = self.lock_interface.extend(resource_id=self.temp_mapset_lock_id, - expiration=3600) + ret = self.lock_interface.extend( + resource_id=self.temp_mapset_lock_id, expiration=3600 + ) if ret == 0: - raise AsyncProcessError("Unable to extend lock for " - "temporary mapset <%s>" % self.temp_mapset_name) + raise AsyncProcessError( + "Unable to extend lock for " + "temporary mapset <%s>" % self.temp_mapset_name + ) self.message_logger.info( - "Copy temporary mapset from %s to %s" % ( - self.temp_mapset_path, os.path.join( - self.user_location_path, self.target_mapset_name))) + "Copy temporary mapset from %s to %s" + % ( + self.temp_mapset_path, + os.path.join(self.user_location_path, self.target_mapset_name), + ) + ) source_path = self.temp_mapset_path @@ -482,47 +581,61 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): # on merged into the target mapset and then removed if self.target_mapset_exists is True: target_path = self.user_location_path + "/." 
- message = "Copy temporary mapset <%s> to target location " \ - "<%s>" % (self.temp_mapset_name, self.location_name) + message = ( + "Copy temporary mapset <%s> to target location " + "<%s>" % (self.temp_mapset_name, self.location_name) + ) else: - target_path = os.path.join(self.user_location_path, self.target_mapset_name) - message = "Copy temporary mapset <%s> to target location " \ - "<%s>" % (self.target_mapset_name, self.location_name) + target_path = os.path.join( + self.user_location_path, self.target_mapset_name + ) + message = ( + "Copy temporary mapset <%s> to target location " + "<%s>" % (self.target_mapset_name, self.location_name) + ) self._send_resource_update(message) try: stdout = subprocess.PIPE stderr = subprocess.PIPE - p = subprocess.Popen(["/bin/cp", "-fr", - "%s" % source_path, - "%s" % target_path], - stdout=stdout, - stderr=stderr) + p = subprocess.Popen( + ["/bin/cp", "-fr", "%s" % source_path, "%s" % target_path], + stdout=stdout, + stderr=stderr, + ) (stdout_buff, stderr_buff) = p.communicate() if p.returncode != 0: raise AsyncProcessError( "Unable to copy temporary mapset to " "original location. Copy error " - "stdout: %s stderr: %s returncode: %i" % (stdout_buff, - stderr_buff, - p.returncode)) + "stdout: %s stderr: %s returncode: %i" + % (stdout_buff, stderr_buff, p.returncode) + ) except Exception as e: - raise AsyncProcessError("Unable to copy temporary mapset to " - "original location. Exception %s" % str(e)) + raise AsyncProcessError( + "Unable to copy temporary mapset to " + "original location. 
Exception %s" % str(e) + ) - # Merge the temp mapset into the target mapset in case the target already exists + # Merge the temp mapset into the target mapset in case the target + # already exists if self.target_mapset_exists is True: self._merge_mapset_into_target( - self.temp_mapset_name, self.target_mapset_name) - shutil.rmtree(os.path.join(self.user_location_path, self.temp_mapset_name)) + self.temp_mapset_name, self.target_mapset_name + ) + shutil.rmtree( + os.path.join(self.user_location_path, self.temp_mapset_name) + ) # remove interim results if self.interim_result.saving_interim_results is True: interim_dir = os.path.join( self.interim_result.user_resource_interim_storage_path, - self.resource_id) + self.resource_id, + ) self.message_logger.info( - "Remove interim results %s" % interim_dir) + "Remove interim results %s" % interim_dir + ) if os.path.isdir(interim_dir): shutil.rmtree(interim_dir) @@ -533,26 +646,33 @@ def _execute_process_list(self, process_list): process_list: The process list to execute Raises: - This method will raise an AsyncProcessError or AsyncProcessTermination + This method will raise an AsyncProcessError or + AsyncProcessTermination """ for process in process_list: # Extent the lock for each process by max processing time * 2 if self.target_mapset_lock_set is True: - ret = self.lock_interface.extend(resource_id=self.target_mapset_lock_id, - expiration=self.process_time_limit * 2) + ret = self.lock_interface.extend( + resource_id=self.target_mapset_lock_id, + expiration=self.process_time_limit * 2, + ) if ret == 0: raise AsyncProcessError( "Unable to extend lock for mapset <%s>" - % self.target_mapset_name) + % self.target_mapset_name + ) if self.temp_mapset_lock_set is True: # Extent the lock for each process by max processing time * 2 - ret = self.lock_interface.extend(resource_id=self.temp_mapset_lock_id, - expiration=self.process_time_limit * 2) + ret = self.lock_interface.extend( + resource_id=self.temp_mapset_lock_id, + 
expiration=self.process_time_limit * 2, + ) if ret == 0: raise AsyncProcessError( "Unable to extend lock for " - "temporary mapset <%s>" % self.temp_mapset_name) + "temporary mapset <%s>" % self.temp_mapset_name + ) if process.exec_type == "grass": self._run_module(process) @@ -564,15 +684,15 @@ def _execute_process_list(self, process_list): def _execute(self, skip_permission_check=False): """Overwrite this function in subclasses - - Call self._setup() - - Analyse the process chain - - Check the target mapset and lock it for the maximum time - a user can consume -> process_num_limit*process_time_limit - - Initialize and create the temporal database and mapset - or use the original mapset - - Run the modules and extend the lock each run - - Copy the mapset if it has not already exist - - Cleanup and unlock the mapset + - Call self._setup() + - Analyse the process chain + - Check the target mapset and lock it for the maximum time + a user can consume -> process_num_limit*process_time_limit + - Initialize and create the temporal database and mapset + or use the original mapset + - Run the modules and extend the lock each run + - Copy the mapset if it has not already exist + - Cleanup and unlock the mapset """ @@ -581,17 +701,23 @@ def _execute(self, skip_permission_check=False): # check if this is a job resumption if self.rdc.iteration is not None: # Create the process chain - pc_step, old_process_chain_list = \ - self._get_previous_iteration_process_chain() + ( + pc_step, + old_process_chain_list, + ) = self._get_previous_iteration_process_chain() self.interim_result.set_old_pc_step(pc_step) process_list = self._validate_process_chain( process_chain=self.request_data, old_process_chain=old_process_chain_list, - pc_step=pc_step) + pc_step=pc_step, + ) # check iterim results - interim_result_mapset, interim_result_file_path = \ - self.interim_result.check_interim_result_mapset( - pc_step, self.rdc.iteration - 1) + ( + interim_result_mapset, + interim_result_file_path, 
+ ) = self.interim_result.check_interim_result_mapset( + pc_step, self.rdc.iteration - 1 + ) else: # Create the process chain process_list = self._validate_process_chain() @@ -608,20 +734,24 @@ def _execute(self, skip_permission_check=False): # Initialize the GRASS environment and switch into PERMANENT # mapset, which is always linked - self._create_grass_environment(grass_data_base=self.temp_grass_data_base, - mapset_name="PERMANENT") + self._create_grass_environment( + grass_data_base=self.temp_grass_data_base, + mapset_name="PERMANENT", + ) # Create the temporary mapset with the same name as the target # mapset and switch into it self._create_temporary_mapset( temp_mapset_name=self.target_mapset_name, interim_result_mapset=interim_result_mapset, - interim_result_file_path=interim_result_file_path) + interim_result_file_path=interim_result_file_path, + ) self.temp_mapset_name = self.target_mapset_name else: # Init GRASS environment and create the temporary mapset self._create_temporary_grass_environment( - source_mapset_name=self.target_mapset_name) + source_mapset_name=self.target_mapset_name + ) self._lock_temp_mapset() # Execute the process list @@ -633,7 +763,8 @@ def _execute(self, skip_permission_check=False): self._parse_module_outputs() def _final_cleanup(self): - """Final cleanup called in the run function at the very end of processing + """ + Final cleanup called in the run function at the very end of processing """ # Clean up and remove the temporary gisdbase self._cleanup() From 960ce051e4fdf4e0e268d2fe04597e110daef595 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Fri, 20 Jan 2023 14:39:48 +0100 Subject: [PATCH 03/46] Add raster VRT support and support for mapset names by interim results (#410) * add raster VRT support and support for mapset names by interim results Co-authored-by: anikaweinmann , linakrisztian --- .../ephemeral/persistent_processing.py | 187 +----------------- 1 file changed, 10 
insertions(+), 177 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py index d367ac3..9bb9d97 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -25,21 +25,20 @@ Asynchronous computation in specific temporary generated and then copied or original mapsets """ -import fileinput import os import shutil -import sqlite3 import subprocess from actinia_core.processing.actinia_processing.ephemeral_processing import ( EphemeralProcessing, ) from actinia_core.core.common.exceptions import AsyncProcessError +from actinia_core.core.mapset_merge_utils import change_mapsetname __license__ = "GPLv3" -__author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann" +__author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" __copyright__ = ( - "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2023, Sören Gebbert and mundialis GmbH & Co. 
KG" ) __maintainer__ = "mundialis" @@ -307,162 +306,6 @@ def _lock_target_mapset(self): # if we manage to come here, the lock was correctly set self.target_mapset_lock_set = True - def _change_mapsetname_in_group( - self, group_path, source_mapset, target_mapset - ): - """Replaces the mapset name in the group file - - Args: - group_path(str): path of the group folder in the source mapset - source_mapset(str): name of source mapset - target_mapset(str): name of target mapset - - Raises: - This method will raise an AsyncProcessError if a group has no REF - file - """ - group_dirs = os.listdir(group_path) - for group_dir in group_dirs: - group_file = os.path.join(group_path, group_dir, "REF") - if os.path.isfile(group_file): - for line in fileinput.input(group_file, inplace=True): - print(line.replace(source_mapset, target_mapset), end="") - else: - raise AsyncProcessError( - "group %s has no REF file" % (group_dir) - ) - - def _update_views_in_tgis(self, tgis_db_path): - """Update views in tgis sqlite.db - - Args: - tgis_db_path(str): Path to the tgis sqlite.db file where the views - should be updated - """ - con = sqlite3.connect(tgis_db_path) - cur = con.cursor() - - sql_script_folder = os.path.join(os.getenv("GISBASE"), "etc", "sql") - drop_view_sql = os.path.join(sql_script_folder, "drop_views.sql") - with open(drop_view_sql, "r") as sql: - sql_drop_str = sql.read() - cur.executescript(sql_drop_str) - - view_sql_file_names = [ - "raster_views.sql", - "raster3d_views.sql", - "vector_views.sql", - "strds_views.sql", - "str3ds_views.sql", - "stvds_views.sql", - ] - for view_sql_file_name in view_sql_file_names: - view_sql_file = os.path.join(sql_script_folder, view_sql_file_name) - with open(view_sql_file, "r") as sql: - sql_view_str = sql.read() - cur.executescript(sql_view_str) - con.commit() - if con: - con.close() - del cur - - def _merge_tgis_dbs(self, tgis_db_path_1, tgis_db_path_2): - """Merge two tgis sqlite.db files - - Args: - tgis_db_path_1(str): path of 
a tgis sqlite.db file in which the - other should be merged - tgis_db_path_2(str): path of a tgis sqlite.db file which should be - merged in tgis_db_path_1 - """ - con = sqlite3.connect(tgis_db_path_1) - con.execute(f"ATTACH '{tgis_db_path_2}' as dba") - con.execute("BEGIN") - - table_names1 = [ - row[1] - for row in con.execute( - "SELECT * FROM sqlite_master where type='table'" - ) - ] - table_names2 = [ - row[1] - for row in con.execute( - "SELECT * FROM dba.sqlite_master where type='table'" - ) - ] - - # merge databases - for table in table_names2: - if table == "tgis_metadata": - con.execute(f"DROP TABLE {table}") - con.execute( - f"CREATE TABLE {table} AS " f"SELECT * FROM dba.{table}" - ) - continue - # for example raster_register_xxx tables are not in both dbs - if table not in table_names1: - con.execute( - f"CREATE TABLE {table} AS " f"SELECT * FROM dba.{table}" - ) - continue - combine = ( - f"INSERT OR IGNORE INTO {table} SELECT * FROM dba.{table}" - ) - con.execute(combine) - con.commit() - con.execute("detach database dba") - if con: - con.close() - - def _change_mapsetname_in_tgis( - self, tgis_path, source_mapset, target_mapset, target_tgis_db - ): - """Replaces the mapset name in the tgis sqlite.db - - Args: - tgis_path(str): path of the tgis folder in the source mapset - source_mapset(str): name of source mapset - target_mapset(str): name of target mapset - target_tgis_db(str): path to existing tgis sqlite.db of target - mapset. None if nonexistent. 
- """ - - tgis_db_path = os.path.join(tgis_path, "sqlite.db") - - # tables - con = sqlite3.connect(tgis_db_path) - cur = con.cursor() - table_names = [ - row[1] - for row in cur.execute( - "SELECT * FROM sqlite_master where type='table'" - ) - ] - for table_name in table_names: - columns = [ - row[0] - for row in cur.execute( - f"SELECT * FROM {table_name}" - ).description - ] - for col in columns: - cur.execute( - f"UPDATE {table_name} SET {col} = REPLACE({col}, " - f"'{source_mapset}', '{target_mapset}')" - ) - con.commit() - if con: - con.close() - del cur - - # if there already exists a sqlite.db file then merge it - if target_tgis_db is not None: - self._merge_tgis_dbs(tgis_db_path, target_tgis_db) - - # update views - self._update_views_in_tgis(tgis_db_path) - def _merge_mapset_into_target(self, source_mapset, target_mapset): """Link the source mapset content into the target mapset @@ -497,23 +340,13 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): ) target_path = os.path.join(self.user_location_path, target_mapset) - if os.path.exists(source_path) is True: - if directory == "group": - self._change_mapsetname_in_group( - source_path, source_mapset, target_mapset - ) - if directory == "tgis": - target_tgis_db = None - if os.path.isdir(os.path.join(target_path, "tgis")): - target_tgis_db = os.path.join( - target_path, "tgis", "sqlite.db" - ) - self._change_mapsetname_in_tgis( - source_path, - source_mapset, - target_mapset, - target_tgis_db, - ) + change_mapsetname( + source_path, + directory, + source_mapset, + target_mapset, + target_path, + ) if os.path.exists(source_path) is True: # Hardlink the sources into the target From b942acdf9e70d9207fc5ec69b15e5a0fb322d680 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:14:28 +0200 Subject: [PATCH 04/46] Black (#458) * make resource resource delition asynchron * trailing and ending precommit ... 
* do not use pylint * trigger linting * linting * linting --------- Co-authored-by: anikaweinmann --- .../actinia_processing/ephemeral/persistent_processing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py index 9bb9d97..bb510c9 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -209,7 +209,6 @@ def _check_mapset(self, mapset): and os.access(self.user_location_path, os.R_OK | os.X_OK | os.W_OK) is True ): - self.orig_mapset_path = os.path.join( self.user_location_path, mapset ) From 56e8b23b436a5b10395a6dd116788854a30640b8 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:59:37 +0100 Subject: [PATCH 05/46] Renamed GRASS GIS locations to projects (#565) * add decorator for deprecated locations * add project endpoints * endpoints for projects * rename location * rename location * unify maintainer * fix GRASS Initialisation * black * fixes for tests and G83 * fixes * add rest of changes * Update tests/test_job_resumption.py * fix endpoints and job resumption tests * Update src/actinia_core/core/common/api_logger.py * Tests for G84 * Test pipelines for G8.3 and G8.4 * Update src/actinia_core/rest/base/resource_base.py * fix test workflow * fix tests for G83 * fix error due to lib update * fix proc name * Update src/actinia_core/endpoints.py Co-authored-by: Carmen Tawalika --------- Co-authored-by: Carmen Tawalika Co-authored-by: Carmen --- .../ephemeral/persistent_processing.py | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py 
b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py index bb510c9..c7f15f0 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -4,7 +4,7 @@ # performance processing of geographical data that uses GRASS GIS for # computational tasks. For details, see https://actinia.mundialis.de/ # -# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. KG +# Copyright (c) 2016-2024 Sören Gebbert and mundialis GmbH & Co. KG # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -38,9 +38,10 @@ __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" __copyright__ = ( - "Copyright 2016-2023, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2024, Sören Gebbert and mundialis GmbH & Co. KG" ) -__maintainer__ = "mundialis" +__maintainer__ = "mundialis GmbH & Co. KG" +__email__ = "info@mundialis.de" class PersistentProcessing(EphemeralProcessing): @@ -48,7 +49,7 @@ class PersistentProcessing(EphemeralProcessing): This class is designed to run GRASS modules that are specified in a process chain in a temporary mapset that later on is copied into the - original location or merged into an existing mapset. + original project or merged into an existing mapset. Locking concept: @@ -69,7 +70,7 @@ class PersistentProcessing(EphemeralProcessing): If target mapset does not exists: - After processing finished successfully, copy the - temporary mapset to the original user group specific location using + temporary mapset to the original user group specific project using the target mapset name - Unlock the two mapsets after processing is finished, terminated or raised an error @@ -96,26 +97,26 @@ def __init__(self, rdc): # We have two mapset lock ids. 
The target mapset and the temporary # mapset self.target_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.location_name, self.target_mapset_name + self.user_group, self.project_name, self.target_mapset_name ) self.temp_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.location_name, self.temp_mapset_name + self.user_group, self.project_name, self.temp_mapset_name ) self.temp_mapset_lock_set = False - def _generate_mapset_lock_id(self, user_group, location_name, mapset_name): + def _generate_mapset_lock_id(self, user_group, project_name, mapset_name): """Generate a unique id to lock a mapset in the redis database - Locations are user group specific. Hence different user groups may have - locations with the same names and with equal mapset names. + Projects are user group specific. Hence different user groups may have + projects with the same names and with equal mapset names. - In the same user group, a location/mapset must be locked to grant + In the same user group, a project/mapset must be locked to grant exclusive access rights. Args: user_group: The user group used for locking - location_name: The location name in which the mapset is located + project_name: The project name in which the mapset is located for locking mapset_name: The mapset name that should be locked @@ -123,7 +124,7 @@ def _generate_mapset_lock_id(self, user_group, location_name, mapset_name): The lock id """ - return "%s/%s/%s" % (user_group, location_name, mapset_name) + return "%s/%s/%s" % (user_group, project_name, mapset_name) def _lock_temp_mapset(self): """Lock the temporary mapset @@ -153,9 +154,9 @@ def _check_mapset(self, mapset): """Check if the target mapset exists This method will check if the target mapset exists in the global and - user group locations. + user group projects. If the mapset is in the global database, then an AsyncProcessError - will be raised, since global location/mapsets can not be modified. 
+ will be raised, since global project/mapsets can not be modified. This method sets in case of success: @@ -168,20 +169,20 @@ def _check_mapset(self, mapset): """ mapset_exists = False - # Check if the global location is accessible and that the target mapset + # Check if the global project is accessible and that the target mapset # does not exist if self.is_global_database is True: # Break if the target mapset exists in the global database if ( - os.path.exists(self.global_location_path) - and os.path.isdir(self.global_location_path) + os.path.exists(self.global_project_path) + and os.path.isdir(self.global_project_path) and os.access( - self.global_location_path, os.R_OK | os.X_OK | os.W_OK + self.global_project_path, os.R_OK | os.X_OK | os.W_OK ) is True ): self.orig_mapset_path = os.path.join( - self.global_location_path, mapset + self.global_project_path, mapset ) if os.path.exists(self.orig_mapset_path) is True: @@ -197,20 +198,19 @@ def _check_mapset(self, mapset): ) else: raise AsyncProcessError( - "Unable to access global location <%s>" - % self.location_name + "Unable to access global project <%s>" % self.project_name ) # Always check if the target mapset already exists and set the flag # accordingly if ( - os.path.exists(self.user_location_path) - and os.path.isdir(self.user_location_path) - and os.access(self.user_location_path, os.R_OK | os.X_OK | os.W_OK) + os.path.exists(self.user_project_path) + and os.path.isdir(self.user_project_path) + and os.access(self.user_project_path, os.R_OK | os.X_OK | os.W_OK) is True ): self.orig_mapset_path = os.path.join( - self.user_location_path, mapset + self.user_project_path, mapset ) if os.path.exists(self.orig_mapset_path) is True: @@ -233,7 +233,7 @@ def _check_mapset(self, mapset): mapset_exists = False else: raise AsyncProcessError( - "Unable to access user location <%s>" % self.location_name + "Unable to access user project <%s>" % self.project_name ) return mapset_exists @@ -242,7 +242,7 @@ def 
_check_target_mapset_exists(self): """Check if the target mapset exists This method will check if the target mapset exists in the global and - user location. + user project. If the mapset is in the global database, then an AsyncProcessError will be raised, since global mapsets can not be modified. @@ -261,7 +261,7 @@ def _check_lock_target_mapset(self): mapset This method will check if the target mapset exists in the global and - user location. + user project. If the mapset is in the global database, then an AsyncProcessError will be raised, since global mapsets can not be modified. @@ -293,13 +293,13 @@ def _lock_target_mapset(self): if ret == 0: raise AsyncProcessError( - "Unable to lock location/mapset <%s/%s>, " + "Unable to lock project/mapset <%s/%s>, " "resource is already locked" - % (self.location_name, self.target_mapset_name) + % (self.project_name, self.target_mapset_name) ) self.message_logger.info( - "location/mapset <%s/%s> locked" - % (self.location_name, self.target_mapset_name) + "project/mapset <%s/%s> locked" + % (self.project_name, self.target_mapset_name) ) # if we manage to come here, the lock was correctly set @@ -335,9 +335,9 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): for directory in directories: source_path = os.path.join( - self.user_location_path, source_mapset, directory + self.user_project_path, source_mapset, directory ) - target_path = os.path.join(self.user_location_path, target_mapset) + target_path = os.path.join(self.user_project_path, target_mapset) change_mapsetname( source_path, @@ -370,7 +370,7 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): ) def _copy_merge_tmp_mapset_to_target_mapset(self): - """Copy the temporary mapset into the original location + """Copy the temporary mapset into the original project In case the mapset does not exists, then use the target mapset name, otherwise use the temporary mapset name for copying which is later on @@ -402,7 +402,7 @@ def 
_copy_merge_tmp_mapset_to_target_mapset(self): "Copy temporary mapset from %s to %s" % ( self.temp_mapset_path, - os.path.join(self.user_location_path, self.target_mapset_name), + os.path.join(self.user_project_path, self.target_mapset_name), ) ) @@ -412,18 +412,18 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): # otherwise use the temporary mapset name for copying which is later # on merged into the target mapset and then removed if self.target_mapset_exists is True: - target_path = self.user_location_path + "/." + target_path = self.user_project_path + "/." message = ( - "Copy temporary mapset <%s> to target location " - "<%s>" % (self.temp_mapset_name, self.location_name) + "Copy temporary mapset <%s> to target project " + "<%s>" % (self.temp_mapset_name, self.project_name) ) else: target_path = os.path.join( - self.user_location_path, self.target_mapset_name + self.user_project_path, self.target_mapset_name ) message = ( - "Copy temporary mapset <%s> to target location " - "<%s>" % (self.target_mapset_name, self.location_name) + "Copy temporary mapset <%s> to target project " + "<%s>" % (self.target_mapset_name, self.project_name) ) self._send_resource_update(message) @@ -440,14 +440,14 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): if p.returncode != 0: raise AsyncProcessError( "Unable to copy temporary mapset to " - "original location. Copy error " + "original project. Copy error " "stdout: %s stderr: %s returncode: %i" % (stdout_buff, stderr_buff, p.returncode) ) except Exception as e: raise AsyncProcessError( "Unable to copy temporary mapset to " - "original location. Exception %s" % str(e) + "original project. 
Exception %s" % str(e) ) # Merge the temp mapset into the target mapset in case the target @@ -457,7 +457,7 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): self.temp_mapset_name, self.target_mapset_name ) shutil.rmtree( - os.path.join(self.user_location_path, self.temp_mapset_name) + os.path.join(self.user_project_path, self.temp_mapset_name) ) # remove interim results if self.interim_result.saving_interim_results is True: @@ -588,7 +588,7 @@ def _execute(self, skip_permission_check=False): # Execute the process list self._execute_process_list(process_list) - # Copy local mapset to original location, merge mapsets + # Copy local mapset to original project, merge mapsets # if necessary self._copy_merge_tmp_mapset_to_target_mapset() # Parse the module sdtout outputs and create the results From 092023207bc61f3f16459c5b869c1c64fd0ce11f Mon Sep 17 00:00:00 2001 From: linakrisztian <106728040+linakrisztian@users.noreply.github.com> Date: Tue, 15 Apr 2025 14:13:53 +0200 Subject: [PATCH 06/46] Remaining changes for switch of redis to valkey (#601) * update setup from redis to valkey * replace redis with valkey calls + remove non needed redis-remainings * replace redis import with valkey import * adjust function from redis to valkey * replace redis by valkey function * fix call of valkey commands fitting valkey-syntax * Semantic renaming: redis to kvdb (#602) * semantic renaming: redis to kvdb * remove non semantic changes * remove non semantic changes * Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * remove non semantic changes * update readme * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update src/actinia_core/core/common/config.py Co-authored-by: Carmen Tawalika * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update docker/actinia-core-alpine/actinia.cfg Co-authored-by: Carmen Tawalika * PR review CT --------- Co-authored-by: 
github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika * linting: black --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika --- .../actinia_processing/ephemeral/persistent_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py index c7f15f0..21df14e 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py @@ -106,7 +106,7 @@ def __init__(self, rdc): self.temp_mapset_lock_set = False def _generate_mapset_lock_id(self, user_group, project_name, mapset_name): - """Generate a unique id to lock a mapset in the redis database + """Generate a unique id to lock a mapset in the kvdb database Projects are user group specific. Hence different user groups may have projects with the same names and with equal mapset names. 
From e559132808620e121a79a74ddf12de3b51afc9e4 Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Fri, 1 Apr 2022 11:34:10 +0300 Subject: [PATCH 07/46] Refactor rest - part 2 (#322) * reactivate redis queue * rename variable * make queue type configurable * enhance dev setup for redis queue * lint * move user_auth * move user auth * move base_login * move map_layer_base * move renderer_base * move resource_base * fix relative imports * lint * first splitup: raster_colors * splitup download_cache_management * splitup ephemeral_custom_processing * splitup ephemeral_processing_with_export * splitup ephemeral_processing * splitup renderer_base * splitup persistent_processing * splitup location_management * splitup map_layer_management * splitup mapset_management * splitup persistent_mapset_merger * splitup process_validation * splitup raster_export * splitup raster_layer * splitup raster_legend * lint * splitup raster_renderer * splitup resource_storage_management * splitup strds_management * splitup strds_raster_management * splitup strds_renderer * splitup vector_layer * splitup vector_renderer * lint * make inheritance more clear * add readme * fix import --- .../ephemeral_processing.py | 1690 +++++++++++++++++ 1 file changed, 1690 insertions(+) create mode 100644 src/actinia_core/processing/actinia_processing/ephemeral_processing.py diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py new file mode 100644 index 0000000..300c792 --- /dev/null +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -0,0 +1,1690 @@ +# -*- coding: utf-8 -*- +####### +# actinia-core - an open source REST API for scalable, distributed, high +# performance processing of geographical data that uses GRASS GIS for +# computational tasks. For details, see https://actinia.mundialis.de/ +# +# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. 
KG +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +####### + +""" +Base class for asynchronous processing +""" + +import math +import os +import pickle +import requests +import shutil +import subprocess +import sys +import tempfile +import time +import traceback +import uuid + +from flask import json +from requests.auth import HTTPBasicAuth + +from actinia_core.core.common.process_object import Process +from actinia_core.core.grass_init import GrassInitializer +from actinia_core.core.messages_logger import MessageLogger +from actinia_core.core.redis_lock import RedisLockingInterface +from actinia_core.core.resources_logger import ResourceLogger +from actinia_core.core.common.process_chain import ProcessChainConverter +from actinia_core.core.common.exceptions \ + import AsyncProcessError, AsyncProcessTermination, RsyncError +from actinia_core.core.common.exceptions import AsyncProcessTimeLimit +from actinia_core.models.response_models \ + import ProcessingResponseModel, ExceptionTracebackModel +from actinia_core.models.response_models \ + import create_response_from_model, ProcessLogModel, ProgressInfoModel +from actinia_core.core.interim_results import InterimResult, get_directory_size +from actinia_core.rest.base.user_auth import check_location_mapset_module_access + +__license__ = "GPLv3" +__author__ = "Sören Gebbert, Anika Weinmann" +__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis 
GmbH & Co. KG" +__maintainer__ = "mundialis" + + +class EphemeralProcessing(object): + """This class processes GRASS data on the local machine in an temporary mapset. + + The temporary mapset will be removed by this class when the processing finished + + Creating the temporary database and mapset: + + 1. Create a new gisdbase in a temporary directory + + e.g: /tmp/soeren_disdbase + + 2. Create the required location directory + + e.g: /tmp/soeren_temp_gisdbase/ECAD + + 3. Softlink the PERMANENT and all required mapsets into the, + new location directory from the original location, + check the input parameter of the module for which mapsets must be linked + + e.g: /mount/groups/[user group]/locations/ECAD/PERMANENT + -> /tmp/soeren_temp_gisdbase/ECAD/PERMANENT + e.g: /mount/groups/[user group]/locations/ECAD/Temperature + -> /tmp/soeren_temp_gisdbase/ECAD/Temperature + + 4. Set the GRASS GIS environmental variables to point to the new gisdbase, + location and PERMANENT maspet + + 5. Create a new mapset with g.mapset in the temporary location directory + + e.g: /tmp/soeren_temp_gisdbase/ECAD/MyMapset + + 6. Run and poll each process in the process chain until finished and update + the resource log entry + - Create a poll loop and check the subprocess status + - Update the resource entry with progress info + + 7. When finished create the response: + * put the output of all modules into the response document + + 8. Remove the temporary gisdbase + + 9. 
Send the response document with status finished and resource links + + """ + + def __init__(self, rdc): + """Constructor + + Args: + rdc (ResourceDataContainer): The data container that contains all + required variables for processing + + """ + # Fluentd hack to work in a multiprocessing environment + try: + from fluent import sender + + self.has_fluent = True + except Exception: + self.has_fluent = False + + # rdc = ResourceDataContainer() + + self.rdc = rdc + self.config = self.rdc.config + self.data = self.rdc.user_data + self.grass_temp_database = self.config.GRASS_TMP_DATABASE + + self.request_data = self.rdc.request_data + self.user_id = self.rdc.user_id + self.user_group = self.rdc.user_group + self.user_credentials = rdc.user_credentials + + self.resource_id = self.rdc.resource_id + self.iteration = self.rdc.iteration + self.status_url = self.rdc.status_url + self.api_info = self.rdc.api_info + self.interim_result = InterimResult( + self.user_id, self.resource_id, self.iteration) + + self.grass_data_base = self.rdc.grass_data_base # Global database + # User database base path, this path will be + self.grass_user_data_base = self.rdc.grass_user_data_base + # extended with the user group name in the setup + self.grass_base_dir = self.rdc.grass_base_dir + + self.location_name = self.rdc.location_name + self.mapset_name = self.rdc.mapset_name + # Set this True if the work is performed based on global database + self.is_global_database = False + + self.map_name = self.rdc.map_name + + self.orig_time = self.rdc.orig_time + self.orig_datetime = self.rdc.orig_datetime + + # Create the unique temporary gisdbase and mapset names + self.unique_id = str(uuid.uuid4()).replace("-", "") + self.temp_grass_data_base_name = "gisdbase_" + self.unique_id + self.temp_mapset_name = "mapset_" + self.unique_id + self.temp_mapset_path = None + + self.ginit = None + + # Successfully finished message + self.finish_message = "Processing successfully finished" + + # The temporary GRASS 
database + self.temp_grass_data_base = None # Path to the temporary grass database + self.temp_location_path = None # Path to the temporary location + self.temp_file_path = None # The path to store temporary created files + self.global_location_path = None # The path to the global location to link + self.user_location_path = None # The path to the user location to link + + # List of resources that should be created + self.resource_export_list = list() + self.resource_url_list = list() + + # Initialize the user specific permissions + self.cell_limit = 0 + self.process_time_limit = 0 + self.process_num_limit = 0 + # Set this True so that regions are not checked before processing + self.skip_region_check = False + + # The stdout, stderr and parameter log of the module chains + self.module_output_log = list() + # The stdout, stderr and parameter log of the module chains + # using a dict with the process id as key + self.module_output_dict = dict() + # The list of output parser definitions that must be applied + # after the module run. 
The parser result will be stored in + # the module_result dictionary using the parser id + self.output_parser_list = [] + # A dictionary that has the process id as key to store module + self.module_results = dict() + # outputs like images, dicts, files and so on + + self.required_mapsets = list() # The process chain analysis will provide + # a list of required mapsets that must be + # linked in the temporary location + + # The module that was called in the process chain, to detect g.region calls + # and check for correct region settings + self.last_module = "g.region" + # Count the processes executed from the process chain + self.process_count = 0 + + self.ginit = None + + # The state of the whole processing that is checked in the final + # section of the run function + self.run_state = {"success": None} + + # The progress info object + self.progress = ProgressInfoModel( + step=0, num_of_steps=0) + # The count of self._run_process() and self._run_module() calls + self.progress_steps = 0 + # The number of processes that should be processes + self.number_of_processes = 0 + + self.setup_flag = False + + # The names of the temporarily generated files "key":"temporary_file_path" + self.temporary_pc_files = {} + # The counter to generate unique temporary file names + self.temp_file_count = 0 + # This dictionary contains the output of a process on stdout + self.process_dict = {} + + # The class that is used to create the response + self.response_model_class = ProcessingResponseModel + # The class that converts process chain definitions into + self.proc_chain_converter = None + # process lists that will be executed. 
This variable is + # initiated in the setup method + # The list of all process chains that were processed + self.process_chain_list = [] + # A list of all processes that will be executed + self.actinia_process_list = list() + # A list of all processes that will be executed + self.actinia_process_dict = dict() + # The URL of a webhook that should becalled after processing of a + self.webhook_finished = None + # process chain finished + + # The URL of a webhook that should be called for each status/progress update + self.webhook_update = None + # The authentication for the webhook (base 64 decoded "username:password") + self.webhook_auth = None + + def _send_resource_update(self, message, results=None): + """Create an HTTP response document and send it to the status database + + Args: + message (str): The message + results (dict): Results of the processing using the process chain + id for identification + + """ + data = create_response_from_model(self.response_model_class, + status="running", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + # process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=200, + status_url=self.status_url, + api_info=self.api_info) + self._send_to_database(document=data, final=False) + + def _send_resource_finished(self, message, results=None): + """Create an HTTP response document and send it to the status database + + Args: + message (str): The message + results (dict): Results of the processing using the process chain + id for identification + + """ + data = create_response_from_model(self.response_model_class, + status="finished", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + 
http_code=200, + status_url=self.status_url, + api_info=self.api_info, + resource_urls=self.resource_url_list, + process_chain_list=self.process_chain_list) + self._send_to_database(document=data, final=True) + + def _send_resource_terminated(self, message, results=None): + """Create an HTTP response document and send it to the status database + + Args: + message (str): The message + results (dict): Results of the processing using the process chain + id for identification + + """ + data = create_response_from_model(self.response_model_class, + status="terminated", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=200, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list) + self._send_to_database(document=data, final=True) + + def _send_resource_time_limit_exceeded(self, message, results=None): + """Create an HTTP response document and send it to the status database + + Args: + message (str): The message + results (dict): Results of the processing using the process chain + id for identification + + """ + data = create_response_from_model(self.response_model_class, + status="terminated", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=400, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list) + self._send_to_database(document=data, final=True) + + def _send_resource_error(self, message, results=None, exception=None): + """Create an HTTP response document and send it to the status database + + Args: + message (str): The message + results (dict): Results of 
the processing using the process chain + id for identification + + """ + data = create_response_from_model(self.response_model_class, + status="error", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=400, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list, + exception=exception) + self._send_to_database(document=data, final=True) + + def _send_to_database(self, document, final=False): + """Send the document to the database + + The resource expiration time set in the actinia config file will be used + for every resource commit. + + If a webhook URL is provided, the JSON response will be send to the + provided endpoint using a POST request. + + Args: + document (str): The response document + final (bool): Set True if this was the final resource commit + (no update) to activate the webhook call + + """ + + self.resource_logger.commit( + user_id=self.user_id, resource_id=self.resource_id, + iteration=self.iteration, document=document, + expiration=self.config.REDIS_RESOURCE_EXPIRE_TIME) + + # Call the webhook after the final result was send to the database + try: + if final is True and self.webhook_finished is not None: + self._post_to_webhook(document, 'finished') + elif final is False and self.webhook_update is not None: + self._post_to_webhook(document, 'update') + except Exception as e: + e_type, e_value, e_tb = sys.exc_info() + model = ExceptionTracebackModel(message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type)) + run_state = {"error": str(e), "exception": model} + print(str(run_state)) + self.message_logger.error( + "Unable to send webhook request. 
Traceback: %s" % str(run_state)) + + def _post_to_webhook(self, document, type): + """Helper method to send a post request to a webhook. + The finished webhook will be retried until it is reached of the number + of tries is WEBHOOK_RETRIES which can be set in the config. + + Args: + document (str): The response document + type (str): The webhook type: 'finished' or 'update' + """ + self.message_logger.info( + "Send POST request to %s webhook url: %s" % (type, self.webhook_finished)) + webhook_url = None + if type == 'finished': + webhook_url = self.webhook_finished + webhook_retries = self.config.WEBHOOK_RETRIES + webhook_sleep = self.config.WEBHOOK_SLEEP + if type == 'update': + webhook_url = self.webhook_update + webhook_retries = 1 + webhook_sleep = 0 + + http_code, response_model = pickle.loads(document) + + webhook_not_reached = True + retry = 0 + while webhook_not_reached is True and retry < webhook_retries: + retry += 1 + try: + if self.webhook_auth: + # username is expected to be without colon (':') + resp = requests.post( + webhook_url, json=json.dumps(response_model), + auth=HTTPBasicAuth( + self.webhook_auth.split(':')[0], + ':'.join(self.webhook_auth.split(':')[1:])), + timeout=10) + else: + resp = requests.post(webhook_url, json=json.dumps(response_model), + timeout=10) + if not (500 <= resp.status_code and resp.status_code < 600): + webhook_not_reached = False + except Exception: + time.sleep(webhook_sleep) + if ((webhook_not_reached is False and resp.status_code not in [200, 204]) + or webhook_not_reached is True): + raise AsyncProcessError( + "Unable to access %s webhook URL %s" % (type, webhook_url)) + + def _get_previous_iteration_process_chain(self): + """Helper method to check the old resource run and get the step of the + process chain where to continue + + Returns: + pc_step (int): The number of the step in the process chain where to + continue + old_process_chain (dict): The process chain of the old resource run + """ + # check old resource + 
pc_step = 0 + + for iter in range(1, self.rdc.iteration): + if iter == 1: + old_response_data = self.resource_logger.get( + self.user_id, self.resource_id) + else: + old_response_data = self.resource_logger.get( + self.user_id, self.resource_id, iter) + if old_response_data is None: + return None + _, response_model = pickle.loads(old_response_data) + for element in response_model['process_log']: + self.module_output_dict[element['id']] = element + + pc_step += response_model['progress']['step'] - 1 + old_process_chain = response_model['process_chain_list'][0] + + return pc_step, old_process_chain + + def _validate_process_chain(self, process_chain=None, + skip_permission_check=False, + old_process_chain=None, pc_step=None): + """ + Create the process list and check for user permissions. + + The following permissions are checked: + + - If the required modules are in the users module white list + - If the user is allowed to access the required mapsets that + are specified in the module input parameter + - If the user is allowed to execute the number of processes + + Args: + process_chain (dict): The process chain to be checked and converted + into a process list + skip_permission_check (bool): If set True, the permission checks + of module access and process num + limits are not performed + old_process_chain (dict): The process chain of the previous + resource run to be checked and converted + for e.g. stdout + + Raises: + This function raises AsyncProcessError in case of an error. 
+ + Returns: list: + The process list + """ + + if old_process_chain is not None: + self.proc_chain_converter.process_chain_to_process_list(old_process_chain) + self.proc_chain_converter.import_descr_list = list() + self.proc_chain_converter.resource_export_list = list() + + # Backward compatibility + if process_chain is None: + process_list = self.proc_chain_converter.process_chain_to_process_list( + self.request_data) + self.process_chain_list.append(self.request_data) + else: + process_list = self.proc_chain_converter.process_chain_to_process_list( + process_chain) + self.process_chain_list.append(process_chain) + if pc_step is not None: + del process_list[:pc_step] + + # Check for the webhook + if (hasattr(self.proc_chain_converter, 'webhook_finished') + and self.proc_chain_converter.webhook_finished is not None): + self.webhook_finished = self.proc_chain_converter.webhook_finished + if (hasattr(self.proc_chain_converter, 'webhook_update') + and self.proc_chain_converter.webhook_update is not None): + self.webhook_update = self.proc_chain_converter.webhook_update + if (hasattr(self.proc_chain_converter, 'webhook_auth') + and self.proc_chain_converter.webhook_auth is not None): + self.webhook_auth = self.proc_chain_converter.webhook_auth + + # Check for empty process chain + if len(process_list) == 0 and len(self.resource_export_list) == 0: + raise AsyncProcessError("Empty process chain, nothing to compute") + + # Check if the user is allowed to execute this number of processes + if (skip_permission_check is False + and len(process_list) > self.process_num_limit): + raise AsyncProcessError( + "Process limit exceeded, a maximum of %i " + "processes are allowed in the process chain." % self.process_num_limit) + + # Check if the module description was correct and if the + # module or executable is in the user white list. 
+ for process in process_list: + # Add the process to the internal list and dict + # to access it in the python udf environment + self._add_actinia_process(process) + + if process.exec_type == "grass" or process.exec_type == "exec": + if skip_permission_check is False: + if process.skip_permission_check is False: + resp = check_location_mapset_module_access( + user_credentials=self.user_credentials, + config=self.config, + module_name=process.executable) + if resp is not None: + raise AsyncProcessError( + "Module or executable <%s> is not supported" + % process.executable) + else: + message = ( + "Wrong process description, type: %s " + "module/executable: %s, args: %s" % ( + str(process.exec_type), + str(process.executable), + str(process.executable_params))) + raise AsyncProcessError(message) + + # Update the processing + self._update_num_of_steps(len(process_list)) + + return process_list + + def _setup(self, init_grass=True): + """Setup the logger, the mapset lock and the credentials. Create the + temporary grass database and temporary file directories + + ATTENTION: This method must be called first before any processing can + take place + + What is done: + + - Create the resource and message logger + - Create the redis lock interface for resource locking + - Set cell limit, process number limit and process time limit from user c + redentials. + - Create all required paths to original and temporary location and mapsets. 
+ - temp_location_path + - global_location_path + - grass_user_data_base <- This path will be created if it does not exist + - user_location_path <- This path will be created if it does not exist + - temp_grass_data_base <- This path will be created + - temp_file_path <- This path will be created + - Check if the current working location is in a persistent (global) + GRASS GIS database (is_global_database) + - Create the process chain to process list converter + + Args: + init_grass (bool): Set true to initialize the user credentials + and the temporary database and location paths + + """ + # The setup should only be executed once + if self.setup_flag is True: + return + else: + self.setup_flag = True + + # fluent sender for this subprocess + fluent_sender = None + if self.has_fluent is True: + from fluent import sender + fluent_sender = sender.FluentSender('actinia_core_logger', + host=self.config.LOG_FLUENT_HOST, + port=self.config.LOG_FLUENT_PORT) + kwargs = dict() + kwargs['host'] = self.config.REDIS_SERVER_URL + kwargs['port'] = self.config.REDIS_SERVER_PORT + if self.config.REDIS_SERVER_PW and self.config.REDIS_SERVER_PW is not None: + kwargs['password'] = self.config.REDIS_SERVER_PW + self.resource_logger = ResourceLogger(**kwargs, + fluent_sender=fluent_sender) + + self.message_logger = MessageLogger( + config=self.config, user_id=self.user_id, fluent_sender=fluent_sender) + + self.lock_interface = RedisLockingInterface() + self.lock_interface.connect(**kwargs) + del kwargs + self.process_time_limit = int( + self.user_credentials["permissions"]["process_time_limit"]) + + # Check and create all required paths to global, user and temporary locations + if init_grass is True: + self._setup_paths() + + self.proc_chain_converter = ProcessChainConverter( + config=self.config, + temp_file_path=self.temp_file_path, + process_dict=self.process_dict, + temporary_pc_files=self.temporary_pc_files, + required_mapsets=self.required_mapsets, + 
resource_export_list=self.resource_export_list, + output_parser_list=self.output_parser_list, + message_logger=self.message_logger, + send_resource_update=self._send_resource_update) + + def _setup_paths(self): + """Helper method to setup the paths + """ + self.cell_limit = int(self.user_credentials["permissions"]["cell_limit"]) + self.process_num_limit = int( + self.user_credentials["permissions"]["process_num_limit"]) + # Setup the required paths + self.temp_grass_data_base = os.path.join( + self.grass_temp_database, self.temp_grass_data_base_name) + self.temp_file_path = os.path.join(self.temp_grass_data_base, ".tmp") + + if self.location_name: + self.temp_location_path = os.path.join( + self.temp_grass_data_base, self.location_name) + self.global_location_path = os.path.join( + self.grass_data_base, self.location_name) + # Create the user database path if it does not exist + if not os.path.exists(self.grass_user_data_base): + os.mkdir(self.grass_user_data_base) + # Create the user group specific path, if it does not exist and set the, + # grass user database path accordingly + self.grass_user_data_base = os.path.join( + self.grass_user_data_base, self.user_group) + if not os.path.exists(self.grass_user_data_base): + os.mkdir(self.grass_user_data_base) + # Create the user group specific location path, if it does not exist + self.user_location_path = os.path.join( + self.grass_user_data_base, self.location_name) + if not os.path.exists(self.user_location_path): + os.mkdir(self.user_location_path) + # Check if the location is located in the global database + self.is_global_database = False + location = os.path.join(self.grass_data_base, self.location_name) + if os.path.isdir(location): + self.is_global_database = True + # Create the database, location and temporary file directories + os.mkdir(self.temp_grass_data_base) + os.mkdir(self.temp_file_path) + + def _create_temp_database(self, mapsets=None): + """Create a temporary gis database with location and mapsets + 
from the global and user group database for processing. + + IMPORTANT: All processing and mapaste management is performed within a + temporary database! + + Link the required existing mapsets of global and user group locations + into the temporary location directory. + + Linking is performed in two steps: + 1.) If the location is a global location, then the mapsets from the + global location are linked in the temporary locations + 2.) Then link all required mapsets from the user group location into + the temporary location + + Only mapsets from the global location are linked into the temporary + location to which the user group has access. + It checks for access in the global database but not in the user group database. + The user can always access its own data of its group. + + Args: + mapsets: A list of mapset names that should be linked into + the temporary location. If the list is empty, all + available user accessible mapsets of the global + and user group specific location will be linked. + + Raises: + This function raises AsyncProcessError in case of an error. + + """ + # Assign default mapsets + if mapsets is None: + mapsets = [] + + try: + # Create the temporary location directory + os.mkdir(self.temp_location_path) + + # Always link the PERMANENT mapset + if len(mapsets) > 0 and "PERMANENT" not in mapsets: + mapsets.append("PERMANENT") + + mapsets_to_link = [] + check_all_mapsets = False + if not mapsets: + check_all_mapsets = True + + # User and global location mapset linking + self._link_mapsets(mapsets, mapsets_to_link, check_all_mapsets) + + # Check if we missed some of the required mapsets + if check_all_mapsets is False: + mapset_list = [] + for mapset_path, mapset in mapsets_to_link: + mapset_list.append(mapset) + + for mapset in mapsets: + if mapset not in mapset_list: + raise AsyncProcessError( + "Unable to link all required mapsets into temporary " + "location. 
Missing or un-accessible mapset " + "<%s> in location <%s>" + % (mapset, self.location_name)) + + # Link the original mapsets from global and user database into the + # temporary location + for mapset_path, mapset in mapsets_to_link: + if os.path.isdir( + os.path.join(self.temp_location_path, mapset)) is False: + os.symlink(mapset_path, os.path.join( + self.temp_location_path, mapset)) + + except Exception as e: + raise AsyncProcessError("Unable to create a temporary GIS database" + ", Exception: %s" % str(e)) + + def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): + """Helper method to link locations mapsets + + Args: + mapsets (list): List of mapsets in location + mapsets_to_link (list): List of mapsets paths to link + check_all_mapsets (bool): If set True, the mapsets list is created with + all locations on location_path + + Returns: + mapsets (list): List of mapsets in location + mapsets_to_link (list): List of mapsets paths to link + """ + # Global location mapset linking + if self.is_global_database is True: + # List all available mapsets in the global location + mapsets, mapsets_to_link = self._list_all_available_mapsets( + self.global_location_path, mapsets, + check_all_mapsets, mapsets_to_link, True) + # Check for leftover mapsets + left_over_mapsets = [] + for mapset in mapsets: + if mapset not in mapsets_to_link: + left_over_mapsets.append(mapset) + # List all available mapsets in the user location + mapsets, mapsets_to_link = self._list_all_available_mapsets( + self.user_location_path, left_over_mapsets, + check_all_mapsets, mapsets_to_link, False) + return mapsets, mapsets_to_link + + def _list_all_available_mapsets(self, location_path, mapsets, check_all_mapsets, + mapsets_to_link, global_db=False): + """Helper method to list all available mapsets and for global database + it is checked if the mapset can be accessed. + + Args: + location_path (str): Path to location (global or user) + mapsets (list): List of mapsets names to link. 
def _create_grass_environment(self, grass_data_base, mapset_name="PERMANENT"):
    """Set up the GRASS environment to run modules

    Logs the database, location and mapset, stores a new GrassInitializer
    in ``self.ginit`` and calls its ``initialize()`` method.

    Args:
        grass_data_base (str): The GRASS database path to initialize
        mapset_name (str): The mapset that should be used for processing,
                           default is PERMANENT

    Raises:
        Whatever GrassInitializer raises; its failure modes are not
        visible from this method.

    """
    log_values = (grass_data_base, self.location_name, mapset_name)
    self.message_logger.info(
        "Initlialize GRASS grass_data_base: %s; location: %s; mapset: %s"
        % log_values)

    config = self.config
    # self.ginit is assigned before initialize() runs, as before
    self.ginit = GrassInitializer(
        grass_data_base=grass_data_base,
        grass_base_dir=config.GRASS_GIS_BASE,
        location_name=self.location_name,
        mapset_name=mapset_name,
        config=config,
        grass_addon_path=config.GRASS_ADDON_PATH,
        user_id=self.user_id)
    self.ginit.initialize()
mapset is + not in the access list of the user. + + """ + self.message_logger.info( + "Initlialize GRASS grass_data_base: %s; location: %s; mapset: %s" + % (grass_data_base, self.location_name, mapset_name)) + + self.ginit = GrassInitializer(grass_data_base=grass_data_base, + grass_base_dir=self.config.GRASS_GIS_BASE, + location_name=self.location_name, + mapset_name=mapset_name, config=self.config, + grass_addon_path=self.config.GRASS_ADDON_PATH, + user_id=self.user_id) + + self.ginit.initialize() + + def _create_temporary_mapset(self, temp_mapset_name, source_mapset_name=None, + interim_result_mapset=None, + interim_result_file_path=None): + """Create the temporary mapset and switch into it + + This method needs an initialized the GRASS environment. + + It will check access to all required mapsets and adds them to the mapset + search path. + + IMPORTANT: You need to call self._create_grass_environment() to set up + the environment before calling this method. + + A new temporary mapset is created. All in the process chain detected mapsets + of input maps or STDS will be added to the mapset search path using g.mapsets. + + Optionally the WIND file of a source mapset can be copied into the temporary + mapset. 
+ + Args: + temp_mapset_name (str): The name of the temporary mapset to be created + source_mapset_name (str): The name of the source mapset to copy the + WIND file from + interim_result_mapset (str): The path to the mapset which is saved + as interim result and should be used + as start mapset for the job resumtion + interim_result_file_path (str): The path of the interim result + temporary file path + + + Raises: + This function will raise an exception if the + g.mapset/g.mapsets/db.connect modules fail + + """ + self.temp_mapset_path = os.path.join(self.temp_location_path, temp_mapset_name) + + # if interim_result_mapset is set copy the mapset from the interim + # results + if interim_result_mapset: + self.message_logger.info( + "Rsync interim result mapset to temporary GRASS DB") + rsync_status = self.interim_result.rsync_mapsets( + interim_result_mapset, self.temp_mapset_path) + if rsync_status != 'success': + raise RsyncError( + "Error while rsyncing of interim results to new temporare mapset") + if interim_result_file_path: + self.message_logger.info( + "Rsync interim result file path to temporary GRASS DB") + rsync_status = self.interim_result.rsync_mapsets( + interim_result_file_path, self.temp_file_path) + if rsync_status != 'success': + raise RsyncError( + "Error while rsyncing of interim temporary file path to new " + "temporare file path") + + self.ginit.run_module("g.mapset", ["-c", "mapset=%s" % temp_mapset_name]) + + if self.required_mapsets: + self.ginit.run_module("g.mapsets", + ["operation=add", + "mapset=%s" % (",".join(self.required_mapsets))]) + + self.message_logger.info("Added the following mapsets to the mapset " + "search path: " + ",".join(self.required_mapsets)) + + # Set the vector database connection to vector map specific databases + self.ginit.run_module("db.connect", [ + "driver=sqlite", + "database=$GISDBASE/$LOCATION_NAME/$MAPSET/vector/$MAP/sqlite.db"]) + + # self.ginit.run_module("g.gisenv", ["set=DEBUG=2",]) + + # If a source 
mapset is provided, the WIND file will be copied from it to the + # temporary mapset + if source_mapset_name is not None and interim_result_mapset is None: + source_mapset_path = os.path.join( + self.temp_location_path, source_mapset_name) + if os.path.exists(os.path.join(source_mapset_path, "WIND")): + shutil.copyfile(os.path.join(source_mapset_path, "WIND"), + os.path.join(self.temp_mapset_path, "WIND")) + + def _cleanup(self): + """Clean up the GrassInitializer files created in + self._setup() and remove the created temporary database. + + """ + if self.ginit: + self.ginit.clean_up() + + if self.temp_grass_data_base is not None and \ + os.path.exists(self.temp_grass_data_base) and \ + os.path.isdir(self.temp_grass_data_base): + shutil.rmtree(self.temp_grass_data_base, ignore_errors=True) + + def _check_reset_region(self): + """Check the current region settings against the user cell limit. + + Reset the current processing region to a meaningful state + so that the user cell limit is not reached and the mapset can be accessed again. 
+ + Raises: + This method will raise an AsyncProcessError exception + + """ + if self.skip_region_check is True: + return + + errorid, stdout_buff, stderr_buff = self.ginit.run_module("g.region", ["-ug"]) + + if errorid != 0: + raise AsyncProcessError("Unable to check the computational region size") + + str_list = stdout_buff.split() + region = {} + for line in str_list: + if "=" in line: + option = line.split("=", 1) + region[option[0]] = option[1] + + self.message_logger.info(str(region)) + + num_cells = int(region["cells"]) + ns_res = float(region["nsres"]) + ew_res = float(region["ewres"]) + + if num_cells > self.cell_limit: + self._adjust_region_size(num_cells, ns_res, ew_res) + + def _adjust_region_size(self, num_cells, ns_res, ew_res): + """Helper method to adjust the region size + + Args: + num_cells (int): GRASS GIS number of cells of the region + ns_res (float): GRASS GIS north-south cell resolution of the region + ew_res (float): GRASS GIS east-west cell resolution of the region + + Raises: + This method will raise an AsyncProcessError exception + + """ + fak = num_cells / self.cell_limit + fak += 2.0 + fak = math.sqrt(fak) + 2.0 + ns_res = ns_res * fak + ew_res = ew_res * fak + errorid, stdout_buff, stderr_buff = self.ginit.run_module( + "g.region", ["nsres=%f" % ns_res, "ewres=%f" % ew_res, "-g"]) + self.message_logger.info(stdout_buff) + if errorid != 0: + raise AsyncProcessError( + "Unable to adjust the region settings to nsres: " + "%f ewres: %f error: %s" % (ns_res, ew_res, stderr_buff)) + raise AsyncProcessError( + "Region too large, set a coarser resolution to minimum nsres: " + "%f ewres: %f [num_cells: %d]" % (ns_res, ew_res, num_cells)) + + def _increment_progress(self, num=1): + """Increment the progress step by a specific number + + Args: + num (int): The number for which the progress should be increased + """ + self.progress_steps += num + self.progress["step"] = self.progress_steps + + def _add_actinia_process(self, process: Process): + 
"""Add an actinia process to the list and dictionary + + Args: + process: The actinia process + """ + self.actinia_process_dict[process.id] = process + self.actinia_process_list.append(process) + + def _update_num_of_steps(self, num): + """Update the number of total steps + + Args: + num: The number of processes to be added to the total number of processes + """ + self.number_of_processes += num + self.progress["num_of_steps"] = self.number_of_processes + + def _wait_for_process(self, module_name, module_parameter, proc, poll_time): + """Wait for a specific process. Catch termination requests, process time limits + and send updates to the user. + + Args: + module_name: The name of the GRASS module or executable + module_parameter: The parameter of a GRASS module or a executable + proc: The process to wait for and monitor + poll_time: The poll time to send updates and check for termination + + Returns: + (float) + The run time in seconds + + """ + + start_time = time.time() + + termination_check_count = 0 + update_check_count = 0 + while True: + if proc.poll() is not None: + break + else: + # Sleep some time and update the resource status + time.sleep(poll_time) + termination_check_count += 1 + update_check_count += 1 + + # Check all 10 loops for termination + if termination_check_count == 10: + termination_check_count = 0 + # check if the resource should be terminated + # and kill the current process + if self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration) is True: + proc.kill() + raise AsyncProcessTermination("Process <%s> was terminated " + "by user request" % module_name) + + # Send all 100 loops a status update + if update_check_count == 100: + update_check_count = 0 + # Check max runtime of process + curr_time = time.time() + if (curr_time - start_time) > self.process_time_limit: + proc.kill() + raise AsyncProcessTimeLimit( + "Time (%i seconds) exceeded to run executable %s" + % (self.process_time_limit, module_name)) + # 
Reduce the length of the command line parameters for lesser + # logging overhead + mparams = str(module_parameter) + if len(mparams) > 100: + mparams = "%s ... %s" % (mparams[0:50], mparams[-50:]) + message = ( + "Running executable %s with parameters %s for %g seconds" + % (module_name, mparams, curr_time - start_time)) + self._send_resource_update(message) + + return time.time() - start_time + + def _run_process(self, process, poll_time=0.05): + """Run a process actinia_core.core.common.process_object.Process) with options and send + progress updates to the resource database. + + IMPORTANT: Use this method to run programs that are not GRASS modules. + + Check each poll the termination status of the resource. + If the termination state is set True, terminate the current process + and raise an AsyncProcessTermination exception that must be caught + by the run() method. + + Args: + process actinia_core.core.common.process_object.Process): + The process object that should be executed + poll_time (float): The time to check the process status and to send + updates to the resource db + + Raises: + AsyncProcessError: + AsyncProcessTermination: + AsyncProcessTimeLimit: + + Returns: + tuple: + (returncode, stdout_buff, stderr_buff) + + """ + if self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration) is True: + raise AsyncProcessTermination("Process <%s> was terminated by " + "user request" % process.executable) + + return self._run_executable(process, poll_time) + + def _run_module(self, process, poll_time=0.05): + """Run the GRASS module actinia_core.core.common.process_object.Process) with its module + options and send progress updates to the database server that manages + the resource entries. + + Check before each module run the size of the region. 
If the maximum number + of cells are exceeded then raise an AsyncProcessError exception that the + maximum number of cells are exceeded and reset them to a meaningful state, + so that the user can still run processes in the mapset. + + The region is checked for the first module of a process chain and after that, + for each g.region call that was present in the process chain. + + Check each poll the termination status of the resource. + If the termination state is set True, terminate the current process + and raise an AsyncProcessTermination exception that must be caught + by the run() method. + + By default the status of the running process is checked each 0.005 seconds. + This is the minimum amount of time a process can run. If many tiny + running processes are executed in a large process chain, then this value + must be adjusted. + + Args: + process actinia_core.core.common.process_object.Process): + The process object that should be executed + poll_time (float): The time to check the process status and to send + updates to the resource db + + Raises: + AsyncProcessError: + AsyncProcessTermination: + AsyncProcessTimeLimit: + + Returns: + tuple: + (returncode, stdout_buff, stderr_buff) + + """ + # Count the processes + self.process_count += 1 + # Check for each 20. process if a kill request was received + # This is required in case a single of many fast running processes in a chain + # is not able to trigger the termination check in the while loop + if self.process_count % 20 == 0: + if self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration) is True: + raise AsyncProcessTermination("Process <%s> was terminated " + "by user request" % process.executable) + + message = "Running module %s with parameters %s" % ( + process.executable, str(process.executable_params)) + self._send_resource_update(message) + + # Check reset region if a g.region call was present in the process chain. 
def _run_executable(self, process, poll_time=0.005):
    """Runs a GRASS module or a Unix executable and sets up
    the correct handling of stdout, stderr and stdin, creates the
    process log model and returns stdout, stderr and the return code.

    stdout and stderr are captured in temporary files below
    ``self.temp_file_path``; they and the stdin file handle are closed
    even when waiting for the process raises.

    The returncode of 0 indicates that it ran successfully. A negative
    value -N indicates that the child was terminated by signal N (POSIX
    only; see also
    https://en.wikipedia.org/wiki/Signal_(IPC)#Default_action).

    Args:
        process (actinia_core.core.common.process_object.Process):
            The process object that should be executed
        poll_time (float): The time to check the process status and to send
                           updates to the resource db

    Raises:
        AsyncProcessError: if the return code is not 0.
        Exceptions of self._wait_for_process() are passed through.

    Returns:
        tuple:
        (returncode, stdout_buff, stderr_buff)

    """
    stdin_file = None
    try:
        # Use temporary files to catch stdout and stderr
        with tempfile.NamedTemporaryFile(
                mode="w+b", delete=True, dir=self.temp_file_path) as stdout_buff, \
                tempfile.NamedTemporaryFile(
                    mode="w+b", delete=True, dir=self.temp_file_path) as stderr_buff:

            if process.stdin_source is not None:
                tmp_file = self.proc_chain_converter.generate_temp_file_path()
                with open(tmp_file, "w") as stdin_writer:
                    stdin_writer.write(process.stdin_source())
                stdin_file = open(tmp_file, "r")

            self._increment_progress(num=1)

            # GRASS and Unix executables have different run methods.
            # Equality instead of 'in "grass"': the latter is a substring
            # test and also matched "" or "g".
            if process.exec_type == "grass":
                proc = self.ginit.run_module(process.executable,
                                             process.executable_params,
                                             raw=True,
                                             stdout=stdout_buff,
                                             stderr=stderr_buff,
                                             stdin=stdin_file)
            else:
                proc = subprocess.Popen(
                    args=[process.executable, *process.executable_params],
                    stdout=stdout_buff,
                    stderr=stderr_buff,
                    stdin=stdin_file)

            run_time = self._wait_for_process(process.executable,
                                              process.executable_params,
                                              proc, poll_time)
            proc.wait()

            # The child wrote through the file descriptors; rewind to read
            stdout_buff.seek(0)
            stderr_buff.seek(0)
            stdout_string = stdout_buff.read().decode()
            stderr_string = stderr_buff.read().decode()
    finally:
        if stdin_file is not None:
            stdin_file.close()

    process.set_stdouts(stdout=stdout_string, stderr=stderr_string)

    kwargs = {
        'id': process.id,
        'executable': process.executable,
        'parameter': process.executable_params,
        'return_code': proc.returncode,
        'stdout': stdout_string,
        'stderr': stderr_string.split("\n"),
        'run_time': run_time}
    if self.temp_mapset_path:
        kwargs['mapset_size'] = get_directory_size(self.temp_mapset_path)

    plm = ProcessLogModel(**kwargs)

    self.module_output_log.append(plm)
    # Store the log in an additional dictionary for automated output generation
    if process.id is not None:
        self.module_output_dict[process.id] = plm

    if proc.returncode != 0:
        raise AsyncProcessError(
            "Error while running executable <%s>" % process.executable)

    # save interim results
    if (self.interim_result.saving_interim_results is True
            and self.temp_mapset_path is not None):
        self.interim_result.save_interim_results(
            self.progress_steps, self.temp_mapset_path, self.temp_file_path)
    elif self.temp_mapset_path is None:
        self.message_logger.debug(
            "No temp mapset path set. Because of that no interim results"
            " can be saved!")

    return proc.returncode, stdout_string, stderr_string
Because of that no interim results" + " can be saved!") + + return proc.returncode, stdout_string, stderr_string + + def _create_temporary_grass_environment(self, source_mapset_name=None, + interim_result_mapset=None, + interim_result_file_path=None): + """Create a temporary GRASS GIS environment + + This method will: + 1. create the temporary database + 2. sets-up the GRASS environment + 3. Create temporary mapset + + This method will link the required mapsets that are + defined in *self.required_mapsets* into the location. + The mapsets may be from the global and/or user database. + + Args: + source_mapset_name (str): The name of the source mapset to copy the + WIND file from + interim_result_mapset (str): The path to the mapset which is saved + as interim result and should be used + as start mapset for the job resumtion + interim_result_file_path (str): The path of the interim result + temporary file path + Raises: + This method will raise an AsyncProcessError + """ + + # Create the temp database and link the + # required mapsets into it + self._create_temp_database(self.required_mapsets) + + # Initialize the GRASS environment and switch into PERMANENT + # mapset, which is always linked + self._create_grass_environment(grass_data_base=self.temp_grass_data_base, + mapset_name="PERMANENT") + + # Create the temporary mapset and switch into it + self._create_temporary_mapset(temp_mapset_name=self.temp_mapset_name, + source_mapset_name=source_mapset_name, + interim_result_mapset=interim_result_mapset, + interim_result_file_path=interim_result_file_path) + + def _execute(self, skip_permission_check=False): + """Overwrite this function in subclasses. 
+ + This function will be executed by the run() function + + - Setup logger and credentials + - Analyse the process chain + - Create the temporal database + - Initialize the GRASS environment and create the temporary mapset + - Run the modules + - Parse the stdout output of the modules and generate the module results + + Args: + skip_permission_check (bool): If set True, the permission checks of + module access and process num + limits are not performed + + Raises: + This method will raise an AsyncProcessError, AsyncProcessTimeLimit + or AsyncProcessTermination + + """ + # Create the process chain + if self.rdc.iteration is not None: + process_list = \ + self._create_temporary_grass_environment_and_process_list_for_iteration( + skip_permission_check=skip_permission_check) + else: + process_list = self._create_temporary_grass_environment_and_process_list( + skip_permission_check=skip_permission_check) + + # Run all executables + self._execute_process_list(process_list=process_list) + # Parse the module sdtout outputs and create the results + self._parse_module_outputs() + + def _create_temporary_grass_environment_and_process_list_for_iteration( + self, process_chain=None, skip_permission_check=False): + """Helper method to: + + - Setup logger and credentials + - Analyse the process chain + - Create the temporal database + - Initialize the GRASS environment and create the temporary mapset + - Return the created process list + + Args: + process_chain (dict): The process chain to be checked and converted + into a process list + skip_permission_check (bool): If set True, the permission checks + of module access and process num + limits are not performed + + Raises: + This method will raise an AsyncProcessError + + Returns: list + The process list to be executed by _execute_process_list() + + """ + # Setup the user credentials and logger + self._setup() + + # Create and check the process chain + pc_step, old_process_chain_list = 
self._get_previous_iteration_process_chain() + self.interim_result.set_old_pc_step(pc_step) + process_list = self._validate_process_chain( + process_chain=self.request_data, + old_process_chain=old_process_chain_list, + skip_permission_check=skip_permission_check, + pc_step=pc_step) + + # check iterim results + interim_result_mapset, interim_result_file_path = \ + self.interim_result.check_interim_result_mapset( + pc_step, self.iteration - 1) + if interim_result_mapset is None: + return None + + # Init GRASS and create the temporary mapset with the interim results + self._create_temporary_grass_environment( + interim_result_mapset=interim_result_mapset, + interim_result_file_path=interim_result_file_path) + + return process_list + + def _create_temporary_grass_environment_and_process_list( + self, process_chain=None, skip_permission_check=False): + """Helper method to: + + - Setup logger and credentials + - Analyse the process chain + - Create the temporal database + - Initialize the GRASS environment and create the temporary mapset + - Return the created process list + + Args: + process_chain (dict): The process chain to be checked and converted + into a process list + skip_permission_check (bool): If set True, the permission checks + of module access and process num + limits are not performed + + Raises: + This method will raise an AsyncProcessError + + Returns: list + The process list to be executed by _execute_process_list() + + """ + # Setup the user credentials and logger + self._setup() + + # Create and check the process chain + process_list = self._validate_process_chain( + process_chain=process_chain, + skip_permission_check=skip_permission_check) + + # Init GRASS and create the temporary mapset + self._create_temporary_grass_environment() + + return process_list + + def _parse_module_outputs(self): + """Parse the module stdout outputs and parse them into the required formats: + table, list or kv + + This functions analyzes the output_parser_list for 
entries to parse. + It will convert the stdout strings into tables, lists or key/value outputs + and stores the result in the module_result dictionary using the provided + id of the StdoutParser. + + """ + + for entry in self.output_parser_list: + for process_id, stdout_def in entry.items(): + id = stdout_def["id"] + format = stdout_def["format"] + delimiter = stdout_def["delimiter"] + if process_id not in self.module_output_dict: + raise AsyncProcessError( + "Unable to find process id in module output dictionary") + stdout = self.module_output_dict[process_id]["stdout"] + # Split the rows by the \n new line delimiter + rows = stdout.strip().split("\n") + if "table" in format: + result = [] + for row in rows: + row = row.strip() + values = row.split(delimiter) + value_list = [] + for value in values: + value_list.append(value.strip()) + result.append(value_list) + elif "list" in format: + result = [] + for row in rows: + value = row.strip() + result.append(value) + elif "kv" in format: + result = dict() + for row in rows: + row = row.strip() + key, value = row.split(delimiter, 1) + result[key.strip()] = value.strip() + elif "json" in format: + result = None + try: + result = {i[0]: i[1] for i in [ + entry.split(delimiter, 1) for entry in + stdout.strip('\n').split('\n')] + } + except Exception: + try: + result = json.loads(stdout) + except Exception: + pass + finally: + if not result: + result = stdout + else: + raise AsyncProcessError("Wrong stdout parser format") + + # Store the parser result + self.module_results[id] = result + + def _execute_process_list(self, process_list): + """Run all modules or executables that are specified in the process list + + Args: + process_list: The process list that was generated by + _validate_process_chain() which is also called in + _create_temporary_grass_environment_and_process_list() + + Raises: + This method will raise an AsyncProcessError, AsyncProcessTimeLimit + or AsyncProcessTermination + + """ + for process in 
process_list: + if process.exec_type == "grass": + self._run_module(process) + elif process.exec_type == "exec": + self._run_process(process) + elif process.exec_type == "python": + eval(process.executable) + + def _final_cleanup(self): + """Overwrite this function in subclasses to perform the final cleanup, + by default this function calls self._cleanup() to remove the temporary + gis database. + This function should not raise any exceptions. Extend the cleaning + functionality here. + """ + # Clean up and remove the temporary gisdbase + self._cleanup() + + def run(self): + """This function will run the processing and will catch and process + any Exceptions that were raised while processing. Call this function to run the + processing. + + You have to implement/overwrite two methods that are called here: + + * self._execute() + * self._final_cleanup() + + e_type, e_value, e_traceback = sys.exc_info() + message = [e.__class__, e_type, e_value, traceback.format_tb(e_traceback)] + message = pprint.pformat(message) + """ + + try: + # Run the _execute function that does all the work + self._execute() + except AsyncProcessTermination as e: + self.run_state = {"terminated": str(e)} + except AsyncProcessTimeLimit as e: + self.run_state = {"time limit exceeded": str(e)} + except AsyncProcessError as e: + e_type, e_value, e_tb = sys.exc_info() + model = ExceptionTracebackModel(message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type)) + self.run_state = {"error": str(e), "exception": model} + except KeyboardInterrupt as e: + e_type, e_value, e_tb = sys.exc_info() + model = ExceptionTracebackModel(message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type)) + self.run_state = {"error": str(e), "exception": model} + except Exception as e: + e_type, e_value, e_tb = sys.exc_info() + model = ExceptionTracebackModel(message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type)) + self.run_state = {"error": str(e), 
"exception": model} + finally: + try: + # Call the final cleanup, before sending the status messages + self._final_cleanup() + except Exception as e: + e_type, e_value, e_tb = sys.exc_info() + model = ExceptionTracebackModel(message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type)) + self.run_state = {"error": str(e), "exception": model} + # After all processing finished, send the final status + if "success" in self.run_state: + self._send_resource_finished(message=self.finish_message, + results=self.module_results) + elif "terminated" in self.run_state: + # Send an error message if an exception was raised + self._send_resource_terminated(message=self.run_state["terminated"]) + elif "time limit exceeded" in self.run_state: + self._send_resource_time_limit_exceeded( + message=self.run_state["time limit exceeded"]) + elif "error" in self.run_state: + # Send an error message if an exception was raised + self._send_resource_error( + message=self.run_state["error"], + exception=self.run_state["exception"]) + else: + self._send_resource_error(message="Unknown error") From c0f0f1bf63d82f387d6805a5960cd83f3f1e1a82 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 22 Sep 2022 15:19:34 +0200 Subject: [PATCH 08/46] Black (#378) * linting tests + black workflow * linting * black * further black * fix unittests * fix test Co-authored-by: anikaweinmann --- .../ephemeral_processing.py | 1081 +++++++++++------ 1 file changed, 677 insertions(+), 404 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 300c792..e34c076 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -46,26 +46,41 @@ from actinia_core.core.redis_lock import RedisLockingInterface from 
actinia_core.core.resources_logger import ResourceLogger from actinia_core.core.common.process_chain import ProcessChainConverter -from actinia_core.core.common.exceptions \ - import AsyncProcessError, AsyncProcessTermination, RsyncError +from actinia_core.core.common.exceptions import ( + AsyncProcessError, + AsyncProcessTermination, + RsyncError, +) from actinia_core.core.common.exceptions import AsyncProcessTimeLimit -from actinia_core.models.response_models \ - import ProcessingResponseModel, ExceptionTracebackModel -from actinia_core.models.response_models \ - import create_response_from_model, ProcessLogModel, ProgressInfoModel +from actinia_core.models.response_models import ( + ProcessingResponseModel, + ExceptionTracebackModel, +) +from actinia_core.models.response_models import ( + create_response_from_model, + ProcessLogModel, + ProgressInfoModel, +) from actinia_core.core.interim_results import InterimResult, get_directory_size -from actinia_core.rest.base.user_auth import check_location_mapset_module_access +from actinia_core.rest.base.user_auth import ( + check_location_mapset_module_access, +) __license__ = "GPLv3" __author__ = "Sören Gebbert, Anika Weinmann" -__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__copyright__ = ( + "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +) __maintainer__ = "mundialis" class EphemeralProcessing(object): - """This class processes GRASS data on the local machine in an temporary mapset. + """ + This class processes GRASS data on the local machine in an temporary + mapset. 
- The temporary mapset will be removed by this class when the processing finished + The temporary mapset will be removed by this class when the processing + finished Creating the temporary database and mapset: @@ -140,7 +155,8 @@ def __init__(self, rdc): self.status_url = self.rdc.status_url self.api_info = self.rdc.api_info self.interim_result = InterimResult( - self.user_id, self.resource_id, self.iteration) + self.user_id, self.resource_id, self.iteration + ) self.grass_data_base = self.rdc.grass_data_base # Global database # User database base path, this path will be @@ -170,10 +186,14 @@ def __init__(self, rdc): self.finish_message = "Processing successfully finished" # The temporary GRASS database - self.temp_grass_data_base = None # Path to the temporary grass database + self.temp_grass_data_base = ( + None # Path to the temporary grass database + ) self.temp_location_path = None # Path to the temporary location self.temp_file_path = None # The path to store temporary created files - self.global_location_path = None # The path to the global location to link + self.global_location_path = ( + None # The path to the global location to link + ) self.user_location_path = None # The path to the user location to link # List of resources that should be created @@ -200,12 +220,14 @@ def __init__(self, rdc): self.module_results = dict() # outputs like images, dicts, files and so on - self.required_mapsets = list() # The process chain analysis will provide + self.required_mapsets = ( + list() + ) # The process chain analysis will provide # a list of required mapsets that must be # linked in the temporary location - # The module that was called in the process chain, to detect g.region calls - # and check for correct region settings + # The module that was called in the process chain, to detect g.region + # calls and check for correct region settings self.last_module = "g.region" # Count the processes executed from the process chain self.process_count = 0 @@ -217,8 
+239,7 @@ def __init__(self, rdc): self.run_state = {"success": None} # The progress info object - self.progress = ProgressInfoModel( - step=0, num_of_steps=0) + self.progress = ProgressInfoModel(step=0, num_of_steps=0) # The count of self._run_process() and self._run_module() calls self.progress_steps = 0 # The number of processes that should be processes @@ -226,7 +247,8 @@ def __init__(self, rdc): self.setup_flag = False - # The names of the temporarily generated files "key":"temporary_file_path" + # The names of the temporarily generated files + # "key":"temporary_file_path" self.temporary_pc_files = {} # The counter to generate unique temporary file names self.temp_file_count = 0 @@ -249,9 +271,11 @@ def __init__(self, rdc): self.webhook_finished = None # process chain finished - # The URL of a webhook that should be called for each status/progress update + # The URL of a webhook that should be called for each status/progress + # update self.webhook_update = None - # The authentication for the webhook (base 64 decoded "username:password") + # The authentication for the webhook (base 64 decoded + # "username:password") self.webhook_auth = None def _send_resource_update(self, message, results=None): @@ -263,20 +287,22 @@ def _send_resource_update(self, message, results=None): id for identification """ - data = create_response_from_model(self.response_model_class, - status="running", - user_id=self.user_id, - resource_id=self.resource_id, - iteration=self.iteration, - # process_log=self.module_output_log, - progress=self.progress, - results=results, - message=message, - orig_time=self.orig_time, - orig_datetime=self.orig_datetime, - http_code=200, - status_url=self.status_url, - api_info=self.api_info) + data = create_response_from_model( + self.response_model_class, + status="running", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + # process_log=self.module_output_log, + progress=self.progress, + results=results, + 
message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=200, + status_url=self.status_url, + api_info=self.api_info, + ) self._send_to_database(document=data, final=False) def _send_resource_finished(self, message, results=None): @@ -288,22 +314,24 @@ def _send_resource_finished(self, message, results=None): id for identification """ - data = create_response_from_model(self.response_model_class, - status="finished", - user_id=self.user_id, - resource_id=self.resource_id, - iteration=self.iteration, - process_log=self.module_output_log, - progress=self.progress, - results=results, - message=message, - orig_time=self.orig_time, - orig_datetime=self.orig_datetime, - http_code=200, - status_url=self.status_url, - api_info=self.api_info, - resource_urls=self.resource_url_list, - process_chain_list=self.process_chain_list) + data = create_response_from_model( + self.response_model_class, + status="finished", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=200, + status_url=self.status_url, + api_info=self.api_info, + resource_urls=self.resource_url_list, + process_chain_list=self.process_chain_list, + ) self._send_to_database(document=data, final=True) def _send_resource_terminated(self, message, results=None): @@ -315,21 +343,23 @@ def _send_resource_terminated(self, message, results=None): id for identification """ - data = create_response_from_model(self.response_model_class, - status="terminated", - user_id=self.user_id, - resource_id=self.resource_id, - iteration=self.iteration, - process_log=self.module_output_log, - progress=self.progress, - results=results, - message=message, - orig_time=self.orig_time, - orig_datetime=self.orig_datetime, - http_code=200, - status_url=self.status_url, - api_info=self.api_info, - 
process_chain_list=self.process_chain_list) + data = create_response_from_model( + self.response_model_class, + status="terminated", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=200, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list, + ) self._send_to_database(document=data, final=True) def _send_resource_time_limit_exceeded(self, message, results=None): @@ -341,21 +371,23 @@ def _send_resource_time_limit_exceeded(self, message, results=None): id for identification """ - data = create_response_from_model(self.response_model_class, - status="terminated", - user_id=self.user_id, - resource_id=self.resource_id, - iteration=self.iteration, - process_log=self.module_output_log, - progress=self.progress, - results=results, - message=message, - orig_time=self.orig_time, - orig_datetime=self.orig_datetime, - http_code=400, - status_url=self.status_url, - api_info=self.api_info, - process_chain_list=self.process_chain_list) + data = create_response_from_model( + self.response_model_class, + status="terminated", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=400, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list, + ) self._send_to_database(document=data, final=True) def _send_resource_error(self, message, results=None, exception=None): @@ -367,29 +399,31 @@ def _send_resource_error(self, message, results=None, exception=None): id for identification """ - data = create_response_from_model(self.response_model_class, - status="error", - 
user_id=self.user_id, - resource_id=self.resource_id, - iteration=self.iteration, - process_log=self.module_output_log, - progress=self.progress, - results=results, - message=message, - orig_time=self.orig_time, - orig_datetime=self.orig_datetime, - http_code=400, - status_url=self.status_url, - api_info=self.api_info, - process_chain_list=self.process_chain_list, - exception=exception) + data = create_response_from_model( + self.response_model_class, + status="error", + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + process_log=self.module_output_log, + progress=self.progress, + results=results, + message=message, + orig_time=self.orig_time, + orig_datetime=self.orig_datetime, + http_code=400, + status_url=self.status_url, + api_info=self.api_info, + process_chain_list=self.process_chain_list, + exception=exception, + ) self._send_to_database(document=data, final=True) def _send_to_database(self, document, final=False): """Send the document to the database - The resource expiration time set in the actinia config file will be used - for every resource commit. + The resource expiration time set in the actinia config file will be + used for every resource commit. If a webhook URL is provided, the JSON response will be send to the provided endpoint using a POST request. 
@@ -402,25 +436,32 @@ def _send_to_database(self, document, final=False): """ self.resource_logger.commit( - user_id=self.user_id, resource_id=self.resource_id, - iteration=self.iteration, document=document, - expiration=self.config.REDIS_RESOURCE_EXPIRE_TIME) + user_id=self.user_id, + resource_id=self.resource_id, + iteration=self.iteration, + document=document, + expiration=self.config.REDIS_RESOURCE_EXPIRE_TIME, + ) # Call the webhook after the final result was send to the database try: if final is True and self.webhook_finished is not None: - self._post_to_webhook(document, 'finished') + self._post_to_webhook(document, "finished") elif final is False and self.webhook_update is not None: - self._post_to_webhook(document, 'update') + self._post_to_webhook(document, "update") except Exception as e: e_type, e_value, e_tb = sys.exc_info() - model = ExceptionTracebackModel(message=str(e_value), - traceback=traceback.format_tb(e_tb), - type=str(e_type)) + model = ExceptionTracebackModel( + message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type), + ) run_state = {"error": str(e), "exception": model} print(str(run_state)) self.message_logger.error( - "Unable to send webhook request. Traceback: %s" % str(run_state)) + "Unable to send webhook request. Traceback: %s" + % str(run_state) + ) def _post_to_webhook(self, document, type): """Helper method to send a post request to a webhook. 
@@ -432,13 +473,15 @@ def _post_to_webhook(self, document, type): type (str): The webhook type: 'finished' or 'update' """ self.message_logger.info( - "Send POST request to %s webhook url: %s" % (type, self.webhook_finished)) + "Send POST request to %s webhook url: %s" + % (type, self.webhook_finished) + ) webhook_url = None - if type == 'finished': + if type == "finished": webhook_url = self.webhook_finished webhook_retries = self.config.WEBHOOK_RETRIES webhook_sleep = self.config.WEBHOOK_SLEEP - if type == 'update': + if type == "update": webhook_url = self.webhook_update webhook_retries = 1 webhook_sleep = 0 @@ -453,22 +496,30 @@ def _post_to_webhook(self, document, type): if self.webhook_auth: # username is expected to be without colon (':') resp = requests.post( - webhook_url, json=json.dumps(response_model), + webhook_url, + json=json.dumps(response_model), auth=HTTPBasicAuth( - self.webhook_auth.split(':')[0], - ':'.join(self.webhook_auth.split(':')[1:])), - timeout=10) + self.webhook_auth.split(":")[0], + ":".join(self.webhook_auth.split(":")[1:]), + ), + timeout=10, + ) else: - resp = requests.post(webhook_url, json=json.dumps(response_model), - timeout=10) + resp = requests.post( + webhook_url, + json=json.dumps(response_model), + timeout=10, + ) if not (500 <= resp.status_code and resp.status_code < 600): webhook_not_reached = False except Exception: time.sleep(webhook_sleep) - if ((webhook_not_reached is False and resp.status_code not in [200, 204]) - or webhook_not_reached is True): + if ( + webhook_not_reached is False and resp.status_code not in [200, 204] + ) or webhook_not_reached is True: raise AsyncProcessError( - "Unable to access %s webhook URL %s" % (type, webhook_url)) + "Unable to access %s webhook URL %s" % (type, webhook_url) + ) def _get_previous_iteration_process_chain(self): """Helper method to check the old resource run and get the step of the @@ -485,24 +536,30 @@ def _get_previous_iteration_process_chain(self): for iter in range(1, 
self.rdc.iteration): if iter == 1: old_response_data = self.resource_logger.get( - self.user_id, self.resource_id) + self.user_id, self.resource_id + ) else: old_response_data = self.resource_logger.get( - self.user_id, self.resource_id, iter) + self.user_id, self.resource_id, iter + ) if old_response_data is None: return None _, response_model = pickle.loads(old_response_data) - for element in response_model['process_log']: - self.module_output_dict[element['id']] = element + for element in response_model["process_log"]: + self.module_output_dict[element["id"]] = element - pc_step += response_model['progress']['step'] - 1 - old_process_chain = response_model['process_chain_list'][0] + pc_step += response_model["progress"]["step"] - 1 + old_process_chain = response_model["process_chain_list"][0] return pc_step, old_process_chain - def _validate_process_chain(self, process_chain=None, - skip_permission_check=False, - old_process_chain=None, pc_step=None): + def _validate_process_chain( + self, + process_chain=None, + skip_permission_check=False, + old_process_chain=None, + pc_step=None, + ): """ Create the process list and check for user permissions. 
@@ -531,31 +588,45 @@ def _validate_process_chain(self, process_chain=None, """ if old_process_chain is not None: - self.proc_chain_converter.process_chain_to_process_list(old_process_chain) + self.proc_chain_converter.process_chain_to_process_list( + old_process_chain + ) self.proc_chain_converter.import_descr_list = list() self.proc_chain_converter.resource_export_list = list() # Backward compatibility if process_chain is None: - process_list = self.proc_chain_converter.process_chain_to_process_list( - self.request_data) + process_list = ( + self.proc_chain_converter.process_chain_to_process_list( + self.request_data + ) + ) self.process_chain_list.append(self.request_data) else: - process_list = self.proc_chain_converter.process_chain_to_process_list( - process_chain) + process_list = ( + self.proc_chain_converter.process_chain_to_process_list( + process_chain + ) + ) self.process_chain_list.append(process_chain) if pc_step is not None: del process_list[:pc_step] # Check for the webhook - if (hasattr(self.proc_chain_converter, 'webhook_finished') - and self.proc_chain_converter.webhook_finished is not None): + if ( + hasattr(self.proc_chain_converter, "webhook_finished") + and self.proc_chain_converter.webhook_finished is not None + ): self.webhook_finished = self.proc_chain_converter.webhook_finished - if (hasattr(self.proc_chain_converter, 'webhook_update') - and self.proc_chain_converter.webhook_update is not None): + if ( + hasattr(self.proc_chain_converter, "webhook_update") + and self.proc_chain_converter.webhook_update is not None + ): self.webhook_update = self.proc_chain_converter.webhook_update - if (hasattr(self.proc_chain_converter, 'webhook_auth') - and self.proc_chain_converter.webhook_auth is not None): + if ( + hasattr(self.proc_chain_converter, "webhook_auth") + and self.proc_chain_converter.webhook_auth is not None + ): self.webhook_auth = self.proc_chain_converter.webhook_auth # Check for empty process chain @@ -563,11 +634,15 @@ def 
_validate_process_chain(self, process_chain=None, raise AsyncProcessError("Empty process chain, nothing to compute") # Check if the user is allowed to execute this number of processes - if (skip_permission_check is False - and len(process_list) > self.process_num_limit): + if ( + skip_permission_check is False + and len(process_list) > self.process_num_limit + ): raise AsyncProcessError( "Process limit exceeded, a maximum of %i " - "processes are allowed in the process chain." % self.process_num_limit) + "processes are allowed in the process chain." + % self.process_num_limit + ) # Check if the module description was correct and if the # module or executable is in the user white list. @@ -582,18 +657,23 @@ def _validate_process_chain(self, process_chain=None, resp = check_location_mapset_module_access( user_credentials=self.user_credentials, config=self.config, - module_name=process.executable) + module_name=process.executable, + ) if resp is not None: raise AsyncProcessError( "Module or executable <%s> is not supported" - % process.executable) + % process.executable + ) else: message = ( "Wrong process description, type: %s " - "module/executable: %s, args: %s" % ( + "module/executable: %s, args: %s" + % ( str(process.exec_type), str(process.executable), - str(process.executable_params))) + str(process.executable_params), + ) + ) raise AsyncProcessError(message) # Update the processing @@ -612,13 +692,16 @@ def _setup(self, init_grass=True): - Create the resource and message logger - Create the redis lock interface for resource locking - - Set cell limit, process number limit and process time limit from user c - redentials. - - Create all required paths to original and temporary location and mapsets. + - Set cell limit, process number limit and process time limit from user + c redentials. + - Create all required paths to original and temporary location and + mapsets. 
- temp_location_path - global_location_path - - grass_user_data_base <- This path will be created if it does not exist - - user_location_path <- This path will be created if it does not exist + - grass_user_data_base <- This path will be created if it does not + exist + - user_location_path <- This path will be created if it does not + exist - temp_grass_data_base <- This path will be created - temp_file_path <- This path will be created - Check if the current working location is in a persistent (global) @@ -640,27 +723,39 @@ def _setup(self, init_grass=True): fluent_sender = None if self.has_fluent is True: from fluent import sender - fluent_sender = sender.FluentSender('actinia_core_logger', - host=self.config.LOG_FLUENT_HOST, - port=self.config.LOG_FLUENT_PORT) + + fluent_sender = sender.FluentSender( + "actinia_core_logger", + host=self.config.LOG_FLUENT_HOST, + port=self.config.LOG_FLUENT_PORT, + ) kwargs = dict() - kwargs['host'] = self.config.REDIS_SERVER_URL - kwargs['port'] = self.config.REDIS_SERVER_PORT - if self.config.REDIS_SERVER_PW and self.config.REDIS_SERVER_PW is not None: - kwargs['password'] = self.config.REDIS_SERVER_PW - self.resource_logger = ResourceLogger(**kwargs, - fluent_sender=fluent_sender) + kwargs["host"] = self.config.REDIS_SERVER_URL + kwargs["port"] = self.config.REDIS_SERVER_PORT + if ( + self.config.REDIS_SERVER_PW + and self.config.REDIS_SERVER_PW is not None + ): + kwargs["password"] = self.config.REDIS_SERVER_PW + self.resource_logger = ResourceLogger( + **kwargs, fluent_sender=fluent_sender + ) self.message_logger = MessageLogger( - config=self.config, user_id=self.user_id, fluent_sender=fluent_sender) + config=self.config, + user_id=self.user_id, + fluent_sender=fluent_sender, + ) self.lock_interface = RedisLockingInterface() self.lock_interface.connect(**kwargs) del kwargs self.process_time_limit = int( - self.user_credentials["permissions"]["process_time_limit"]) + 
self.user_credentials["permissions"]["process_time_limit"] + ) - # Check and create all required paths to global, user and temporary locations + # Check and create all required paths to global, user and temporary + # locations if init_grass is True: self._setup_paths() @@ -673,36 +768,45 @@ def _setup(self, init_grass=True): resource_export_list=self.resource_export_list, output_parser_list=self.output_parser_list, message_logger=self.message_logger, - send_resource_update=self._send_resource_update) + send_resource_update=self._send_resource_update, + ) def _setup_paths(self): - """Helper method to setup the paths - """ - self.cell_limit = int(self.user_credentials["permissions"]["cell_limit"]) + """Helper method to setup the paths""" + self.cell_limit = int( + self.user_credentials["permissions"]["cell_limit"] + ) self.process_num_limit = int( - self.user_credentials["permissions"]["process_num_limit"]) + self.user_credentials["permissions"]["process_num_limit"] + ) # Setup the required paths self.temp_grass_data_base = os.path.join( - self.grass_temp_database, self.temp_grass_data_base_name) + self.grass_temp_database, self.temp_grass_data_base_name + ) self.temp_file_path = os.path.join(self.temp_grass_data_base, ".tmp") if self.location_name: self.temp_location_path = os.path.join( - self.temp_grass_data_base, self.location_name) + self.temp_grass_data_base, self.location_name + ) self.global_location_path = os.path.join( - self.grass_data_base, self.location_name) + self.grass_data_base, self.location_name + ) # Create the user database path if it does not exist if not os.path.exists(self.grass_user_data_base): os.mkdir(self.grass_user_data_base) - # Create the user group specific path, if it does not exist and set the, - # grass user database path accordingly + # Create the user group specific path, if it does not exist and set + # the grass user database path accordingly self.grass_user_data_base = os.path.join( - self.grass_user_data_base, self.user_group) 
+ self.grass_user_data_base, self.user_group + ) if not os.path.exists(self.grass_user_data_base): os.mkdir(self.grass_user_data_base) - # Create the user group specific location path, if it does not exist + # Create the user group specific location path, if it does not + # exist self.user_location_path = os.path.join( - self.grass_user_data_base, self.location_name) + self.grass_user_data_base, self.location_name + ) if not os.path.exists(self.user_location_path): os.mkdir(self.user_location_path) # Check if the location is located in the global database @@ -727,13 +831,13 @@ def _create_temp_database(self, mapsets=None): Linking is performed in two steps: 1.) If the location is a global location, then the mapsets from the global location are linked in the temporary locations - 2.) Then link all required mapsets from the user group location into - the temporary location + 2.) Then link all required mapsets from the user group location + into the temporary location Only mapsets from the global location are linked into the temporary location to which the user group has access. - It checks for access in the global database but not in the user group database. - The user can always access its own data of its group. + It checks for access in the global database but not in the user group + database. The user can always access its own data of its group. Args: mapsets: A list of mapset names that should be linked into @@ -774,22 +878,31 @@ def _create_temp_database(self, mapsets=None): for mapset in mapsets: if mapset not in mapset_list: raise AsyncProcessError( - "Unable to link all required mapsets into temporary " - "location. Missing or un-accessible mapset " - "<%s> in location <%s>" - % (mapset, self.location_name)) + "Unable to link all required mapsets into " + "temporary location. 
Missing or un-accessible " + f"mapset <{mapset}> in location " + f"<{self.location_name}>" + ) # Link the original mapsets from global and user database into the # temporary location for mapset_path, mapset in mapsets_to_link: - if os.path.isdir( - os.path.join(self.temp_location_path, mapset)) is False: - os.symlink(mapset_path, os.path.join( - self.temp_location_path, mapset)) + if ( + os.path.isdir( + os.path.join(self.temp_location_path, mapset) + ) + is False + ): + os.symlink( + mapset_path, + os.path.join(self.temp_location_path, mapset), + ) except Exception as e: - raise AsyncProcessError("Unable to create a temporary GIS database" - ", Exception: %s" % str(e)) + raise AsyncProcessError( + "Unable to create a temporary GIS database" + ", Exception: %s" % str(e) + ) def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): """Helper method to link locations mapsets @@ -797,8 +910,8 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): Args: mapsets (list): List of mapsets in location mapsets_to_link (list): List of mapsets paths to link - check_all_mapsets (bool): If set True, the mapsets list is created with - all locations on location_path + check_all_mapsets (bool): If set True, the mapsets list is created + with all locations on location_path Returns: mapsets (list): List of mapsets in location @@ -808,8 +921,12 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): if self.is_global_database is True: # List all available mapsets in the global location mapsets, mapsets_to_link = self._list_all_available_mapsets( - self.global_location_path, mapsets, - check_all_mapsets, mapsets_to_link, True) + self.global_location_path, + mapsets, + check_all_mapsets, + mapsets_to_link, + True, + ) # Check for leftover mapsets left_over_mapsets = [] for mapset in mapsets: @@ -817,26 +934,36 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): left_over_mapsets.append(mapset) # List all available 
mapsets in the user location mapsets, mapsets_to_link = self._list_all_available_mapsets( - self.user_location_path, left_over_mapsets, - check_all_mapsets, mapsets_to_link, False) + self.user_location_path, + left_over_mapsets, + check_all_mapsets, + mapsets_to_link, + False, + ) return mapsets, mapsets_to_link - def _list_all_available_mapsets(self, location_path, mapsets, check_all_mapsets, - mapsets_to_link, global_db=False): + def _list_all_available_mapsets( + self, + location_path, + mapsets, + check_all_mapsets, + mapsets_to_link, + global_db=False, + ): """Helper method to list all available mapsets and for global database it is checked if the mapset can be accessed. Args: location_path (str): Path to location (global or user) mapsets (list): List of mapsets names to link. - The mapsets list can be empty, if check_all_mapsets is - True the list is filled with all mapsets from the - location_path - check_all_mapsets (bool): If set True, the mapsets list is created with - all locations on location_path + The mapsets list can be empty, if check_all_mapsets + is True the list is filled with all mapsets from + the location_path + check_all_mapsets (bool): If set True, the mapsets list is created + with all locations on location_path mapsets_to_link (list): List of mapset paths to link global_db (bool): If set True, the location/mapset access is - checked + checked Returns: mapsets (list): List of mapsets in location @@ -847,36 +974,51 @@ def _list_all_available_mapsets(self, location_path, mapsets, check_all_mapsets, mapsets = os.listdir(location_path) for mapset in mapsets: mapset_path = os.path.join(location_path, mapset) - if (os.path.isdir(mapset_path) - and os.access(mapset_path, os.R_OK & os.X_OK)): + if os.path.isdir(mapset_path) and os.access( + mapset_path, os.R_OK & os.X_OK + ): # Check if a WIND file exists to be sure it is a mapset - if os.path.isfile(os.path.join( - mapset_path, "WIND")) is True: + if ( + os.path.isfile(os.path.join(mapset_path, 
"WIND")) + is True + ): if mapset not in mapsets_to_link and global_db is True: # Link the mapset from the global database # only if it can be accessed resp = check_location_mapset_module_access( - user_credentials=self.user_credentials, - config=self.config, - location_name=self.location_name, - mapset_name=mapset) + user_credentials=self.user_credentials, + config=self.config, + location_name=self.location_name, + mapset_name=mapset, + ) if resp is None: mapsets_to_link.append((mapset_path, mapset)) - elif mapset not in mapsets_to_link and global_db is False: + elif ( + mapset not in mapsets_to_link + and global_db is False + ): mapsets_to_link.append((mapset_path, mapset)) else: raise AsyncProcessError( "Invalid mapset <%s> in location <%s>" - % (mapset, self.location_name)) + % (mapset, self.location_name) + ) else: if global_db is True: - msg = "Unable to access global location <%s>" % self.location_name + msg = ( + "Unable to access global location <%s>" + % self.location_name + ) else: - msg = "Unable to access user location <%s>" % self.location_name + msg = ( + "Unable to access user location <%s>" % self.location_name + ) raise AsyncProcessError(msg) return mapsets, mapsets_to_link - def _create_grass_environment(self, grass_data_base, mapset_name="PERMANENT"): + def _create_grass_environment( + self, grass_data_base, mapset_name="PERMANENT" + ): """Sets up the GRASS environment to run modules Args: @@ -890,38 +1032,48 @@ def _create_grass_environment(self, grass_data_base, mapset_name="PERMANENT"): """ self.message_logger.info( "Initlialize GRASS grass_data_base: %s; location: %s; mapset: %s" - % (grass_data_base, self.location_name, mapset_name)) - - self.ginit = GrassInitializer(grass_data_base=grass_data_base, - grass_base_dir=self.config.GRASS_GIS_BASE, - location_name=self.location_name, - mapset_name=mapset_name, config=self.config, - grass_addon_path=self.config.GRASS_ADDON_PATH, - user_id=self.user_id) + % (grass_data_base, self.location_name, 
mapset_name) + ) + + self.ginit = GrassInitializer( + grass_data_base=grass_data_base, + grass_base_dir=self.config.GRASS_GIS_BASE, + location_name=self.location_name, + mapset_name=mapset_name, + config=self.config, + grass_addon_path=self.config.GRASS_ADDON_PATH, + user_id=self.user_id, + ) self.ginit.initialize() - def _create_temporary_mapset(self, temp_mapset_name, source_mapset_name=None, - interim_result_mapset=None, - interim_result_file_path=None): + def _create_temporary_mapset( + self, + temp_mapset_name, + source_mapset_name=None, + interim_result_mapset=None, + interim_result_file_path=None, + ): """Create the temporary mapset and switch into it This method needs an initialized the GRASS environment. - It will check access to all required mapsets and adds them to the mapset - search path. + It will check access to all required mapsets and adds them to the + mapset search path. IMPORTANT: You need to call self._create_grass_environment() to set up the environment before calling this method. - A new temporary mapset is created. All in the process chain detected mapsets - of input maps or STDS will be added to the mapset search path using g.mapsets. + A new temporary mapset is created. All in the process chain detected + mapsets of input maps or STDS will be added to the mapset search path + using g.mapsets. - Optionally the WIND file of a source mapset can be copied into the temporary - mapset. + Optionally the WIND file of a source mapset can be copied into the + temporary mapset. 
Args: - temp_mapset_name (str): The name of the temporary mapset to be created + temp_mapset_name (str): The name of the temporary mapset to be + created source_mapset_name (str): The name of the source mapset to copy the WIND file from interim_result_mapset (str): The path to the mapset which is saved @@ -936,53 +1088,78 @@ def _create_temporary_mapset(self, temp_mapset_name, source_mapset_name=None, g.mapset/g.mapsets/db.connect modules fail """ - self.temp_mapset_path = os.path.join(self.temp_location_path, temp_mapset_name) + self.temp_mapset_path = os.path.join( + self.temp_location_path, temp_mapset_name + ) # if interim_result_mapset is set copy the mapset from the interim # results if interim_result_mapset: self.message_logger.info( - "Rsync interim result mapset to temporary GRASS DB") + "Rsync interim result mapset to temporary GRASS DB" + ) rsync_status = self.interim_result.rsync_mapsets( - interim_result_mapset, self.temp_mapset_path) - if rsync_status != 'success': + interim_result_mapset, self.temp_mapset_path + ) + if rsync_status != "success": raise RsyncError( - "Error while rsyncing of interim results to new temporare mapset") + "Error while rsyncing of interim results to new temporare " + "mapset" + ) if interim_result_file_path: self.message_logger.info( - "Rsync interim result file path to temporary GRASS DB") + "Rsync interim result file path to temporary GRASS DB" + ) rsync_status = self.interim_result.rsync_mapsets( - interim_result_file_path, self.temp_file_path) - if rsync_status != 'success': + interim_result_file_path, self.temp_file_path + ) + if rsync_status != "success": raise RsyncError( - "Error while rsyncing of interim temporary file path to new " - "temporare file path") + "Error while rsyncing of interim temporary file path to " + "new temporare file path" + ) - self.ginit.run_module("g.mapset", ["-c", "mapset=%s" % temp_mapset_name]) + self.ginit.run_module( + "g.mapset", ["-c", "mapset=%s" % temp_mapset_name] + ) if 
self.required_mapsets: - self.ginit.run_module("g.mapsets", - ["operation=add", - "mapset=%s" % (",".join(self.required_mapsets))]) + self.ginit.run_module( + "g.mapsets", + [ + "operation=add", + "mapset=%s" % (",".join(self.required_mapsets)), + ], + ) - self.message_logger.info("Added the following mapsets to the mapset " - "search path: " + ",".join(self.required_mapsets)) + self.message_logger.info( + "Added the following mapsets to the mapset " + "search path: " + ",".join(self.required_mapsets) + ) # Set the vector database connection to vector map specific databases - self.ginit.run_module("db.connect", [ - "driver=sqlite", - "database=$GISDBASE/$LOCATION_NAME/$MAPSET/vector/$MAP/sqlite.db"]) + self.ginit.run_module( + "db.connect", + [ + "driver=sqlite", + "database=$GISDBASE/$LOCATION_NAME/$MAPSET/vector/$MAP/" + "sqlite.db", + ], + ) # self.ginit.run_module("g.gisenv", ["set=DEBUG=2",]) - # If a source mapset is provided, the WIND file will be copied from it to the - # temporary mapset + # If a source mapset is provided, the WIND file will be copied from it + # to the temporary mapset if source_mapset_name is not None and interim_result_mapset is None: source_mapset_path = os.path.join( - self.temp_location_path, source_mapset_name) + self.temp_location_path, source_mapset_name + ) if os.path.exists(os.path.join(source_mapset_path, "WIND")): - shutil.copyfile(os.path.join(source_mapset_path, "WIND"), - os.path.join(self.temp_mapset_path, "WIND")) + shutil.copyfile( + os.path.join(source_mapset_path, "WIND"), + os.path.join(self.temp_mapset_path, "WIND"), + ) def _cleanup(self): """Clean up the GrassInitializer files created in @@ -992,16 +1169,19 @@ def _cleanup(self): if self.ginit: self.ginit.clean_up() - if self.temp_grass_data_base is not None and \ - os.path.exists(self.temp_grass_data_base) and \ - os.path.isdir(self.temp_grass_data_base): + if ( + self.temp_grass_data_base is not None + and os.path.exists(self.temp_grass_data_base) + and 
os.path.isdir(self.temp_grass_data_base) + ): shutil.rmtree(self.temp_grass_data_base, ignore_errors=True) def _check_reset_region(self): """Check the current region settings against the user cell limit. Reset the current processing region to a meaningful state - so that the user cell limit is not reached and the mapset can be accessed again. + so that the user cell limit is not reached and the mapset can be + accessed again. Raises: This method will raise an AsyncProcessError exception @@ -1010,10 +1190,14 @@ def _check_reset_region(self): if self.skip_region_check is True: return - errorid, stdout_buff, stderr_buff = self.ginit.run_module("g.region", ["-ug"]) + errorid, stdout_buff, stderr_buff = self.ginit.run_module( + "g.region", ["-ug"] + ) if errorid != 0: - raise AsyncProcessError("Unable to check the computational region size") + raise AsyncProcessError( + "Unable to check the computational region size" + ) str_list = stdout_buff.split() region = {} @@ -1049,15 +1233,18 @@ def _adjust_region_size(self, num_cells, ns_res, ew_res): ns_res = ns_res * fak ew_res = ew_res * fak errorid, stdout_buff, stderr_buff = self.ginit.run_module( - "g.region", ["nsres=%f" % ns_res, "ewres=%f" % ew_res, "-g"]) + "g.region", ["nsres=%f" % ns_res, "ewres=%f" % ew_res, "-g"] + ) self.message_logger.info(stdout_buff) if errorid != 0: raise AsyncProcessError( - "Unable to adjust the region settings to nsres: " - "%f ewres: %f error: %s" % (ns_res, ew_res, stderr_buff)) + "Unable to adjust the region settings to nsres: " + "%f ewres: %f error: %s" % (ns_res, ew_res, stderr_buff) + ) raise AsyncProcessError( "Region too large, set a coarser resolution to minimum nsres: " - "%f ewres: %f [num_cells: %d]" % (ns_res, ew_res, num_cells)) + "%f ewres: %f [num_cells: %d]" % (ns_res, ew_res, num_cells) + ) def _increment_progress(self, num=1): """Increment the progress step by a specific number @@ -1081,14 +1268,18 @@ def _update_num_of_steps(self, num): """Update the number of total 
steps Args: - num: The number of processes to be added to the total number of processes + num: The number of processes to be added to the total number of + processes """ self.number_of_processes += num self.progress["num_of_steps"] = self.number_of_processes - def _wait_for_process(self, module_name, module_parameter, proc, poll_time): - """Wait for a specific process. Catch termination requests, process time limits - and send updates to the user. + def _wait_for_process( + self, module_name, module_parameter, proc, poll_time + ): + """ + Wait for a specific process. Catch termination requests, process + time limits and send updates to the user. Args: module_name: The name of the GRASS module or executable @@ -1120,11 +1311,17 @@ def _wait_for_process(self, module_name, module_parameter, proc, poll_time): termination_check_count = 0 # check if the resource should be terminated # and kill the current process - if self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration) is True: + if ( + self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration + ) + is True + ): proc.kill() - raise AsyncProcessTermination("Process <%s> was terminated " - "by user request" % module_name) + raise AsyncProcessTermination( + "Process <%s> was terminated " + "by user request" % module_name + ) # Send all 100 loops a status update if update_check_count == 100: @@ -1135,22 +1332,25 @@ def _wait_for_process(self, module_name, module_parameter, proc, poll_time): proc.kill() raise AsyncProcessTimeLimit( "Time (%i seconds) exceeded to run executable %s" - % (self.process_time_limit, module_name)) - # Reduce the length of the command line parameters for lesser - # logging overhead + % (self.process_time_limit, module_name) + ) + # Reduce the length of the command line parameters for + # lesser logging overhead mparams = str(module_parameter) if len(mparams) > 100: mparams = "%s ... 
%s" % (mparams[0:50], mparams[-50:]) message = ( - "Running executable %s with parameters %s for %g seconds" - % (module_name, mparams, curr_time - start_time)) + f"Running executable {module_name} with parameters " + f"{mparams} for {curr_time - start_time} seconds" + ) self._send_resource_update(message) return time.time() - start_time def _run_process(self, process, poll_time=0.05): - """Run a process actinia_core.core.common.process_object.Process) with options and send - progress updates to the resource database. + """ + Run a process actinia_core.core.common.process_object.Process) with + options and send progress updates to the resource database. IMPORTANT: Use this method to run programs that are not GRASS modules. @@ -1175,35 +1375,42 @@ def _run_process(self, process, poll_time=0.05): (returncode, stdout_buff, stderr_buff) """ - if self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration) is True: - raise AsyncProcessTermination("Process <%s> was terminated by " - "user request" % process.executable) + if ( + self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration + ) + is True + ): + raise AsyncProcessTermination( + "Process <%s> was terminated by " + "user request" % process.executable + ) return self._run_executable(process, poll_time) def _run_module(self, process, poll_time=0.05): - """Run the GRASS module actinia_core.core.common.process_object.Process) with its module - options and send progress updates to the database server that manages - the resource entries. - - Check before each module run the size of the region. If the maximum number - of cells are exceeded then raise an AsyncProcessError exception that the - maximum number of cells are exceeded and reset them to a meaningful state, - so that the user can still run processes in the mapset. 
+ """Run the GRASS module actinia_core.core.common.process_object.Process + with its module options and send progress updates to the database + server that manages the resource entries. + + Check before each module run the size of the region. If the maximum + number of cells are exceeded then raise an AsyncProcessError exception + that the maximum number of cells are exceeded and reset them to a + meaningful state, so that the user can still run processes in the + mapset. - The region is checked for the first module of a process chain and after that, - for each g.region call that was present in the process chain. + The region is checked for the first module of a process chain and after + that, for each g.region call that was present in the process chain. Check each poll the termination status of the resource. If the termination state is set True, terminate the current process and raise an AsyncProcessTermination exception that must be caught by the run() method. - By default the status of the running process is checked each 0.005 seconds. - This is the minimum amount of time a process can run. If many tiny - running processes are executed in a large process chain, then this value - must be adjusted. + By default the status of the running process is checked each 0.005 + seconds. This is the minimum amount of time a process can run. If many + tiny running processes are executed in a large process chain, then this + value must be adjusted. Args: process actinia_core.core.common.process_object.Process): @@ -1224,38 +1431,51 @@ def _run_module(self, process, poll_time=0.05): # Count the processes self.process_count += 1 # Check for each 20. 
process if a kill request was received - # This is required in case a single of many fast running processes in a chain - # is not able to trigger the termination check in the while loop + # This is required in case a single of many fast running processes in a + # chain is not able to trigger the termination check in the while loop if self.process_count % 20 == 0: - if self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration) is True: - raise AsyncProcessTermination("Process <%s> was terminated " - "by user request" % process.executable) + if ( + self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration + ) + is True + ): + raise AsyncProcessTermination( + "Process <%s> was terminated " + "by user request" % process.executable + ) message = "Running module %s with parameters %s" % ( - process.executable, str(process.executable_params)) + process.executable, + str(process.executable_params), + ) self._send_resource_update(message) - # Check reset region if a g.region call was present in the process chain. - # By default the initial value of last_module is "g.region" to assure - # for first run of a process from the process chain, the + # Check reset region if a g.region call was present in the process + # chain. By default the initial value of last_module is "g.region" to + # assure for first run of a process from the process chain, the # region settings are evaluated - if self.last_module == "g.region" and process.skip_permission_check is False: + if ( + self.last_module == "g.region" + and process.skip_permission_check is False + ): self._check_reset_region() - # Save the last module name. This is needed to check the region settings + # Save the last module name. 
This is needed to check the region + # settings self.last_module = process.executable return self._run_executable(process, poll_time) def _run_executable(self, process, poll_time=0.005): - """Runs a GRASS module or aactinia_core.core.Unix executable and sets up - the correct handling of stdout, stderr and stdin, creates the + """Runs a GRASS module or aactinia_core.core.Unix executable and sets + up the correct handling of stdout, stderr and stdin, creates the process log model and returns stdout, stderr and the return code. It creates the temporary file paths. - The returncode of 0 indicates that it ran successfully. A negative value -N - indicates that the child was terminated by signal N (POSIX only; see also + The returncode of 0 indicates that it ran successfully. A negative + value -N indicates that the child was terminated by signal N (POSIX + only; see also https://en.wikipedia.org/wiki/Signal_(IPC)#Default_action). Args: @@ -1277,9 +1497,11 @@ def _run_executable(self, process, poll_time=0.005): # Use temporary files to catch stdout and stderr stdout_buff = tempfile.NamedTemporaryFile( - mode="w+b", delete=True, dir=self.temp_file_path) + mode="w+b", delete=True, dir=self.temp_file_path + ) stderr_buff = tempfile.NamedTemporaryFile( - mode="w+b", delete=True, dir=self.temp_file_path) + mode="w+b", delete=True, dir=self.temp_file_path + ) stdin_file = None if process.stdin_source is not None: @@ -1291,28 +1513,32 @@ def _run_executable(self, process, poll_time=0.005): self._increment_progress(num=1) - # print(process) - - # GRASS andactinia_core.core.Unix executables have different run methods + # GRASS andactinia_core.core.Unix executables have different run + # methods if process.exec_type in "grass": - proc = self.ginit.run_module(process.executable, - process.executable_params, raw=True, - stdout=stdout_buff, - stderr=stderr_buff, - stdin=stdin_file) + proc = self.ginit.run_module( + process.executable, + process.executable_params, + raw=True, + 
stdout=stdout_buff, + stderr=stderr_buff, + stdin=stdin_file, + ) else: inputlist = list() inputlist.append(process.executable) inputlist.extend(process.executable_params) - proc = subprocess.Popen(args=inputlist, - stdout=stdout_buff, - stderr=stderr_buff, - stdin=stdin_file) + proc = subprocess.Popen( + args=inputlist, + stdout=stdout_buff, + stderr=stderr_buff, + stdin=stdin_file, + ) - run_time = self._wait_for_process(process.executable, - process.executable_params, - proc, poll_time) + run_time = self._wait_for_process( + process.executable, process.executable_params, proc, poll_time + ) proc.wait() @@ -1330,42 +1556,52 @@ def _run_executable(self, process, poll_time=0.005): process.set_stdouts(stdout=stdout_string, stderr=stderr_string) kwargs = { - 'id': process.id, - 'executable': process.executable, - 'parameter': process.executable_params, - 'return_code': proc.returncode, - 'stdout': stdout_string, - 'stderr': stderr_string.split("\n"), - 'run_time': run_time} + "id": process.id, + "executable": process.executable, + "parameter": process.executable_params, + "return_code": proc.returncode, + "stdout": stdout_string, + "stderr": stderr_string.split("\n"), + "run_time": run_time, + } if self.temp_mapset_path: - kwargs['mapset_size'] = get_directory_size(self.temp_mapset_path) + kwargs["mapset_size"] = get_directory_size(self.temp_mapset_path) plm = ProcessLogModel(**kwargs) self.module_output_log.append(plm) - # Store the log in an additional dictionary for automated output generation + # Store the log in an additional dictionary for automated output + # generation if process.id is not None: self.module_output_dict[process.id] = plm if proc.returncode != 0: raise AsyncProcessError( - "Error while running executable <%s>" % process.executable) + "Error while running executable <%s>" % process.executable + ) # save interim results - if (self.interim_result.saving_interim_results is True - and self.temp_mapset_path is not None): + if ( + 
self.interim_result.saving_interim_results is True + and self.temp_mapset_path is not None + ): self.interim_result.save_interim_results( - self.progress_steps, self.temp_mapset_path, self.temp_file_path) + self.progress_steps, self.temp_mapset_path, self.temp_file_path + ) elif self.temp_mapset_path is None: self.message_logger.debug( "No temp mapset path set. Because of that no interim results" - " can be saved!") + " can be saved!" + ) return proc.returncode, stdout_string, stderr_string - def _create_temporary_grass_environment(self, source_mapset_name=None, - interim_result_mapset=None, - interim_result_file_path=None): + def _create_temporary_grass_environment( + self, + source_mapset_name=None, + interim_result_mapset=None, + interim_result_file_path=None, + ): """Create a temporary GRASS GIS environment This method will: @@ -1395,14 +1631,17 @@ def _create_temporary_grass_environment(self, source_mapset_name=None, # Initialize the GRASS environment and switch into PERMANENT # mapset, which is always linked - self._create_grass_environment(grass_data_base=self.temp_grass_data_base, - mapset_name="PERMANENT") + self._create_grass_environment( + grass_data_base=self.temp_grass_data_base, mapset_name="PERMANENT" + ) # Create the temporary mapset and switch into it - self._create_temporary_mapset(temp_mapset_name=self.temp_mapset_name, - source_mapset_name=source_mapset_name, - interim_result_mapset=interim_result_mapset, - interim_result_file_path=interim_result_file_path) + self._create_temporary_mapset( + temp_mapset_name=self.temp_mapset_name, + source_mapset_name=source_mapset_name, + interim_result_mapset=interim_result_mapset, + interim_result_file_path=interim_result_file_path, + ) def _execute(self, skip_permission_check=False): """Overwrite this function in subclasses. 
@@ -1414,7 +1653,8 @@ def _execute(self, skip_permission_check=False): - Create the temporal database - Initialize the GRASS environment and create the temporary mapset - Run the modules - - Parse the stdout output of the modules and generate the module results + - Parse the stdout output of the modules and generate the module + results Args: skip_permission_check (bool): If set True, the permission checks of @@ -1428,12 +1668,15 @@ def _execute(self, skip_permission_check=False): """ # Create the process chain if self.rdc.iteration is not None: - process_list = \ - self._create_temporary_grass_environment_and_process_list_for_iteration( - skip_permission_check=skip_permission_check) + process_list = self._create_temporary_grass_environment_and_process_list_for_iteration( + skip_permission_check=skip_permission_check + ) else: - process_list = self._create_temporary_grass_environment_and_process_list( - skip_permission_check=skip_permission_check) + process_list = ( + self._create_temporary_grass_environment_and_process_list( + skip_permission_check=skip_permission_check + ) + ) # Run all executables self._execute_process_list(process_list=process_list) @@ -1441,7 +1684,8 @@ def _execute(self, skip_permission_check=False): self._parse_module_outputs() def _create_temporary_grass_environment_and_process_list_for_iteration( - self, process_chain=None, skip_permission_check=False): + self, process_chain=None, skip_permission_check=False + ): """Helper method to: - Setup logger and credentials @@ -1468,30 +1712,39 @@ def _create_temporary_grass_environment_and_process_list_for_iteration( self._setup() # Create and check the process chain - pc_step, old_process_chain_list = self._get_previous_iteration_process_chain() + ( + pc_step, + old_process_chain_list, + ) = self._get_previous_iteration_process_chain() self.interim_result.set_old_pc_step(pc_step) process_list = self._validate_process_chain( process_chain=self.request_data, old_process_chain=old_process_chain_list, 
skip_permission_check=skip_permission_check, - pc_step=pc_step) + pc_step=pc_step, + ) # check iterim results - interim_result_mapset, interim_result_file_path = \ - self.interim_result.check_interim_result_mapset( - pc_step, self.iteration - 1) + ( + interim_result_mapset, + interim_result_file_path, + ) = self.interim_result.check_interim_result_mapset( + pc_step, self.iteration - 1 + ) if interim_result_mapset is None: return None # Init GRASS and create the temporary mapset with the interim results self._create_temporary_grass_environment( interim_result_mapset=interim_result_mapset, - interim_result_file_path=interim_result_file_path) + interim_result_file_path=interim_result_file_path, + ) return process_list def _create_temporary_grass_environment_and_process_list( - self, process_chain=None, skip_permission_check=False): + self, process_chain=None, skip_permission_check=False + ): """Helper method to: - Setup logger and credentials @@ -1520,7 +1773,8 @@ def _create_temporary_grass_environment_and_process_list( # Create and check the process chain process_list = self._validate_process_chain( process_chain=process_chain, - skip_permission_check=skip_permission_check) + skip_permission_check=skip_permission_check, + ) # Init GRASS and create the temporary mapset self._create_temporary_grass_environment() @@ -1528,13 +1782,13 @@ def _create_temporary_grass_environment_and_process_list( return process_list def _parse_module_outputs(self): - """Parse the module stdout outputs and parse them into the required formats: - table, list or kv + """Parse the module stdout outputs and parse them into the required + formats: table, list or kv This functions analyzes the output_parser_list for entries to parse. - It will convert the stdout strings into tables, lists or key/value outputs - and stores the result in the module_result dictionary using the provided - id of the StdoutParser. 
+ It will convert the stdout strings into tables, lists or key/value + outputs and stores the result in the module_result dictionary using the + provided id of the StdoutParser. """ @@ -1545,7 +1799,8 @@ def _parse_module_outputs(self): delimiter = stdout_def["delimiter"] if process_id not in self.module_output_dict: raise AsyncProcessError( - "Unable to find process id in module output dictionary") + "Unable to find process id in module output dictionary" + ) stdout = self.module_output_dict[process_id]["stdout"] # Split the rows by the \n new line delimiter rows = stdout.strip().split("\n") @@ -1572,9 +1827,12 @@ def _parse_module_outputs(self): elif "json" in format: result = None try: - result = {i[0]: i[1] for i in [ - entry.split(delimiter, 1) for entry in - stdout.strip('\n').split('\n')] + result = { + i[0]: i[1] + for i in [ + entry.split(delimiter, 1) + for entry in stdout.strip("\n").split("\n") + ] } except Exception: try: @@ -1591,7 +1849,8 @@ def _parse_module_outputs(self): self.module_results[id] = result def _execute_process_list(self, process_list): - """Run all modules or executables that are specified in the process list + """ + Run all modules or executables that are specified in the process list Args: process_list: The process list that was generated by @@ -1623,8 +1882,8 @@ def _final_cleanup(self): def run(self): """This function will run the processing and will catch and process - any Exceptions that were raised while processing. Call this function to run the - processing. + any Exceptions that were raised while processing. Call this function to + run the processing. 
You have to implement/overwrite two methods that are called here: @@ -1632,7 +1891,8 @@ def run(self): * self._final_cleanup() e_type, e_value, e_traceback = sys.exc_info() - message = [e.__class__, e_type, e_value, traceback.format_tb(e_traceback)] + message = [e.__class__, e_type, e_value, traceback.format_tb( + e_traceback)] message = pprint.pformat(message) """ @@ -1645,21 +1905,27 @@ def run(self): self.run_state = {"time limit exceeded": str(e)} except AsyncProcessError as e: e_type, e_value, e_tb = sys.exc_info() - model = ExceptionTracebackModel(message=str(e_value), - traceback=traceback.format_tb(e_tb), - type=str(e_type)) + model = ExceptionTracebackModel( + message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type), + ) self.run_state = {"error": str(e), "exception": model} except KeyboardInterrupt as e: e_type, e_value, e_tb = sys.exc_info() - model = ExceptionTracebackModel(message=str(e_value), - traceback=traceback.format_tb(e_tb), - type=str(e_type)) + model = ExceptionTracebackModel( + message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type), + ) self.run_state = {"error": str(e), "exception": model} except Exception as e: e_type, e_value, e_tb = sys.exc_info() - model = ExceptionTracebackModel(message=str(e_value), - traceback=traceback.format_tb(e_tb), - type=str(e_type)) + model = ExceptionTracebackModel( + message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type), + ) self.run_state = {"error": str(e), "exception": model} finally: try: @@ -1667,24 +1933,31 @@ def run(self): self._final_cleanup() except Exception as e: e_type, e_value, e_tb = sys.exc_info() - model = ExceptionTracebackModel(message=str(e_value), - traceback=traceback.format_tb(e_tb), - type=str(e_type)) + model = ExceptionTracebackModel( + message=str(e_value), + traceback=traceback.format_tb(e_tb), + type=str(e_type), + ) self.run_state = {"error": str(e), "exception": model} # After all processing finished, 
send the final status if "success" in self.run_state: - self._send_resource_finished(message=self.finish_message, - results=self.module_results) + self._send_resource_finished( + message=self.finish_message, results=self.module_results + ) elif "terminated" in self.run_state: # Send an error message if an exception was raised - self._send_resource_terminated(message=self.run_state["terminated"]) + self._send_resource_terminated( + message=self.run_state["terminated"] + ) elif "time limit exceeded" in self.run_state: self._send_resource_time_limit_exceeded( - message=self.run_state["time limit exceeded"]) + message=self.run_state["time limit exceeded"] + ) elif "error" in self.run_state: # Send an error message if an exception was raised self._send_resource_error( message=self.run_state["error"], - exception=self.run_state["exception"]) + exception=self.run_state["exception"], + ) else: self._send_resource_error(message="Unknown error") From da38d53b4af8b8097559cde2a7d96ca4b871aecb Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Thu, 22 Sep 2022 16:44:25 +0200 Subject: [PATCH 09/46] Add job queue name to response (#380) * add job queue name to response * lint * Update src/actinia_core/models/response_models.py Co-authored-by: Markus Neteler * black * undo-black Co-authored-by: Markus Neteler --- .../processing/actinia_processing/ephemeral_processing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index e34c076..832603b 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -292,6 +292,7 @@ def _send_resource_update(self, message, results=None): status="running", user_id=self.user_id, resource_id=self.resource_id, + queue=self.rdc.queue, iteration=self.iteration, # process_log=self.module_output_log, 
progress=self.progress, @@ -319,6 +320,7 @@ def _send_resource_finished(self, message, results=None): status="finished", user_id=self.user_id, resource_id=self.resource_id, + queue=self.rdc.queue, iteration=self.iteration, process_log=self.module_output_log, progress=self.progress, @@ -348,6 +350,7 @@ def _send_resource_terminated(self, message, results=None): status="terminated", user_id=self.user_id, resource_id=self.resource_id, + queue=self.rdc.queue, iteration=self.iteration, process_log=self.module_output_log, progress=self.progress, @@ -376,6 +379,7 @@ def _send_resource_time_limit_exceeded(self, message, results=None): status="terminated", user_id=self.user_id, resource_id=self.resource_id, + queue=self.rdc.queue, iteration=self.iteration, process_log=self.module_output_log, progress=self.progress, @@ -404,6 +408,7 @@ def _send_resource_error(self, message, results=None, exception=None): status="error", user_id=self.user_id, resource_id=self.resource_id, + queue=self.rdc.queue, iteration=self.iteration, process_log=self.module_output_log, progress=self.progress, From 8c606bfb32400cfff81bb19acc6713d78d2a4389 Mon Sep 17 00:00:00 2001 From: Julia Haas <83269984+juleshaas@users.noreply.github.com> Date: Fri, 21 Oct 2022 15:31:56 +0200 Subject: [PATCH 10/46] Change pc style for module description for importer and exporter in module plugin (#387) * change pc style for module description for importer and exporter in module plugin * black --- .../actinia_processing/ephemeral_processing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 832603b..8c32e8b 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -75,11 +75,10 @@ class EphemeralProcessing(object): - """ - This 
class processes GRASS data on the local machine in an temporary + """This class processes GRASS data on the local machine in a temporary mapset. - The temporary mapset will be removed by this class when the processing + The temporary mapset will be removed by this class when the processing is finished Creating the temporary database and mapset: @@ -92,7 +91,7 @@ class EphemeralProcessing(object): e.g: /tmp/soeren_temp_gisdbase/ECAD - 3. Softlink the PERMANENT and all required mapsets into the, + 3. Softlink the PERMANENT and all required mapsets into the new location directory from the original location, check the input parameter of the module for which mapsets must be linked @@ -102,7 +101,7 @@ class EphemeralProcessing(object): -> /tmp/soeren_temp_gisdbase/ECAD/Temperature 4. Set the GRASS GIS environmental variables to point to the new gisdbase, - location and PERMANENT maspet + location and PERMANENT mapset 5. Create a new mapset with g.mapset in the temporary location directory @@ -470,8 +469,9 @@ def _send_to_database(self, document, final=False): def _post_to_webhook(self, document, type): """Helper method to send a post request to a webhook. - The finished webhook will be retried until it is reached of the number - of tries is WEBHOOK_RETRIES which can be set in the config. + The finished webhook will be retried until it has reached the number + of tries. The number of tries is WEBHOOK_RETRIES which can be set in the + config. Args: document (str): The response document @@ -698,7 +698,7 @@ def _setup(self, init_grass=True): - Create the resource and message logger - Create the redis lock interface for resource locking - Set cell limit, process number limit and process time limit from user - c redentials. + credentials. - Create all required paths to original and temporary location and mapsets. 
- temp_location_path @@ -1887,7 +1887,7 @@ def _final_cleanup(self): def run(self): """This function will run the processing and will catch and process - any Exceptions that were raised while processing. Call this function to + any exceptions that were raised while processing. Call this function to run the processing. You have to implement/overwrite two methods that are called here: From e17aa1ea0a38cdfb1bc6f9d38fa195955320e7e1 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Tue, 22 Nov 2022 12:38:14 +0100 Subject: [PATCH 11/46] Start with the adjustment of the interim results (#390) adjustment of the interim results Co-authored-by: anikaweinmann Co-authored-by: Carmen Tawalika --- .../ephemeral_processing.py | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 8c32e8b..6af3cc2 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -71,7 +71,7 @@ __copyright__ = ( "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" ) -__maintainer__ = "mundialis" +__maintainer__ = "mundialis GmbH & Co. 
KG" class EphemeralProcessing(object): @@ -154,7 +154,10 @@ def __init__(self, rdc): self.status_url = self.rdc.status_url self.api_info = self.rdc.api_info self.interim_result = InterimResult( - self.user_id, self.resource_id, self.iteration + self.user_id, + self.resource_id, + self.iteration, + self.api_info["endpoint"], ) self.grass_data_base = self.rdc.grass_data_base # Global database @@ -1875,6 +1878,26 @@ def _execute_process_list(self, process_list): elif process.exec_type == "python": eval(process.executable) + def _interim_results(self): + """Check if interim results should be saved or cleaned up""" + + if ( + "error" in self.run_state + and self.interim_result.saving_interim_results == "onError" + ): + self.interim_result.delete_interim_results() + self.interim_result.save_interim_results( + self.progress_steps - 1, + self.temp_mapset_path, + self.temp_file_path, + force_copy=True, + ) + elif ( + "success" in self.run_state + and self.interim_result.saving_interim_results is not False + ): + self.interim_result.delete_interim_results() + def _final_cleanup(self): """Overwrite this function in subclasses to perform the final cleanup, by default this function calls self._cleanup() to remove the temporary @@ -1893,6 +1916,7 @@ def run(self): You have to implement/overwrite two methods that are called here: * self._execute() + * self._interim_results() * self._final_cleanup() e_type, e_value, e_traceback = sys.exc_info() @@ -1934,6 +1958,8 @@ def run(self): self.run_state = {"error": str(e), "exception": model} finally: try: + # Check if interim results should be saved and save them + self._interim_results() # Call the final cleanup, before sending the status messages self._final_cleanup() except Exception as e: From e999c9a39783a2df0b5dd2f669d8d197303c2f20 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 14 Dec 2022 15:40:02 +0100 Subject: [PATCH 12/46] Improve interim results by include additional 
mapsets (#399) * Add possibility to configure pattern to save addional mapsets in the interim resutls Co-authored-by: anikaweinmann --- .../processing/actinia_processing/ephemeral_processing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 6af3cc2..f97172e 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -1114,6 +1114,9 @@ def _create_temporary_mapset( "Error while rsyncing of interim results to new temporare " "mapset" ) + self.interim_result.rsync_additional_mapsets( + os.path.dirname(self.temp_mapset_path) + ) if interim_result_file_path: self.message_logger.info( "Rsync interim result file path to temporary GRASS DB" From 089ce41863ba39cfea23dda7b5a9db61cd40e245 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Fri, 20 Jan 2023 14:39:48 +0100 Subject: [PATCH 13/46] Add raster VRT support and support for mapset names by interim results (#410) * add raster VRT support and support for mapset names by interim results Co-authored-by: anikaweinmann , linakrisztian --- .../ephemeral_processing.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index f97172e..7180afa 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -4,7 +4,7 @@ # performance processing of geographical data that uses GRASS GIS for # computational tasks. For details, see https://actinia.mundialis.de/ # -# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. 
KG +# Copyright (c) 2016-2023 Sören Gebbert and mundialis GmbH & Co. KG # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ from actinia_core.core.messages_logger import MessageLogger from actinia_core.core.redis_lock import RedisLockingInterface from actinia_core.core.resources_logger import ResourceLogger +from actinia_core.core.mapset_merge_utils import change_mapsetname from actinia_core.core.common.process_chain import ProcessChainConverter from actinia_core.core.common.exceptions import ( AsyncProcessError, @@ -67,9 +68,9 @@ ) __license__ = "GPLv3" -__author__ = "Sören Gebbert, Anika Weinmann" +__author__ = "Sören Gebbert, Anika Weinmann, Lina Krisztian" __copyright__ = ( - "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2023, Sören Gebbert and mundialis GmbH & Co. KG" ) __maintainer__ = "mundialis GmbH & Co. KG" @@ -1103,9 +1104,18 @@ def _create_temporary_mapset( # if interim_result_mapset is set copy the mapset from the interim # results if interim_result_mapset: + self.message_logger.info( "Rsync interim result mapset to temporary GRASS DB" ) + # change mapset name for groups, raster VRTs and tgis + for directory in ["group", "cell_misc", "tgis"]: + change_mapsetname( + os.path.join(interim_result_mapset, directory), + directory, + os.path.basename(interim_result_mapset), + os.path.basename(self.temp_mapset_path), + ) rsync_status = self.interim_result.rsync_mapsets( interim_result_mapset, self.temp_mapset_path ) @@ -1888,13 +1898,14 @@ def _interim_results(self): "error" in self.run_state and self.interim_result.saving_interim_results == "onError" ): - self.interim_result.delete_interim_results() - self.interim_result.save_interim_results( - self.progress_steps - 1, - self.temp_mapset_path, - self.temp_file_path, - force_copy=True, - ) + if self.progress_steps > 0: + self.interim_result.delete_interim_results() 
+ self.interim_result.save_interim_results( + self.progress_steps - 1, + self.temp_mapset_path, + self.temp_file_path, + force_copy=True, + ) elif ( "success" in self.run_state and self.interim_result.saving_interim_results is not False From d62628bdb12bcfa9be525c4bb38031e1805a2c8a Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:14:28 +0200 Subject: [PATCH 14/46] Black (#458) * make resource resource delition asynchron * trailing and ending precommit ... * do not use pylint * trigger linting * linting * linting --------- Co-authored-by: anikaweinmann --- .../processing/actinia_processing/ephemeral_processing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 7180afa..292d064 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -1104,7 +1104,6 @@ def _create_temporary_mapset( # if interim_result_mapset is set copy the mapset from the interim # results if interim_result_mapset: - self.message_logger.info( "Rsync interim result mapset to temporary GRASS DB" ) From 5d6d1671a67f61e36277f65c85a871b7c63dadbf Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Thu, 2 Nov 2023 14:56:48 +0100 Subject: [PATCH 15/46] Allow separate config for worker Part 1 (#376) * allow separate config for worker * test tests * fix test * create download cache if not exists * create download cache if not exists * create resource storage if not exists * create log folder * further work on tests * add TODOs in tests --- .../actinia_processing/ephemeral_processing.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py 
b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 292d064..fd8d0a7 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -40,6 +40,7 @@ from flask import json from requests.auth import HTTPBasicAuth +from actinia_core.core.common.config import global_config, DEFAULT_CONFIG_PATH from actinia_core.core.common.process_object import Process from actinia_core.core.grass_init import GrassInitializer from actinia_core.core.messages_logger import MessageLogger @@ -141,7 +142,14 @@ def __init__(self, rdc): # rdc = ResourceDataContainer() self.rdc = rdc - self.config = self.rdc.config + if os.path.exists(DEFAULT_CONFIG_PATH) is True and os.path.isfile( + DEFAULT_CONFIG_PATH + ): + self.config = global_config + self.rdc.config = self.config + else: + self.config = self.rdc.config + self.data = self.rdc.user_data self.grass_temp_database = self.config.GRASS_TMP_DATABASE From 06431d041ba784e36cb93759b0de875350e4812c Mon Sep 17 00:00:00 2001 From: linakrisztian <106728040+linakrisztian@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:59:58 +0100 Subject: [PATCH 16/46] Checking pixellimit for r.import commands (#491) * started implementation for querying pixellimit for r.import commands * continued implementation of pixel limit check for rimport commands * considered resampling/reprojection for raster size + cleanup of created vrt * linting * markdown formating * add tests for pixellimit check * improved check in test + added fix for determining the estimated resolution of raster * Update tests/test_raster_import_pixellimit.py Co-authored-by: Markus Neteler * Update tests/test_raster_import_pixellimit.py Co-authored-by: Markus Neteler * Update tests/test_raster_import_pixellimit.py Co-authored-by: Carmen Tawalika * Update tests/test_raster_import_pixellimit.py Co-authored-by: Carmen Tawalika * Update 
src/actinia_core/processing/actinia_processing/ephemeral_processing.py Co-authored-by: Carmen Tawalika * add test for importer --------- Co-authored-by: Markus Neteler Co-authored-by: Carmen Tawalika --- .../ephemeral_processing.py | 157 +++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index fd8d0a7..15bbb9b 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -192,6 +192,7 @@ def __init__(self, rdc): self.temp_mapset_path = None self.ginit = None + self.ginit_tmpfiles = list() # Successfully finished message self.finish_message = "Processing successfully finished" @@ -1203,6 +1204,154 @@ def _cleanup(self): and os.path.isdir(self.temp_grass_data_base) ): shutil.rmtree(self.temp_grass_data_base, ignore_errors=True) + if self.ginit_tmpfiles: + for tmpfile in self.ginit_tmpfiles: + try: + os.remove(tmpfile) + except Exception as e: + self.message_logger.debug( + f"Temporary file {tmpfile} can't be removed: {e}" + ) + + def _check_pixellimit_rimport(self, process_executable_params): + """Check the current r.import command against the user cell limit. 
+ + Raises: + This method will raise an AsyncProcessError exception + + """ + rimport_inp = [x for x in process_executable_params if "input=" in x][ + 0 + ].split("=")[1] + rimport_out = [x for x in process_executable_params if "output=" in x][ + 0 + ].split("=")[1] + vrt_out = f"{rimport_out}_{os.getpid()}_tmp.vrt" + self.ginit_tmpfiles.append(vrt_out) + + # define extent_region if set (otherwise empty list) + extent_region = [ + x for x in process_executable_params if "extent=" in x + ] + + # build VRT of rimport input + gdabuildvrt_params = list() + # if extent=region set, vrt only for region, not complete input + if extent_region: + # first query region extents + errorid, stdout_gregion, stderr_gregion = self.ginit.run_module( + "g.region", ["-ug"] + ) + if errorid != 0: + raise AsyncProcessError( + "Unable to check the computational region size" + ) + # parse region extents for creation of vrt (-te flag from gdalbuildvrt) + list_out_gregion = stdout_gregion.split("\n") + gdabuildvrt_params.append("-te") + gdabuildvrt_params.append(list_out_gregion[4]) # xmin/w + gdabuildvrt_params.append(list_out_gregion[3]) # ymin/s + gdabuildvrt_params.append(list_out_gregion[5]) # xmax/e + gdabuildvrt_params.append(list_out_gregion[2]) # ymax/n + # out and input for gdalbuildvrt + gdabuildvrt_params.append(vrt_out) + gdabuildvrt_params.append(rimport_inp) + # build vrt with previous defined parameters + ( + errorid, + stdout_gdalbuildvrt, + stderr_gdalbuildvrt, + ) = self.ginit.run_module("/usr/bin/gdalbuildvrt", gdabuildvrt_params) + + # gdalinfo for created vrt + gdalinfo_params = [vrt_out] + errorid, stdout_gdalinfo, stderr_gdalinfo = self.ginit.run_module( + "/usr/bin/gdalinfo", gdalinfo_params + ) + # parse "Size" output of gdalinfo + rastersize_list = ( + stdout_gdalinfo.split("Size is")[1].split("\n")[0].split(",") + ) + # size = x-dim*y-dim + rastersize_x = int(rastersize_list[0]) + rastersize_y = int(rastersize_list[1]) + rastersize = rastersize_x * rastersize_y + 
+ # if different import/reprojection resolution set: + rimport_res = [ + x for x in process_executable_params if "resolution=" in x + ] + res_val = None + # If raster exceeds cell limit already in original resolution, next part can be skipped + if rimport_res and (rastersize < self.cell_limit): + # determine estimated resolution + errorid, stdout_estres, stderr_estres = self.ginit.run_module( + "r.import", [vrt_out, "-e"] + ) + if "Estimated" in stderr_estres: + # if data in different projection get rest_est with output of r.import -e + res_est = float(stderr_estres.split("\n")[-2].split(":")[1]) + else: + # if data in same projection can use gdalinfo output + res_xy = ( + stdout_gdalinfo.split("Pixel Size = (")[1] + .split(")\n")[0] + .split(",") + ) + # get estimated resolution + # (analoug as done within r.import -e: estres = math.sqrt((n - s) * (e - w) / cells)) + res_est = math.sqrt(abs(float(res_xy[0]) * float(res_xy[1]))) + # determine set resolution value + resolution = rimport_res[0].split("=")[1] + if resolution == "value": + res_val = [ + float( + [ + x + for x in process_executable_params + if "resolution_value=" in x + ][0].split("=")[1] + ) + ] * 2 + elif resolution == "region": + # if already queried above reuse, otherwise execute g.region command + try: + stdout_gregion + except Exception: + ( + errorid, + stdout_gregion, + stderr_gregion, + ) = self.ginit.run_module("g.region", ["-ug"]) + res_val_ns = float( + [x for x in stdout_gregion.split("\n") if "nsres=" in x][ + 0 + ].split("=")[1] + ) + res_val_ew = float( + [x for x in stdout_gregion.split("\n") if "ewres=" in x][ + 0 + ].split("=")[1] + ) + res_val = [res_val_ns, res_val_ew] + if res_val: + if (res_val[0] < res_est) | (res_val[1] < res_est): + # only check if smaller resolution set + res_change_x = res_est / res_val[1] + res_change_y = res_est / res_val[0] + # approximate raster size after resampling + # by using factor of changed resolution + rastersize = ( + rastersize_x * res_change_x 
* rastersize_y * res_change_y + ) + + # compare estimated raster output size with pixel limit + # and raise exception if exceeded + if rastersize > self.cell_limit: + raise AsyncProcessError( + "Processing pixel limit exceeded for raster import. " + "Please set e.g. region smaller." + ) def _check_reset_region(self): """Check the current region settings against the user cell limit. @@ -1479,10 +1628,14 @@ def _run_module(self, process, poll_time=0.05): ) self._send_resource_update(message) + # Check pixel limit for r.import operations + if process.executable == "r.import": + self._check_pixellimit_rimport(process.executable_params) + # Check reset region if a g.region call was present in the process # chain. By default the initial value of last_module is "g.region" to - # assure for first run of a process from the process chain, the - # region settings are evaluated + # assure for first run of a process from the process chain, the region + # settings are evaluated if ( self.last_module == "g.region" and process.skip_permission_check is False From 98d6a477cf6ea428c78f0918485226e7ed826d03 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:46:45 +0100 Subject: [PATCH 17/46] Add stdin for parameters (#501) * add stdin for parameters * add filter to stdout for other modules * add tests --------- Co-authored-by: anikaweinmann --- .../ephemeral_processing.py | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 15bbb9b..27462cd 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -4,7 +4,7 @@ # performance processing of geographical data that uses GRASS GIS for # computational tasks. 
For details, see https://actinia.mundialis.de/ # -# Copyright (c) 2016-2023 Sören Gebbert and mundialis GmbH & Co. KG +# Copyright (c) 2016-2024 Sören Gebbert and mundialis GmbH & Co. KG # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -47,7 +47,10 @@ from actinia_core.core.redis_lock import RedisLockingInterface from actinia_core.core.resources_logger import ResourceLogger from actinia_core.core.mapset_merge_utils import change_mapsetname -from actinia_core.core.common.process_chain import ProcessChainConverter +from actinia_core.core.common.process_chain import ( + get_param_stdin_part, + ProcessChainConverter, +) from actinia_core.core.common.exceptions import ( AsyncProcessError, AsyncProcessTermination, @@ -71,7 +74,7 @@ __license__ = "GPLv3" __author__ = "Sören Gebbert, Anika Weinmann, Lina Krisztian" __copyright__ = ( - "Copyright 2016-2023, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2024, Sören Gebbert and mundialis GmbH & Co. KG" ) __maintainer__ = "mundialis GmbH & Co. KG" @@ -1685,6 +1688,42 @@ def _run_executable(self, process, poll_time=0.005): ) stdin_file = None + if process.param_stdin_sources: + for num, func in process.param_stdin_sources.items(): + func_name = f"PARAM_STDIN_FUNC_{num}" + for i in range(len(process.executable_params)): + param = process.executable_params[i] + if func_name in param: + par, val = param.split("=", 1) + par_val = func().strip() + val_splitted = val.split(func_name) + for j in range(1, len(val_splitted)): + filtered_par_value = par_val + filtered_func_name = func_name + # filter stdout/stderr + if "::" in val_splitted[j]: + filter = get_param_stdin_part( + val_splitted[j][2:] + ) + if "=" not in par_val: + raise AsyncProcessError( + "Error while running executable " + f"<{process.executable}>: <{filter}> " + "cannot be selected. Maybe you have to " + "set the '-g' flag for the stdout/stderr " + "module." 
+ ) + filtered_par_value = { + x.split("=")[0]: x.split("=")[1] + for x in par_val.split() + }[filter] + filtered_func_name += f"::{filter}" + process.executable_params[ + i + ] = process.executable_params[i].replace( + filtered_func_name, filtered_par_value + ) + if process.stdin_source is not None: tmp_file = self.proc_chain_converter.generate_temp_file_path() stdin_file = open(tmp_file, "w") From 9431eb1a1105ed7ec5d3ac62f6ec5dc073f7f05f Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Fri, 21 Jun 2024 09:08:13 +0200 Subject: [PATCH 18/46] Lint (#535) * lint * update lint workflow * update to new black version * mdformat --- .../processing/actinia_processing/ephemeral_processing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 27462cd..3a7f987 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -1718,10 +1718,10 @@ def _run_executable(self, process, poll_time=0.005): for x in par_val.split() }[filter] filtered_func_name += f"::{filter}" - process.executable_params[ - i - ] = process.executable_params[i].replace( - filtered_func_name, filtered_par_value + process.executable_params[i] = ( + process.executable_params[i].replace( + filtered_func_name, filtered_par_value + ) ) if process.stdin_source is not None: From ae09f70e66fd9bd5d7d426355f1d4032857badbc Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:59:37 +0100 Subject: [PATCH 19/46] Renamed GRASS GIS locations to projects (#565) * add decorator for deprecated locations * add project endpoints * endpoints for projects * rename location * rename location * unify maintainer * fix GRASS Initialisation * black * fixes for tests and G83 * fixes * add rest 
of changes * Update tests/test_job_resumption.py * fix endpoints and job resumption tests * Update src/actinia_core/core/common/api_logger.py * Tests for G84 * Test pipelines for G8.3 and G8.4 * Update src/actinia_core/rest/base/resource_base.py * fix test workflow * fix tests for G83 * fix error due to lib update * fix proc name * Update src/actinia_core/endpoints.py Co-authored-by: Carmen Tawalika --------- Co-authored-by: Carmen Tawalika Co-authored-by: Carmen --- .../ephemeral_processing.py | 176 +++++++++--------- 1 file changed, 86 insertions(+), 90 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 3a7f987..705d3f8 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -68,7 +68,7 @@ ) from actinia_core.core.interim_results import InterimResult, get_directory_size from actinia_core.rest.base.user_auth import ( - check_location_mapset_module_access, + check_project_mapset_module_access, ) __license__ = "GPLv3" @@ -92,23 +92,23 @@ class EphemeralProcessing(object): e.g: /tmp/soeren_disdbase - 2. Create the required location directory + 2. Create the required project directory e.g: /tmp/soeren_temp_gisdbase/ECAD 3. Softlink the PERMANENT and all required mapsets into the - new location directory from the original location, + new project directory from the original project, check the input parameter of the module for which mapsets must be linked - e.g: /mount/groups/[user group]/locations/ECAD/PERMANENT + e.g: /mount/groups/[user group]/projects/ECAD/PERMANENT -> /tmp/soeren_temp_gisdbase/ECAD/PERMANENT - e.g: /mount/groups/[user group]/locations/ECAD/Temperature + e.g: /mount/groups/[user group]/projects/ECAD/Temperature -> /tmp/soeren_temp_gisdbase/ECAD/Temperature 4. 
Set the GRASS GIS environmental variables to point to the new gisdbase, - location and PERMANENT mapset + project and PERMANENT mapset - 5. Create a new mapset with g.mapset in the temporary location directory + 5. Create a new mapset with g.mapset in the temporary project directory e.g: /tmp/soeren_temp_gisdbase/ECAD/MyMapset @@ -178,7 +178,7 @@ def __init__(self, rdc): # extended with the user group name in the setup self.grass_base_dir = self.rdc.grass_base_dir - self.location_name = self.rdc.location_name + self.project_name = self.rdc.project_name self.mapset_name = self.rdc.mapset_name # Set this True if the work is performed based on global database self.is_global_database = False @@ -204,12 +204,12 @@ def __init__(self, rdc): self.temp_grass_data_base = ( None # Path to the temporary grass database ) - self.temp_location_path = None # Path to the temporary location + self.temp_project_path = None # Path to the temporary project self.temp_file_path = None # The path to store temporary created files - self.global_location_path = ( - None # The path to the global location to link + self.global_project_path = ( + None # The path to the global project to link ) - self.user_location_path = None # The path to the user location to link + self.user_project_path = None # The path to the user project to link # List of resources that should be created self.resource_export_list = list() @@ -239,7 +239,7 @@ def __init__(self, rdc): list() ) # The process chain analysis will provide # a list of required mapsets that must be - # linked in the temporary location + # linked in the temporary project # The module that was called in the process chain, to detect g.region # calls and check for correct region settings @@ -675,7 +675,7 @@ def _validate_process_chain( if process.exec_type == "grass" or process.exec_type == "exec": if skip_permission_check is False: if process.skip_permission_check is False: - resp = check_location_mapset_module_access( + resp = 
check_project_mapset_module_access( user_credentials=self.user_credentials, config=self.config, module_name=process.executable, @@ -715,23 +715,23 @@ def _setup(self, init_grass=True): - Create the redis lock interface for resource locking - Set cell limit, process number limit and process time limit from user credentials. - - Create all required paths to original and temporary location and + - Create all required paths to original and temporary project and mapsets. - - temp_location_path - - global_location_path + - temp_project_path + - global_project_path - grass_user_data_base <- This path will be created if it does not exist - - user_location_path <- This path will be created if it does not + - user_project_path <- This path will be created if it does not exist - temp_grass_data_base <- This path will be created - temp_file_path <- This path will be created - - Check if the current working location is in a persistent (global) + - Check if the current working project is in a persistent (global) GRASS GIS database (is_global_database) - Create the process chain to process list converter Args: init_grass (bool): Set true to initialize the user credentials - and the temporary database and location paths + and the temporary database and project paths """ # The setup should only be executed once @@ -776,7 +776,7 @@ def _setup(self, init_grass=True): ) # Check and create all required paths to global, user and temporary - # locations + # projects if init_grass is True: self._setup_paths() @@ -806,12 +806,12 @@ def _setup_paths(self): ) self.temp_file_path = os.path.join(self.temp_grass_data_base, ".tmp") - if self.location_name: - self.temp_location_path = os.path.join( - self.temp_grass_data_base, self.location_name + if self.project_name: + self.temp_project_path = os.path.join( + self.temp_grass_data_base, self.project_name ) - self.global_location_path = os.path.join( - self.grass_data_base, self.location_name + self.global_project_path = os.path.join( + 
self.grass_data_base, self.project_name ) # Create the user database path if it does not exist if not os.path.exists(self.grass_user_data_base): @@ -823,48 +823,48 @@ def _setup_paths(self): ) if not os.path.exists(self.grass_user_data_base): os.mkdir(self.grass_user_data_base) - # Create the user group specific location path, if it does not + # Create the user group specific project path, if it does not # exist - self.user_location_path = os.path.join( - self.grass_user_data_base, self.location_name + self.user_project_path = os.path.join( + self.grass_user_data_base, self.project_name ) - if not os.path.exists(self.user_location_path): - os.mkdir(self.user_location_path) - # Check if the location is located in the global database + if not os.path.exists(self.user_project_path): + os.mkdir(self.user_project_path) + # Check if the project is located in the global database self.is_global_database = False - location = os.path.join(self.grass_data_base, self.location_name) - if os.path.isdir(location): + project = os.path.join(self.grass_data_base, self.project_name) + if os.path.isdir(project): self.is_global_database = True - # Create the database, location and temporary file directories + # Create the database, project and temporary file directories os.mkdir(self.temp_grass_data_base) os.mkdir(self.temp_file_path) def _create_temp_database(self, mapsets=None): - """Create a temporary gis database with location and mapsets + """Create a temporary gis database with project and mapsets from the global and user group database for processing. IMPORTANT: All processing and mapaste management is performed within a temporary database! - Link the required existing mapsets of global and user group locations - into the temporary location directory. + Link the required existing mapsets of global and user group projects + into the temporary project directory. Linking is performed in two steps: - 1.) 
If the location is a global location, then the mapsets from the - global location are linked in the temporary locations - 2.) Then link all required mapsets from the user group location - into the temporary location + 1.) If the project is a global project, then the mapsets from the + global project are linked in the temporary projects + 2.) Then link all required mapsets from the user group project + into the temporary project - Only mapsets from the global location are linked into the temporary - location to which the user group has access. + Only mapsets from the global project are linked into the temporary + project to which the user group has access. It checks for access in the global database but not in the user group database. The user can always access its own data of its group. Args: mapsets: A list of mapset names that should be linked into - the temporary location. If the list is empty, all + the temporary project. If the list is empty, all available user accessible mapsets of the global - and user group specific location will be linked. + and user group specific project will be linked. Raises: This function raises AsyncProcessError in case of an error. 
@@ -875,8 +875,8 @@ def _create_temp_database(self, mapsets=None): mapsets = [] try: - # Create the temporary location directory - os.mkdir(self.temp_location_path) + # Create the temporary project directory + os.mkdir(self.temp_project_path) # Always link the PERMANENT mapset if len(mapsets) > 0 and "PERMANENT" not in mapsets: @@ -887,7 +887,7 @@ def _create_temp_database(self, mapsets=None): if not mapsets: check_all_mapsets = True - # User and global location mapset linking + # User and global project mapset linking self._link_mapsets(mapsets, mapsets_to_link, check_all_mapsets) # Check if we missed some of the required mapsets @@ -900,23 +900,21 @@ def _create_temp_database(self, mapsets=None): if mapset not in mapset_list: raise AsyncProcessError( "Unable to link all required mapsets into " - "temporary location. Missing or un-accessible " - f"mapset <{mapset}> in location " - f"<{self.location_name}>" + "temporary project. Missing or un-accessible " + f"mapset <{mapset}> in project " + f"<{self.project_name}>" ) # Link the original mapsets from global and user database into the - # temporary location + # temporary project for mapset_path, mapset in mapsets_to_link: if ( - os.path.isdir( - os.path.join(self.temp_location_path, mapset) - ) + os.path.isdir(os.path.join(self.temp_project_path, mapset)) is False ): os.symlink( mapset_path, - os.path.join(self.temp_location_path, mapset), + os.path.join(self.temp_project_path, mapset), ) except Exception as e: @@ -926,23 +924,23 @@ def _create_temp_database(self, mapsets=None): ) def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): - """Helper method to link locations mapsets + """Helper method to link projects mapsets Args: - mapsets (list): List of mapsets in location + mapsets (list): List of mapsets in project mapsets_to_link (list): List of mapsets paths to link check_all_mapsets (bool): If set True, the mapsets list is created - with all locations on location_path + with all projects on 
project_path Returns: - mapsets (list): List of mapsets in location + mapsets (list): List of mapsets in project mapsets_to_link (list): List of mapsets paths to link """ - # Global location mapset linking + # Global project mapset linking if self.is_global_database is True: - # List all available mapsets in the global location + # List all available mapsets in the global project mapsets, mapsets_to_link = self._list_all_available_mapsets( - self.global_location_path, + self.global_project_path, mapsets, check_all_mapsets, mapsets_to_link, @@ -953,9 +951,9 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): for mapset in mapsets: if mapset not in mapsets_to_link: left_over_mapsets.append(mapset) - # List all available mapsets in the user location + # List all available mapsets in the user project mapsets, mapsets_to_link = self._list_all_available_mapsets( - self.user_location_path, + self.user_project_path, left_over_mapsets, check_all_mapsets, mapsets_to_link, @@ -965,7 +963,7 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): def _list_all_available_mapsets( self, - location_path, + project_path, mapsets, check_all_mapsets, mapsets_to_link, @@ -975,26 +973,26 @@ def _list_all_available_mapsets( it is checked if the mapset can be accessed. Args: - location_path (str): Path to location (global or user) + project_path (str): Path to project (global or user) mapsets (list): List of mapsets names to link. 
The mapsets list can be empty, if check_all_mapsets is True the list is filled with all mapsets from - the location_path + the project_path check_all_mapsets (bool): If set True, the mapsets list is created - with all locations on location_path + with all projects on project_path mapsets_to_link (list): List of mapset paths to link - global_db (bool): If set True, the location/mapset access is + global_db (bool): If set True, the project/mapset access is checked Returns: - mapsets (list): List of mapsets in location + mapsets (list): List of mapsets in project mapsets_to_link (list): List of mapsets paths to link """ - if os.path.isdir(location_path): + if os.path.isdir(project_path): if check_all_mapsets is True: - mapsets = os.listdir(location_path) + mapsets = os.listdir(project_path) for mapset in mapsets: - mapset_path = os.path.join(location_path, mapset) + mapset_path = os.path.join(project_path, mapset) if os.path.isdir(mapset_path) and os.access( mapset_path, os.R_OK & os.X_OK ): @@ -1006,10 +1004,10 @@ def _list_all_available_mapsets( if mapset not in mapsets_to_link and global_db is True: # Link the mapset from the global database # only if it can be accessed - resp = check_location_mapset_module_access( + resp = check_project_mapset_module_access( user_credentials=self.user_credentials, config=self.config, - location_name=self.location_name, + project_name=self.project_name, mapset_name=mapset, ) if resp is None: @@ -1021,19 +1019,16 @@ def _list_all_available_mapsets( mapsets_to_link.append((mapset_path, mapset)) else: raise AsyncProcessError( - "Invalid mapset <%s> in location <%s>" - % (mapset, self.location_name) + "Invalid mapset <%s> in project <%s>" + % (mapset, self.project_name) ) else: if global_db is True: msg = ( - "Unable to access global location <%s>" - % self.location_name + "Unable to access global project <%s>" % self.project_name ) else: - msg = ( - "Unable to access user location <%s>" % self.location_name - ) + msg = "Unable to 
access user project <%s>" % self.project_name raise AsyncProcessError(msg) return mapsets, mapsets_to_link @@ -1052,14 +1047,14 @@ def _create_grass_environment( """ self.message_logger.info( - "Initlialize GRASS grass_data_base: %s; location: %s; mapset: %s" - % (grass_data_base, self.location_name, mapset_name) + "Initlialize GRASS grass_data_base: %s; project: %s; mapset: %s" + % (grass_data_base, self.project_name, mapset_name) ) self.ginit = GrassInitializer( grass_data_base=grass_data_base, grass_base_dir=self.config.GRASS_GIS_BASE, - location_name=self.location_name, + project_name=self.project_name, mapset_name=mapset_name, config=self.config, grass_addon_path=self.config.GRASS_ADDON_PATH, @@ -1110,7 +1105,7 @@ def _create_temporary_mapset( """ self.temp_mapset_path = os.path.join( - self.temp_location_path, temp_mapset_name + self.temp_project_path, temp_mapset_name ) # if interim_result_mapset is set copy the mapset from the interim @@ -1172,6 +1167,7 @@ def _create_temporary_mapset( # Set the vector database connection to vector map specific databases self.ginit.run_module( "db.connect", + # TODO GRASS GIS 9.0 [ "driver=sqlite", "database=$GISDBASE/$LOCATION_NAME/$MAPSET/vector/$MAP/" @@ -1185,7 +1181,7 @@ def _create_temporary_mapset( # to the temporary mapset if source_mapset_name is not None and interim_result_mapset is None: source_mapset_path = os.path.join( - self.temp_location_path, source_mapset_name + self.temp_project_path, source_mapset_name ) if os.path.exists(os.path.join(source_mapset_path, "WIND")): shutil.copyfile( @@ -1830,7 +1826,7 @@ def _create_temporary_grass_environment( 3. Create temporary mapset This method will link the required mapsets that are - defined in *self.required_mapsets* into the location. + defined in *self.required_mapsets* into the project. The mapsets may be from the global and/or user database. 
Args: From 8a5bdffc51278e16b437da4790ec2e00efe46b54 Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Thu, 5 Dec 2024 10:05:17 +0100 Subject: [PATCH 20/46] Introduce ruff (#574) * rename location * fixes for tests and G83 * fix job resumption tests * linting * initial ruff check * update test imports * lint * F841 and Post-PR * lint * test with wip linting workflow * test post-pr * fix ref * trigger build * always-post-pr * post-pr in separate workflow * rename workflow * fix lint error * trigger pipelines * add ruff config * update post-pr workflow * Update .github/workflows/linting.yml * fix tests --------- Co-authored-by: anikaweinmann --- .../ephemeral_processing.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 705d3f8..6344e54 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -138,7 +138,8 @@ def __init__(self, rdc): try: from fluent import sender - self.has_fluent = True + if sender: + self.has_fluent = True except Exception: self.has_fluent = False @@ -507,7 +508,7 @@ def _post_to_webhook(self, document, type): webhook_retries = 1 webhook_sleep = 0 - http_code, response_model = pickle.loads(document) + _, response_model = pickle.loads(document) webhook_not_reached = True retry = 0 @@ -1238,7 +1239,7 @@ def _check_pixellimit_rimport(self, process_executable_params): # if extent=region set, vrt only for region, not complete input if extent_region: # first query region extents - errorid, stdout_gregion, stderr_gregion = self.ginit.run_module( + errorid, stdout_gregion, _ = self.ginit.run_module( "g.region", ["-ug"] ) if errorid != 0: @@ -1258,13 +1259,13 @@ def _check_pixellimit_rimport(self, process_executable_params): # build vrt with previous defined 
parameters ( errorid, - stdout_gdalbuildvrt, - stderr_gdalbuildvrt, + _, + _, ) = self.ginit.run_module("/usr/bin/gdalbuildvrt", gdabuildvrt_params) # gdalinfo for created vrt gdalinfo_params = [vrt_out] - errorid, stdout_gdalinfo, stderr_gdalinfo = self.ginit.run_module( + errorid, stdout_gdalinfo, _ = self.ginit.run_module( "/usr/bin/gdalinfo", gdalinfo_params ) # parse "Size" output of gdalinfo @@ -1284,7 +1285,7 @@ def _check_pixellimit_rimport(self, process_executable_params): # If raster exceeds cell limit already in original resolution, next part can be skipped if rimport_res and (rastersize < self.cell_limit): # determine estimated resolution - errorid, stdout_estres, stderr_estres = self.ginit.run_module( + errorid, _, stderr_estres = self.ginit.run_module( "r.import", [vrt_out, "-e"] ) if "Estimated" in stderr_estres: @@ -1320,7 +1321,7 @@ def _check_pixellimit_rimport(self, process_executable_params): ( errorid, stdout_gregion, - stderr_gregion, + _, ) = self.ginit.run_module("g.region", ["-ug"]) res_val_ns = float( [x for x in stdout_gregion.split("\n") if "nsres=" in x][ @@ -1366,9 +1367,7 @@ def _check_reset_region(self): if self.skip_region_check is True: return - errorid, stdout_buff, stderr_buff = self.ginit.run_module( - "g.region", ["-ug"] - ) + errorid, stdout_buff, _ = self.ginit.run_module("g.region", ["-ug"]) if errorid != 0: raise AsyncProcessError( @@ -1690,7 +1689,7 @@ def _run_executable(self, process, poll_time=0.005): for i in range(len(process.executable_params)): param = process.executable_params[i] if func_name in param: - par, val = param.split("=", 1) + _, val = param.split("=", 1) par_val = func().strip() val_splitted = val.split(func_name) for j in range(1, len(val_splitted)): From 03f22e916ce279983fd09974581117d74a675893 Mon Sep 17 00:00:00 2001 From: linakrisztian <106728040+linakrisztian@users.noreply.github.com> Date: Tue, 15 Apr 2025 14:13:53 +0200 Subject: [PATCH 21/46] Remaining changes for switch of redis to valkey (#601) 
* update setup from redis to valkey * replace redis with valkey calls + remove non needed redis-remainings * replace redis import with valkey import * adjust function from redis to valkey * replace redis by valkey function * fix call of valkey commands fitting valkey-syntax * Semantic renaming: redis to kvdb (#602) * semantic renaming: redis to kvdb * remove non semantic changes * remove non semantic changes * Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * remove non semantic changes * update readme * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update src/actinia_core/core/common/config.py Co-authored-by: Carmen Tawalika * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update docker/actinia-core-alpine/actinia.cfg Co-authored-by: Carmen Tawalika * PR review CT --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika * linting: black --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika --- .../actinia_processing/ephemeral_processing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py index 6344e54..ae69f22 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral_processing.py @@ -44,7 +44,7 @@ from actinia_core.core.common.process_object import Process from actinia_core.core.grass_init import GrassInitializer from actinia_core.core.messages_logger import MessageLogger -from actinia_core.core.redis_lock import RedisLockingInterface +from actinia_core.core.kvdb_lock import KvdbLockingInterface from 
actinia_core.core.resources_logger import ResourceLogger from actinia_core.core.mapset_merge_utils import change_mapsetname from actinia_core.core.common.process_chain import ( @@ -461,7 +461,7 @@ def _send_to_database(self, document, final=False): resource_id=self.resource_id, iteration=self.iteration, document=document, - expiration=self.config.REDIS_RESOURCE_EXPIRE_TIME, + expiration=self.config.KVDB_RESOURCE_EXPIRE_TIME, ) # Call the webhook after the final result was send to the database @@ -713,7 +713,7 @@ def _setup(self, init_grass=True): What is done: - Create the resource and message logger - - Create the redis lock interface for resource locking + - Create the kvdb lock interface for resource locking - Set cell limit, process number limit and process time limit from user credentials. - Create all required paths to original and temporary project and @@ -752,13 +752,13 @@ def _setup(self, init_grass=True): port=self.config.LOG_FLUENT_PORT, ) kwargs = dict() - kwargs["host"] = self.config.REDIS_SERVER_URL - kwargs["port"] = self.config.REDIS_SERVER_PORT + kwargs["host"] = self.config.KVDB_SERVER_URL + kwargs["port"] = self.config.KVDB_SERVER_PORT if ( - self.config.REDIS_SERVER_PW - and self.config.REDIS_SERVER_PW is not None + self.config.KVDB_SERVER_PW + and self.config.KVDB_SERVER_PW is not None ): - kwargs["password"] = self.config.REDIS_SERVER_PW + kwargs["password"] = self.config.KVDB_SERVER_PW self.resource_logger = ResourceLogger( **kwargs, fluent_sender=fluent_sender ) @@ -769,7 +769,7 @@ def _setup(self, init_grass=True): fluent_sender=fluent_sender, ) - self.lock_interface = RedisLockingInterface() + self.lock_interface = KvdbLockingInterface() self.lock_interface.connect(**kwargs) del kwargs self.process_time_limit = int( From 00dd79e1106bd874b4f2b95c423c438395aa8427 Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Fri, 1 Apr 2022 11:34:10 +0300 Subject: [PATCH 22/46] Refactor rest - part 2 (#322) * reactivate redis queue * rename variable * 
make queue type configurable * enhance dev setup for redis queue * lint * move user_auth * move user auth * move base_login * move map_layer_base * move renderer_base * move resource_base * fix relative imports * lint * first splitup: raster_colors * splitup download_cache_management * splitup ephemeral_custom_processing * splitup ephemeral_processing_with_export * splitup ephemeral_processing * splitup renderer_base * splitup persistent_processing * splitup location_management * splitup map_layer_management * splitup mapset_management * splitup persistent_mapset_merger * splitup process_validation * splitup raster_export * splitup raster_layer * splitup raster_legend * lint * splitup raster_renderer * splitup resource_storage_management * splitup strds_management * splitup strds_raster_management * splitup strds_renderer * splitup vector_layer * splitup vector_renderer * lint * make inheritance more clear * add readme * fix import --- src/actinia_core/processing/common/utils.py | 64 +++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 src/actinia_core/processing/common/utils.py diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_core/processing/common/utils.py new file mode 100644 index 0000000..e15846a --- /dev/null +++ b/src/actinia_core/processing/common/utils.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +####### +# actinia-core - an open source REST API for scalable, distributed, high +# performance processing of geographical data that uses GRASS GIS for +# computational tasks. For details, see https://actinia.mundialis.de/ +# +# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. KG +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +####### + +""" +Utils for processing +""" + +import importlib +from actinia_core.core.common.config import global_config + +__license__ = "GPLv3" +__author__ = "Carmen Tawalika" +__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__maintainer__ = "mundialis" + + +def try_import(actinia_module, actinia_class): + """Tries to import module, if module not found, throw exception + if local queue is used. + This is used to import processing classes. When job receiver + cannot import it and redis queue is used, this is not a problem. + When job receiver cannot import it and local queue is used, processing + wouldn't work and the exception is thrown. + Job executor needs to have it installed either way. + + Args: + actinia_module (string): processing module to use + + Raises: + e: ImportError or subclass + + Returns: + module: python module used for processing + """ + try: + imported_module = importlib.import_module(actinia_module, package=None) + imported_class = getattr(imported_module, actinia_class) + return imported_class + except ImportError as e: + # ModuleNotFoundError is a subclass and catched here as well. + if (global_config.QUEUE_TYPE == "local"): + e.msg += (" - No actinia_processing found but required " + + "for local queue!") + raise e From a5ea4c9a20a1edc523a2bd0d2a5684bdc357c789 Mon Sep 17 00:00:00 2001 From: Markus Neteler Date: Mon, 1 Aug 2022 14:09:10 +0200 Subject: [PATCH 23/46] actinia tests: fix test descriptions (#371) * tests: fix test descriptions This PR fixes some test descriptions and a few typos. 
--- src/actinia_core/processing/common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_core/processing/common/utils.py index e15846a..6715ec9 100644 --- a/src/actinia_core/processing/common/utils.py +++ b/src/actinia_core/processing/common/utils.py @@ -57,7 +57,7 @@ def try_import(actinia_module, actinia_class): imported_class = getattr(imported_module, actinia_class) return imported_class except ImportError as e: - # ModuleNotFoundError is a subclass and catched here as well. + # ModuleNotFoundError is a subclass and caught here as well. if (global_config.QUEUE_TYPE == "local"): e.msg += (" - No actinia_processing found but required " + "for local queue!") From 85a0600f5869d29dad5474d582b9efbd2cfd3084 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 22 Sep 2022 15:19:34 +0200 Subject: [PATCH 24/46] Black (#378) * linting tests + black workflow * linting * black * further black * fix unittests * fix test Co-authored-by: anikaweinmann --- src/actinia_core/processing/common/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_core/processing/common/utils.py index 6715ec9..e9145e4 100644 --- a/src/actinia_core/processing/common/utils.py +++ b/src/actinia_core/processing/common/utils.py @@ -30,7 +30,9 @@ __license__ = "GPLv3" __author__ = "Carmen Tawalika" -__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__copyright__ = ( + "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +) __maintainer__ = "mundialis" @@ -58,7 +60,9 @@ def try_import(actinia_module, actinia_class): return imported_class except ImportError as e: # ModuleNotFoundError is a subclass and caught here as well. 
- if (global_config.QUEUE_TYPE == "local"): - e.msg += (" - No actinia_processing found but required " - + "for local queue!") + if global_config.QUEUE_TYPE == "local": + e.msg += ( + " - No actinia_processing found but required " + + "for local queue!" + ) raise e From d92bf0343ff193e13d1ffa1bb4a50e1ccf649747 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:59:37 +0100 Subject: [PATCH 25/46] Renamed GRASS GIS locations to projects (#565) * add decorator for deprecated locations * add project endpoints * endpoints for projects * rename location * rename location * unify maintainer * fix GRASS Initialisation * black * fixes for tests and G83 * fixes * add rest of changes * Update tests/test_job_resumption.py * fix endpoints and job resumption tests * Update src/actinia_core/core/common/api_logger.py * Tests for G84 * Test pipelines for G8.3 and G8.4 * Update src/actinia_core/rest/base/resource_base.py * fix test workflow * fix tests for G83 * fix error due to lib update * fix proc name * Update src/actinia_core/endpoints.py Co-authored-by: Carmen Tawalika --------- Co-authored-by: Carmen Tawalika Co-authored-by: Carmen --- src/actinia_core/processing/common/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_core/processing/common/utils.py index e9145e4..195b290 100644 --- a/src/actinia_core/processing/common/utils.py +++ b/src/actinia_core/processing/common/utils.py @@ -33,7 +33,8 @@ __copyright__ = ( "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" ) -__maintainer__ = "mundialis" +__maintainer__ = "mundialis GmbH & Co. 
KG" +__email__ = "info@mundialis.de" def try_import(actinia_module, actinia_class): From 1fa1086c5166d1d02a9457930b0786f5d31df37a Mon Sep 17 00:00:00 2001 From: linakrisztian <106728040+linakrisztian@users.noreply.github.com> Date: Tue, 15 Apr 2025 14:13:53 +0200 Subject: [PATCH 26/46] Remaining changes for switch of redis to valkey (#601) * update setup from redis to valkey * replace redis with valkey calls + remove non needed redis-remainings * replace redis import with valkey import * adjust function from redis to valkey * replace redis by valkey function * fix call of valkey commands fitting valkey-syntax * Semantic renaming: redis to kvdb (#602) * semantic renaming: redis to kvdb * remove non semantic changes * remove non semantic changes * Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * remove non semantic changes * update readme * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update src/actinia_core/core/common/config.py Co-authored-by: Carmen Tawalika * Update src/actinia_core/README.md Co-authored-by: Carmen Tawalika * Update docker/actinia-core-alpine/actinia.cfg Co-authored-by: Carmen Tawalika * PR review CT --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika * linting: black --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Carmen Tawalika --- src/actinia_core/processing/common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_core/processing/common/utils.py index 195b290..a32ff25 100644 --- a/src/actinia_core/processing/common/utils.py +++ b/src/actinia_core/processing/common/utils.py @@ -41,7 +41,7 @@ def try_import(actinia_module, actinia_class): """Tries to import module, if module not found, throw exception if local 
queue is used. This is used to import processing classes. When job receiver - cannot import it and redis queue is used, this is not a problem. + cannot import it and kvdb queue is used, this is not a problem. When job receiver cannot import it and local queue is used, processing wouldn't work and the exception is thrown. Job executor needs to have it installed either way. From 6cd94d50366d64bb3ce5c640e2a06e94438da424 Mon Sep 17 00:00:00 2001 From: mmacata Date: Wed, 2 Jun 2021 17:56:25 +0200 Subject: [PATCH 27/46] Move modules used by plugins (#221) * move modules used by plugins * move shared modules to common folder --- src/actinia_core/core/common/exceptions.py | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 src/actinia_core/core/common/exceptions.py diff --git a/src/actinia_core/core/common/exceptions.py b/src/actinia_core/core/common/exceptions.py new file mode 100644 index 0000000..81175e2 --- /dev/null +++ b/src/actinia_core/core/common/exceptions.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +####### +# actinia-core - an open source REST API for scalable, distributed, high +# performance processing of geographical data that uses GRASS GIS for +# computational tasks. For details, see https://actinia.mundialis.de/ +# +# Copyright (c) 2016-2018 Sören Gebbert and mundialis GmbH & Co. KG +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +####### + +""" +Actinia Core Exceptions that should be used in case an error occurs that is +related to the Actinia Core functionality +""" + +__license__ = "GPLv3" +__author__ = "Sören Gebbert" +__copyright__ = "Copyright 2016-2018, Sören Gebbert and mundialis GmbH & Co. KG" +__maintainer__ = "mundialis" + + +class AsyncProcessError(Exception): + """Raise this exception in case the asynchronous processing faces an error + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) + + +class RsyncError(Exception): + """Raise this exception in case the rsync of the interim result fails + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) + + +class AsyncProcessTermination(Exception): + """Raise this exception in case the termination requests was executed successfully + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) + + +class AsyncProcessTimeLimit(Exception): + """Raise this exception in case the process time limit was reached + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) + + +class GoogleCloudAPIError(Exception): + """Raise this exception in case something went wrong in + when accessing the google API + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) From c03349013325fbf7f9ac20c52d5825b8d35ce9fb Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Fri, 26 Nov 2021 11:54:30 +0100 Subject: [PATCH 28/46] try to fix uncontrolled data used in path (#280) Co-authored-by: anikaweinmann --- src/actinia_core/core/common/exceptions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/src/actinia_core/core/common/exceptions.py b/src/actinia_core/core/common/exceptions.py index 81175e2..8850dea 100644 --- a/src/actinia_core/core/common/exceptions.py +++ b/src/actinia_core/core/common/exceptions.py @@ -71,3 +71,11 @@ class GoogleCloudAPIError(Exception): def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) + + +class SecurityError(Exception): + """Raise this exception in case some security problem occurs + """ + def __init__(self, message): + message = "%s: %s" % (str(self.__class__.__name__), message) + Exception.__init__(self, message) From 3870c9f62f9c37f238f06c1b8a1a901a8863f275 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 22 Sep 2022 15:19:34 +0200 Subject: [PATCH 29/46] Black (#378) * linting tests + black workflow * linting * black * further black * fix unittests * fix test Co-authored-by: anikaweinmann --- src/actinia_core/core/common/exceptions.py | 26 ++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/actinia_core/core/common/exceptions.py b/src/actinia_core/core/common/exceptions.py index 8850dea..9b5113e 100644 --- a/src/actinia_core/core/common/exceptions.py +++ b/src/actinia_core/core/common/exceptions.py @@ -28,37 +28,44 @@ __license__ = "GPLv3" __author__ = "Sören Gebbert" -__copyright__ = "Copyright 2016-2018, Sören Gebbert and mundialis GmbH & Co. KG" +__copyright__ = ( + "Copyright 2016-2018, Sören Gebbert and mundialis GmbH & Co. 
KG" +) __maintainer__ = "mundialis" class AsyncProcessError(Exception): - """Raise this exception in case the asynchronous processing faces an error """ + Raise this exception in case the asynchronous processing faces an error + """ + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) class RsyncError(Exception): - """Raise this exception in case the rsync of the interim result fails - """ + """Raise this exception in case the rsync of the interim result fails""" + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) class AsyncProcessTermination(Exception): - """Raise this exception in case the termination requests was executed successfully """ + Raise this exception in case the termination requests was executed + successfully + """ + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) class AsyncProcessTimeLimit(Exception): - """Raise this exception in case the process time limit was reached - """ + """Raise this exception in case the process time limit was reached""" + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) @@ -68,14 +75,15 @@ class GoogleCloudAPIError(Exception): """Raise this exception in case something went wrong in when accessing the google API """ + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) class SecurityError(Exception): - """Raise this exception in case some security problem occurs - """ + """Raise this exception in case some security problem occurs""" + def __init__(self, message): message = "%s: %s" % (str(self.__class__.__name__), message) Exception.__init__(self, message) From 30f827cc7c21d47f26292c1daa947c50ca62216e Mon Sep 17 00:00:00 2001 From: Anika Weinmann 
<37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:59:37 +0100 Subject: [PATCH 30/46] Renamed GRASS GIS locations to projects (#565) * add decorator for deprecated locations * add project endpoints * endpoints for projects * rename location * rename location * unify maintainer * fix GRASS Initialisation * black * fixes for tests and G83 * fixes * add rest of changes * Update tests/test_job_resumption.py * fix endpoints and job resumption tests * Update src/actinia_core/core/common/api_logger.py * Tests for G84 * Test pipelines for G8.3 and G8.4 * Update src/actinia_core/rest/base/resource_base.py * fix test workflow * fix tests for G83 * fix error due to lib update * fix proc name * Update src/actinia_core/endpoints.py Co-authored-by: Carmen Tawalika --------- Co-authored-by: Carmen Tawalika Co-authored-by: Carmen --- src/actinia_core/core/common/exceptions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/actinia_core/core/common/exceptions.py b/src/actinia_core/core/common/exceptions.py index 9b5113e..ecbbfcc 100644 --- a/src/actinia_core/core/common/exceptions.py +++ b/src/actinia_core/core/common/exceptions.py @@ -31,7 +31,8 @@ __copyright__ = ( "Copyright 2016-2018, Sören Gebbert and mundialis GmbH & Co. KG" ) -__maintainer__ = "mundialis" +__maintainer__ = "mundialis GmbH & Co. 
KG" +__email__ = "info@mundialis.de" class AsyncProcessError(Exception): From 9162db6df4b22b0ef47f734e013c166370809c44 Mon Sep 17 00:00:00 2001 From: Carmen Date: Mon, 2 Jun 2025 15:11:06 +0200 Subject: [PATCH 31/46] split up module from actinia-core --- src/actinia_core/core/common/exceptions.py | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/actinia_core/core/common/exceptions.py b/src/actinia_core/core/common/exceptions.py index ecbbfcc..61bcacd 100644 --- a/src/actinia_core/core/common/exceptions.py +++ b/src/actinia_core/core/common/exceptions.py @@ -4,7 +4,7 @@ # performance processing of geographical data that uses GRASS GIS for # computational tasks. For details, see https://actinia.mundialis.de/ # -# Copyright (c) 2016-2018 Sören Gebbert and mundialis GmbH & Co. KG +# Copyright (c) 2016-2025 Sören Gebbert and mundialis GmbH & Co. KG # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ __license__ = "GPLv3" __author__ = "Sören Gebbert" __copyright__ = ( - "Copyright 2016-2018, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2025, Sören Gebbert and mundialis GmbH & Co. KG" ) __maintainer__ = "mundialis GmbH & Co. 
KG" __email__ = "info@mundialis.de" @@ -45,12 +45,12 @@ def __init__(self, message): Exception.__init__(self, message) -class RsyncError(Exception): - """Raise this exception in case the rsync of the interim result fails""" +# class RsyncError(Exception): +# """Raise this exception in case the rsync of the interim result fails""" - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) - Exception.__init__(self, message) +# def __init__(self, message): +# message = "%s: %s" % (str(self.__class__.__name__), message) +# Exception.__init__(self, message) class AsyncProcessTermination(Exception): @@ -72,19 +72,19 @@ def __init__(self, message): Exception.__init__(self, message) -class GoogleCloudAPIError(Exception): - """Raise this exception in case something went wrong in - when accessing the google API - """ +# class GoogleCloudAPIError(Exception): +# """Raise this exception in case something went wrong in +# when accessing the google API +# """ - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) - Exception.__init__(self, message) +# def __init__(self, message): +# message = "%s: %s" % (str(self.__class__.__name__), message) +# Exception.__init__(self, message) -class SecurityError(Exception): - """Raise this exception in case some security problem occurs""" +# class SecurityError(Exception): +# """Raise this exception in case some security problem occurs""" - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) - Exception.__init__(self, message) +# def __init__(self, message): +# message = "%s: %s" % (str(self.__class__.__name__), message) +# Exception.__init__(self, message) From 0c024c6a20b5d4081938250609f935538bdcb379 Mon Sep 17 00:00:00 2001 From: Carmen Date: Mon, 2 Jun 2025 17:14:48 +0200 Subject: [PATCH 32/46] cleanup --- pyproject.toml | 10 ++- src/actinia_processing_lib/api/__init__.py | 4 - src/actinia_processing_lib/api/helloworld.py | 
57 ------------ .../api/project_helloworld.py | 83 ----------------- .../apidocs/__init__.py | 4 - .../apidocs/helloworld.py | 66 -------------- .../apidocs/project_helloworld.py | 88 ------------------- src/actinia_processing_lib/core/__init__.py | 4 - .../core/common/__init__.py | 0 .../core/common/exceptions.py | 0 src/actinia_processing_lib/core/example.py | 36 -------- src/actinia_processing_lib/endpoints.py | 63 ------------- src/actinia_processing_lib/model/__init__.py | 4 - .../model/response_models.py | 52 ----------- .../ephemeral/persistent_processing.py | 5 +- .../ephemeral_processing.py | 13 +-- .../processing/common/utils.py | 0 src/actinia_processing_lib/wsgi.py | 21 ----- 18 files changed, 16 insertions(+), 494 deletions(-) delete mode 100644 src/actinia_processing_lib/api/__init__.py delete mode 100644 src/actinia_processing_lib/api/helloworld.py delete mode 100644 src/actinia_processing_lib/api/project_helloworld.py delete mode 100644 src/actinia_processing_lib/apidocs/__init__.py delete mode 100644 src/actinia_processing_lib/apidocs/helloworld.py delete mode 100644 src/actinia_processing_lib/apidocs/project_helloworld.py create mode 100644 src/actinia_processing_lib/core/common/__init__.py rename src/{actinia_core => actinia_processing_lib}/core/common/exceptions.py (100%) delete mode 100644 src/actinia_processing_lib/core/example.py delete mode 100644 src/actinia_processing_lib/endpoints.py delete mode 100644 src/actinia_processing_lib/model/__init__.py delete mode 100644 src/actinia_processing_lib/model/response_models.py rename src/{actinia_core => actinia_processing_lib}/processing/actinia_processing/ephemeral/persistent_processing.py (99%) rename src/{actinia_core => actinia_processing_lib}/processing/actinia_processing/ephemeral_processing.py (99%) rename src/{actinia_core => actinia_processing_lib}/processing/common/utils.py (100%) delete mode 100644 src/actinia_processing_lib/wsgi.py diff --git a/pyproject.toml b/pyproject.toml index 
e83c9be..18cf37e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,11 +5,14 @@ build-backend = "setuptools.build_meta" [project] name = "actinia-processing-lib" version = "1.0.0" -description = "An actinia-core plugin which adds example endpoints to actinia-core" +description = "An actinia library for common processing functionality" readme = "README.md" authors = [ { name = "Carmen Tawalika"}, { name = "Anika Weinmann"}, + { name = "Markus Neteler"}, + { name = "Julia Haas"}, + { name = "Lina Krisztian"}, ] classifiers = [ "License :: OSI Approved :: Apache Software License", @@ -20,15 +23,14 @@ keywords = [ "processing", "earth observation", "cloud-based processing", - "rest api", "gis", "grass gis", "osgeo", "example", ] dependencies = [ - "colorlog>=4.2.1", - "xmltodict", + "flask", + "actinia-core", ] [project.optional-dependencies] diff --git a/src/actinia_processing_lib/api/__init__.py b/src/actinia_processing_lib/api/__init__.py deleted file mode 100644 index 4f3e2a8..0000000 --- a/src/actinia_processing_lib/api/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""actinia-example-plguin API part of package. - -This part provides the API part of the actinia-processing-lib. -""" diff --git a/src/actinia_processing_lib/api/helloworld.py b/src/actinia_processing_lib/api/helloworld.py deleted file mode 100644 index 0db22dc..0000000 --- a/src/actinia_processing_lib/api/helloworld.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Hello World class -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -from flask import make_response, request -from flask_restful_swagger_2 import Resource, swagger - -from actinia_processing_lib.apidocs import helloworld -from actinia_processing_lib.core.example import transform_input -from actinia_processing_lib.model.response_models import ( - SimpleStatusCodeResponseModel, -) - - -class HelloWorld(Resource): - """Returns 'Hello world!'.""" - - def __init__(self) -> None: - """Hello world class initialisation.""" - self.msg = "Hello world!" - - @swagger.doc(helloworld.describe_hello_world_get_docs) - def get(self) -> SimpleStatusCodeResponseModel: - """Get 'Hello world!' as answer string.""" - return SimpleStatusCodeResponseModel(status=200, message=self.msg) - - @swagger.doc(helloworld.describe_hello_world_post_docs) - def post(self) -> SimpleStatusCodeResponseModel: - """Hello World post method with name from postbody.""" - req_data = request.get_json(force=True) - if isinstance(req_data, dict) is False or "name" not in req_data: - return make_response("Missing name in JSON content", 400) - name = req_data["name"] - msg = f"{self.msg} {transform_input(name)}" - - return SimpleStatusCodeResponseModel(status=200, message=msg) diff --git a/src/actinia_processing_lib/api/project_helloworld.py b/src/actinia_processing_lib/api/project_helloworld.py deleted file mode 100644 index c1cd58a..0000000 --- a/src/actinia_processing_lib/api/project_helloworld.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. 
- -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Hello World class -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2024 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -from typing import ClassVar - -from actinia_core.models.response_models import SimpleResponseModel -from actinia_core.rest.base.deprecated_locations import ( - location_deprecated_decorator, -) -from flask import jsonify, make_response, request -from flask.wrappers import Response -from flask_restful_swagger_2 import Resource, swagger - -from actinia_processing_lib.apidocs import project_helloworld -from actinia_processing_lib.core.example import transform_input - - -class ProjectHelloWorld(Resource): - """Returns 'Hello world with project/location!'.""" - - decorators: ClassVar[list] = [] - - # Add decorators for deprecated GRASS GIS locations - decorators.append(location_deprecated_decorator) - - def __init__(self) -> None: - """Project hello world class initialisation.""" - self.msg = "Project: Hello world!" - - @swagger.doc(project_helloworld.describe_project_hello_world_get_docs) - def get(self, project_name: str) -> Response: - """Get 'Hello world!' 
as answer string.""" - msg = f"{self.msg} {project_name}" - return make_response( - jsonify( - SimpleResponseModel( - status="200", - message=msg, - ), - ), - 200, - ) - - @swagger.doc(project_helloworld.describe_project_hello_world_post_docs) - def post(self, project_name: str) -> Response: - """Hello World post method with name from postbody.""" - req_data = request.get_json(force=True) - if isinstance(req_data, dict) is False or "name" not in req_data: - return make_response("Missing name in JSON content", 400) - name = req_data["name"] - msg = f"{self.msg} {transform_input(name)} {project_name}" - - return make_response( - jsonify( - SimpleResponseModel( - status="200", - message=msg, - ), - ), - 200, - ) diff --git a/src/actinia_processing_lib/apidocs/__init__.py b/src/actinia_processing_lib/apidocs/__init__.py deleted file mode 100644 index 0ff3d50..0000000 --- a/src/actinia_processing_lib/apidocs/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""actinia-example-plguin API DOCs part of package. - -This part provides the API DOCs part of the actinia-processing-lib. -""" diff --git a/src/actinia_processing_lib/apidocs/helloworld.py b/src/actinia_processing_lib/apidocs/helloworld.py deleted file mode 100644 index a4a8b24..0000000 --- a/src/actinia_processing_lib/apidocs/helloworld.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. 
If not, see . - -Hello World class -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -from actinia_processing_lib.model.response_models import ( - SimpleStatusCodeResponseModel, -) - -describe_hello_world_get_docs = { - # "summary" is taken from the description of the get method - "tags": ["example"], - "description": "Hello World example", - "responses": { - "200": { - "description": "This response returns the string 'Hello World!'", - "schema": SimpleStatusCodeResponseModel, - }, - }, -} - -describe_hello_world_post_docs = { - # "summary" is taken from the description of the get method - "tags": ["example"], - "description": "Hello World example with name", - "responses": { - "200": { - "description": "This response returns the string 'Hello World " - "NAME!'", - "schema": SimpleStatusCodeResponseModel, - }, - "400": { - "description": "This response returns a detail error message", - "schema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "detailed message", - "example": "Missing name in JSON content", - }, - }, - }, - }, - }, -} diff --git a/src/actinia_processing_lib/apidocs/project_helloworld.py b/src/actinia_processing_lib/apidocs/project_helloworld.py deleted file mode 100644 index 248f07a..0000000 --- a/src/actinia_processing_lib/apidocs/project_helloworld.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Hello World class -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2024 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -from actinia_processing_lib.model.response_models import ( - SimpleStatusCodeResponseModel, -) - -describe_project_hello_world_get_docs = { - # "summary" is taken from the description of the get method - "tags": ["example"], - "description": "Project Hello World example", - "parameters": [ - { - "name": "project_name", - "description": "The project name that contains the data that " - "should be processed", - "required": True, - "in": "path", - "type": "string", - "default": "nc_spm_08", - }, - ], - "responses": { - "200": { - "description": "This response returns the string 'Hello World!'", - "schema": SimpleStatusCodeResponseModel, - }, - }, -} - -describe_project_hello_world_post_docs = { - # "summary" is taken from the description of the get method - "tags": ["example"], - "description": "Project Hello World example with name", - "parameters": [ - { - "name": "project_name", - "description": "The project name that contains the data that " - "should be processed", - "required": True, - "in": "path", - "type": "string", - "default": "nc_spm_08", - }, - ], - "responses": { - "200": { - "description": "This response returns the string 'Hello World " - "NAME!'", - "schema": SimpleStatusCodeResponseModel, - }, - "400": { - "description": "This response returns a detail error message", - "schema": { - "type": "object", - "properties": { - "message": { - "type": "string", - "description": "detailed message", - "example": "Missing name in JSON content", - }, - }, - }, - }, - }, -} diff --git a/src/actinia_processing_lib/core/__init__.py b/src/actinia_processing_lib/core/__init__.py index 1c0d78c..e69de29 100644 --- 
a/src/actinia_processing_lib/core/__init__.py +++ b/src/actinia_processing_lib/core/__init__.py @@ -1,4 +0,0 @@ -"""actinia-example-plguin core part of package. - -This part provides the core part of the actinia-processing-lib. -""" diff --git a/src/actinia_processing_lib/core/common/__init__.py b/src/actinia_processing_lib/core/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/actinia_core/core/common/exceptions.py b/src/actinia_processing_lib/core/common/exceptions.py similarity index 100% rename from src/actinia_core/core/common/exceptions.py rename to src/actinia_processing_lib/core/common/exceptions.py diff --git a/src/actinia_processing_lib/core/example.py b/src/actinia_processing_lib/core/example.py deleted file mode 100644 index 0c50b4d..0000000 --- a/src/actinia_processing_lib/core/example.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Example core functionality -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -def transform_input(inp: str) -> str: - """Return a transformed string as example core function. - - Args: - inp (str): Input string to transform - - Returns: - (str) transformed string - - """ - return f"Hello world {inp.upper()}!" 
diff --git a/src/actinia_processing_lib/endpoints.py b/src/actinia_processing_lib/endpoints.py deleted file mode 100644 index 1f59db9..0000000 --- a/src/actinia_processing_lib/endpoints.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2025 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Add endpoints to flask app with endpoint definitions and routes -""" - -__license__ = "GPLv3" -__author__ = "Carmen Tawalika, Anika Weinmann" -__copyright__ = "Copyright 2022-2024 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - -from actinia_core.endpoints import get_endpoint_class_name -from flask_restful_swagger_2 import Api - -from actinia_processing_lib.api.helloworld import HelloWorld -from actinia_processing_lib.api.project_helloworld import ProjectHelloWorld - - -def create_project_endpoints( - apidoc: Api, - projects_url_part: str = "projects", -) -> None: - """Add resources with "project" inside the endpoint url to the api. 
- - Args: - apidoc (Api): Flask api - projects_url_part (str): The name of the projects inside the endpoint - URL; to add deprecated location endpoints set - it to "locations" - - """ - apidoc.add_resource( - ProjectHelloWorld, - f"/helloworld/{projects_url_part}/", - endpoint=get_endpoint_class_name(ProjectHelloWorld, projects_url_part), - ) - - -# endpoints loaded if run as actinia-core plugin as well as standalone app -def create_endpoints(flask_api: Api) -> None: - """Create plugin endpoints.""" - apidoc = flask_api - - apidoc.add_resource(HelloWorld, "/helloworld") - - # add deprecated location endpoints - create_project_endpoints(apidoc, projects_url_part="locations") - - # add project endpoints - create_project_endpoints(apidoc, projects_url_part="projects") diff --git a/src/actinia_processing_lib/model/__init__.py b/src/actinia_processing_lib/model/__init__.py deleted file mode 100644 index 44415e8..0000000 --- a/src/actinia_processing_lib/model/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""actinia-example-plguin model part of package. - -This part provides the model part of the actinia-processing-lib. -""" diff --git a/src/actinia_processing_lib/model/response_models.py b/src/actinia_processing_lib/model/response_models.py deleted file mode 100644 index 47a97d9..0000000 --- a/src/actinia_processing_lib/model/response_models.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Response models -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" - - -from typing import ClassVar - -from flask_restful_swagger_2 import Schema - - -class SimpleStatusCodeResponseModel(Schema): - """Simple response schema to inform about status.""" - - type: str = "object" - properties: ClassVar[dict] = { - "status": { - "type": "number", - "description": "The status code of the request.", - }, - "message": { - "type": "string", - "description": "A short message to describes the status", - }, - } - required: ClassVar[list[str]] = ["status", "message"] - - -simple_response_example = SimpleStatusCodeResponseModel( - status=200, - message="success", -) -SimpleStatusCodeResponseModel.example = simple_response_example diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py similarity index 99% rename from src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py rename to src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py index 21df14e..ff9af79 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py @@ -29,12 +29,13 @@ import shutil import subprocess -from actinia_core.processing.actinia_processing.ephemeral_processing import ( +from actinia_processing_lib.processing.actinia_processing.ephemeral_processing import ( EphemeralProcessing, ) -from actinia_core.core.common.exceptions import AsyncProcessError from actinia_core.core.mapset_merge_utils import change_mapsetname +from actinia_processing_lib.core.common.exceptions 
import AsyncProcessError + __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" __copyright__ = ( diff --git a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py b/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py similarity index 99% rename from src/actinia_core/processing/actinia_processing/ephemeral_processing.py rename to src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py index ae69f22..96ca8bb 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py @@ -51,12 +51,7 @@ get_param_stdin_part, ProcessChainConverter, ) -from actinia_core.core.common.exceptions import ( - AsyncProcessError, - AsyncProcessTermination, - RsyncError, -) -from actinia_core.core.common.exceptions import AsyncProcessTimeLimit +from actinia_core.core.common.exceptions import RsyncError from actinia_core.models.response_models import ( ProcessingResponseModel, ExceptionTracebackModel, @@ -71,6 +66,12 @@ check_project_mapset_module_access, ) +from actinia_processing_lib.core.common.exceptions import ( + AsyncProcessError, + AsyncProcessTermination, + AsyncProcessTimeLimit, +) + __license__ = "GPLv3" __author__ = "Sören Gebbert, Anika Weinmann, Lina Krisztian" __copyright__ = ( diff --git a/src/actinia_core/processing/common/utils.py b/src/actinia_processing_lib/processing/common/utils.py similarity index 100% rename from src/actinia_core/processing/common/utils.py rename to src/actinia_processing_lib/processing/common/utils.py diff --git a/src/actinia_processing_lib/wsgi.py b/src/actinia_processing_lib/wsgi.py deleted file mode 100644 index c885f74..0000000 --- a/src/actinia_processing_lib/wsgi.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. 
- -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -""" - -__license__ = "GPLv3" -__author__ = "Carmen Tawalika, Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. KG" From ce9c1dee5bf59bb16d9848a4daf6c954b3346a8d Mon Sep 17 00:00:00 2001 From: Carmen Date: Mon, 2 Jun 2025 18:19:54 +0200 Subject: [PATCH 33/46] lint --- .flake8 | 4 + ruff.toml | 133 ++++++- .../core/common/exceptions.py | 29 +- .../ephemeral/persistent_processing.py | 189 ++++----- .../ephemeral_processing.py | 360 ++++++++++-------- .../processing/common/utils.py | 12 +- 6 files changed, 439 insertions(+), 288 deletions(-) diff --git a/.flake8 b/.flake8 index e21fafd..80ef952 100644 --- a/.flake8 +++ b/.flake8 @@ -4,3 +4,7 @@ # E501 line too long (83 > 79 characters) exclude = .git,.pycache,build,.eggs + +per-file-ignores = + ./src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py: E501 + ./src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py: E501 diff --git a/ruff.toml b/ruff.toml index f43f568..0a46830 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,15 +1,122 @@ -# PLR0913 too-many-arguments (of function definition) -# PLR0917 too-many-positional-arguments (of function definition) -# PLW0603 (checks for use of) global-statement -# S107 hardcoded-password-default -# S606 start-process-with-no-shell -# FA102 future-required-type-annotation 
-# PLC0415 import-outside-top-level -# PLR2004 magic-value-comparison (unnamed numerical constants ("magic") values) - -lint.ignore = ["PLR0913", "PLR0917", "PLW0603", "S107", "S606", "FA102"] +lint.ignore = ["D104",] [lint.per-file-ignores] -"tests/testsuite.py" = [ "PLC0415",] -"tests/integrationtests/test_helloworld.py" = [ "PLR2004",] -"tests/integrationtests/test_projecthelloworld.py" = [ "PLR2004",] +"src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py" = [ + "A001", + "A002", + "ANN001", + "ANN201", + "ANN202", + "ANN204", + "ARG002", + "B007", + "B018", + "B904", + "BLE001", + "D104", + "D107", + "D200", + "D205", + "D400", + "D401", + "D404", + "D415", + "D417", + "E501", + "FBT002", + "FBT003", + "FURB113", + "G002", + "G003", + "G004", + "ISC003", + "N818", + "PERF203", + "PERF401", + "PLC0415", + "PLR0912", + "PLR0914", + "PLR0915", + "PLR1702", + "PLR1714", + "PLR2004", + "PLR6104", + "PLR6201", + "PLR6301", + "PLW1514", + "PLW2901", + "PTH102", + "PTH107", + "PTH110", + "PTH112", + "PTH113", + "PTH118", + "PTH119", + "PTH120", + "PTH123", + "PTH208", + "RUF015", + "S110", + "S301", + "S307", + "S403", + "S404", + "S603", + "SIM102", + "SIM105", + "SIM115", + "TRY003", + "TRY301", + "TRY400", + "UP031", +] +"src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py" = [ + "ANN001", + "ANN201", + "ANN202", + "ARG002", + "B904", + "BLE001", + "D104", + "D107", + "D205", + "D401", + "D417", + "E501", + "FBT002", + "G004", + "ISC003", + "PLR6301", + "PTH110", + "PTH112", + "PTH118", + "S307", + "S404", + "S603", + "SIM102", + "TRY003", + "TRY301", + "UP031", +] +"tests/*" = [ + "PLR0913", + "PLR0917", + "PLR2004", + "PLW0603", + "S107", + "S606", +] +"src/actinia_processing_lib/core/common/exceptions.py" = [ + "ANN001", + "D107", + "D205", + "N818", +] +"src/actinia_processing_lib/processing/common/utils.py" = [ + "ANN001", + "ANN201", + "D205", + "D401", + "D417", + "ISC003", +] diff --git 
a/src/actinia_processing_lib/core/common/exceptions.py b/src/actinia_processing_lib/core/common/exceptions.py index 61bcacd..05e89c1 100644 --- a/src/actinia_processing_lib/core/common/exceptions.py +++ b/src/actinia_processing_lib/core/common/exceptions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- ####### # actinia-core - an open source REST API for scalable, distributed, high # performance processing of geographical data that uses GRASS GIS for @@ -21,9 +20,8 @@ # ####### -""" -Actinia Core Exceptions that should be used in case an error occurs that is -related to the Actinia Core functionality +"""Actinia Core Exceptions that should be used in case an error occurs that is +related to the Actinia Core functionality. """ __license__ = "GPLv3" @@ -36,12 +34,12 @@ class AsyncProcessError(Exception): - """ - Raise this exception in case the asynchronous processing faces an error + """Raise this exception in case the asynchronous processing faces an + error. """ - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) + def __init__(self, message) -> None: + message = f"{self.__class__.__name__!s}: {message}" Exception.__init__(self, message) @@ -54,21 +52,20 @@ def __init__(self, message): class AsyncProcessTermination(Exception): - """ - Raise this exception in case the termination requests was executed - successfully + """Raise this exception in case the termination requests was executed + successfully. 
""" - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) + def __init__(self, message) -> None: + message = f"{self.__class__.__name__!s}: {message}" Exception.__init__(self, message) class AsyncProcessTimeLimit(Exception): - """Raise this exception in case the process time limit was reached""" + """Raise this exception in case the process time limit was reached.""" - def __init__(self, message): - message = "%s: %s" % (str(self.__class__.__name__), message) + def __init__(self, message) -> None: + message = f"{self.__class__.__name__!s}: {message}" Exception.__init__(self, message) diff --git a/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py index ff9af79..f7509a9 100644 --- a/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- ####### # actinia-core - an open source REST API for scalable, distributed, high # performance processing of geographical data that uses GRASS GIS for @@ -21,20 +20,19 @@ # ####### -""" -Asynchronous computation in specific temporary generated and then copied -or original mapsets +"""Asynchronous computation in specific temporary generated and then copied +or original mapsets. 
""" import os import shutil import subprocess -from actinia_processing_lib.processing.actinia_processing.ephemeral_processing import ( - EphemeralProcessing, -) from actinia_core.core.mapset_merge_utils import change_mapsetname from actinia_processing_lib.core.common.exceptions import AsyncProcessError +from actinia_processing_lib.processing.actinia_processing.ephemeral_processing import ( + EphemeralProcessing, +) __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" @@ -78,15 +76,14 @@ class PersistentProcessing(EphemeralProcessing): """ - def __init__(self, rdc): - """Constructor + def __init__(self, rdc) -> None: + """Constructor. Args: rdc (ResourceDataContainer): The data container that contains all required variables for processing """ - EphemeralProcessing.__init__(self, rdc) self.target_mapset_name = self.mapset_name self.target_mapset_exists = False # By default the target mapset @@ -98,16 +95,25 @@ def __init__(self, rdc): # We have two mapset lock ids. The target mapset and the temporary # mapset self.target_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.project_name, self.target_mapset_name + self.user_group, + self.project_name, + self.target_mapset_name, ) self.temp_mapset_lock_id = self._generate_mapset_lock_id( - self.user_group, self.project_name, self.temp_mapset_name + self.user_group, + self.project_name, + self.temp_mapset_name, ) self.temp_mapset_lock_set = False - def _generate_mapset_lock_id(self, user_group, project_name, mapset_name): - """Generate a unique id to lock a mapset in the kvdb database + def _generate_mapset_lock_id( + self, + user_group, + project_name, + mapset_name, + ) -> str: + """Generate a unique id to lock a mapset in the kvdb database. Projects are user group specific. Hence different user groups may have projects with the same names and with equal mapset names. 
@@ -125,10 +131,10 @@ def _generate_mapset_lock_id(self, user_group, project_name, mapset_name): The lock id """ - return "%s/%s/%s" % (user_group, project_name, mapset_name) + return f"{user_group}/{project_name}/{mapset_name}" - def _lock_temp_mapset(self): - """Lock the temporary mapset + def _lock_temp_mapset(self) -> None: + """Lock the temporary mapset. This method sets in case of success: self.tmp_mapset_lock_set = True """ @@ -141,18 +147,18 @@ def _lock_temp_mapset(self): if ret == 0: raise AsyncProcessError( - "Unable to lock temporary mapset <%s>, " - "resource is already locked" % self.target_mapset_name + f"Unable to lock temporary mapset <{self.target_mapset_name}>, " + "resource is already locked", ) self.message_logger.info( - "Mapset <%s> locked" % self.target_mapset_name + f"Mapset <{self.target_mapset_name}> locked", ) # if we manage to come here, the lock was correctly set self.temp_mapset_lock_set = True def _check_mapset(self, mapset): - """Check if the target mapset exists + """Check if the target mapset exists. This method will check if the target mapset exists in the global and user group projects. @@ -178,28 +184,31 @@ def _check_mapset(self, mapset): os.path.exists(self.global_project_path) and os.path.isdir(self.global_project_path) and os.access( - self.global_project_path, os.R_OK | os.X_OK | os.W_OK + self.global_project_path, + os.R_OK | os.X_OK | os.W_OK, ) is True ): self.orig_mapset_path = os.path.join( - self.global_project_path, mapset + self.global_project_path, + mapset, ) if os.path.exists(self.orig_mapset_path) is True: if ( os.access( - self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK + self.orig_mapset_path, + os.R_OK | os.X_OK | os.W_OK, ) is True ): raise AsyncProcessError( - "Mapset <%s> exists in the global " - "dataset and can not be modified." 
% mapset + f"Mapset <{mapset}> exists in the global " + "dataset and can not be modified.", ) else: raise AsyncProcessError( - "Unable to access global project <%s>" % self.project_name + f"Unable to access global project <{self.project_name}>", ) # Always check if the target mapset already exists and set the flag @@ -211,13 +220,15 @@ def _check_mapset(self, mapset): is True ): self.orig_mapset_path = os.path.join( - self.user_project_path, mapset + self.user_project_path, + mapset, ) if os.path.exists(self.orig_mapset_path) is True: if ( os.access( - self.orig_mapset_path, os.R_OK | os.X_OK | os.W_OK + self.orig_mapset_path, + os.R_OK | os.X_OK | os.W_OK, ) is True ): @@ -227,20 +238,20 @@ def _check_mapset(self, mapset): self.required_mapsets.append(mapset) else: raise AsyncProcessError( - "Unable to access mapset <%s> " - "path %s" % (mapset, self.orig_mapset_path) + f"Unable to access mapset <{mapset}> " + f"path {self.orig_mapset_path}", ) else: mapset_exists = False else: raise AsyncProcessError( - "Unable to access user project <%s>" % self.project_name + f"Unable to access user project <{self.project_name}>", ) return mapset_exists - def _check_target_mapset_exists(self): - """Check if the target mapset exists + def _check_target_mapset_exists(self) -> None: + """Check if the target mapset exists. This method will check if the target mapset exists in the global and user project. @@ -253,13 +264,13 @@ def _check_target_mapset_exists(self): Raises: AsyncProcessError + """ self.target_mapset_exists = self._check_mapset(self.target_mapset_name) - def _check_lock_target_mapset(self): - """ - Check if the target mapset exists and lock it, then lock the temporary - mapset + def _check_lock_target_mapset(self) -> None: + """Check if the target mapset exists and lock it, then lock the temporary + mapset. This method will check if the target mapset exists in the global and user project. 
@@ -278,14 +289,13 @@ def _check_lock_target_mapset(self): self._check_target_mapset_exists() self._lock_target_mapset() - def _lock_target_mapset(self): - """Lock the target mapset + def _lock_target_mapset(self) -> None: + """Lock the target mapset. Raises: AsyncProcessError """ - # Lock the mapset for the time that the user can allocate at maximum ret = self.lock_interface.lock( resource_id=self.target_mapset_lock_id, @@ -294,27 +304,25 @@ def _lock_target_mapset(self): if ret == 0: raise AsyncProcessError( - "Unable to lock project/mapset <%s/%s>, " - "resource is already locked" - % (self.project_name, self.target_mapset_name) + f"Unable to lock project/mapset <{self.project_name}/{self.target_mapset_name}>, " + "resource is already locked", ) self.message_logger.info( - "project/mapset <%s/%s> locked" - % (self.project_name, self.target_mapset_name) + f"project/mapset <{self.project_name}/{self.target_mapset_name}> locked", ) # if we manage to come here, the lock was correctly set self.target_mapset_lock_set = True - def _merge_mapset_into_target(self, source_mapset, target_mapset): - """Link the source mapset content into the target mapset + def _merge_mapset_into_target(self, source_mapset, target_mapset) -> None: + """Link the source mapset content into the target mapset. Attention: Not all directories and files in the mapset are copied. See list directories. 
""" self.message_logger.info( - "Copy source mapset <%s> content " - "into the target mapset <%s>" % (source_mapset, target_mapset) + f"Copy source mapset <{source_mapset}> content " + f"into the target mapset <{target_mapset}>", ) # Raster, vector, group and space time data set directories/files @@ -336,7 +344,9 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): for directory in directories: source_path = os.path.join( - self.user_project_path, source_mapset, directory + self.user_project_path, + source_mapset, + directory, ) target_path = os.path.join(self.user_project_path, target_mapset) @@ -357,8 +367,8 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): [ "/bin/cp", "-flr", - "%s" % source_path, - "%s/." % target_path, + f"{source_path}", + f"{target_path}/.", ], stdout=stdout, stderr=stderr, @@ -367,44 +377,41 @@ def _merge_mapset_into_target(self, source_mapset, target_mapset): if p.returncode != 0: raise AsyncProcessError( "Unable to merge mapsets. Error in linking:" - " stdout: %s stderr: %s" % (stdout_buff, stderr_buff) + f" stdout: {stdout_buff} stderr: {stderr_buff}", ) - def _copy_merge_tmp_mapset_to_target_mapset(self): - """Copy the temporary mapset into the original project + def _copy_merge_tmp_mapset_to_target_mapset(self) -> None: + """Copy the temporary mapset into the original project. 
In case the mapset does not exists, then use the target mapset name, otherwise use the temporary mapset name for copying which is later on merged into the target mapset and then removed """ - # Extent the mapset lock for an hour, since copying can take long if self.target_mapset_lock_set is True: ret = self.lock_interface.extend( - resource_id=self.target_mapset_lock_id, expiration=3600 + resource_id=self.target_mapset_lock_id, + expiration=3600, ) if ret == 0: raise AsyncProcessError( "Unable to extend lock for mapset " - "<%s>" % self.target_mapset_name + f"<{self.target_mapset_name}>", ) if self.temp_mapset_lock_set is True: ret = self.lock_interface.extend( - resource_id=self.temp_mapset_lock_id, expiration=3600 + resource_id=self.temp_mapset_lock_id, + expiration=3600, ) if ret == 0: raise AsyncProcessError( "Unable to extend lock for " - "temporary mapset <%s>" % self.temp_mapset_name + f"temporary mapset <{self.temp_mapset_name}>", ) self.message_logger.info( - "Copy temporary mapset from %s to %s" - % ( - self.temp_mapset_path, - os.path.join(self.user_project_path, self.target_mapset_name), - ) + f"Copy temporary mapset from {self.temp_mapset_path} to {os.path.join(self.user_project_path, self.target_mapset_name)}", ) source_path = self.temp_mapset_path @@ -415,16 +422,17 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): if self.target_mapset_exists is True: target_path = self.user_project_path + "/." 
message = ( - "Copy temporary mapset <%s> to target project " - "<%s>" % (self.temp_mapset_name, self.project_name) + f"Copy temporary mapset <{self.temp_mapset_name}> to target project " + f"<{self.project_name}>" ) else: target_path = os.path.join( - self.user_project_path, self.target_mapset_name + self.user_project_path, + self.target_mapset_name, ) message = ( - "Copy temporary mapset <%s> to target project " - "<%s>" % (self.target_mapset_name, self.project_name) + f"Copy temporary mapset <{self.target_mapset_name}> to target project " + f"<{self.project_name}>" ) self._send_resource_update(message) @@ -433,7 +441,7 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): stdout = subprocess.PIPE stderr = subprocess.PIPE p = subprocess.Popen( - ["/bin/cp", "-fr", "%s" % source_path, "%s" % target_path], + ["/bin/cp", "-fr", f"{source_path}", f"{target_path}"], stdout=stdout, stderr=stderr, ) @@ -443,22 +451,23 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): "Unable to copy temporary mapset to " "original project. Copy error " "stdout: %s stderr: %s returncode: %i" - % (stdout_buff, stderr_buff, p.returncode) + % (stdout_buff, stderr_buff, p.returncode), ) except Exception as e: raise AsyncProcessError( "Unable to copy temporary mapset to " - "original project. Exception %s" % str(e) + f"original project. 
Exception {e!s}", ) # Merge the temp mapset into the target mapset in case the target # already exists if self.target_mapset_exists is True: self._merge_mapset_into_target( - self.temp_mapset_name, self.target_mapset_name + self.temp_mapset_name, + self.target_mapset_name, ) shutil.rmtree( - os.path.join(self.user_project_path, self.temp_mapset_name) + os.path.join(self.user_project_path, self.temp_mapset_name), ) # remove interim results if self.interim_result.saving_interim_results is True: @@ -467,13 +476,13 @@ def _copy_merge_tmp_mapset_to_target_mapset(self): self.resource_id, ) self.message_logger.info( - "Remove interim results %s" % interim_dir + f"Remove interim results {interim_dir}", ) if os.path.isdir(interim_dir): shutil.rmtree(interim_dir) - def _execute_process_list(self, process_list): - """Extend the mapset lock and execute the provided process list + def _execute_process_list(self, process_list) -> None: + """Extend the mapset lock and execute the provided process list. 
Args: process_list: The process list to execute @@ -481,6 +490,7 @@ def _execute_process_list(self, process_list): Raises: This method will raise an AsyncProcessError or AsyncProcessTermination + """ for process in process_list: # Extent the lock for each process by max processing time * 2 @@ -491,8 +501,7 @@ def _execute_process_list(self, process_list): ) if ret == 0: raise AsyncProcessError( - "Unable to extend lock for mapset <%s>" - % self.target_mapset_name + f"Unable to extend lock for mapset <{self.target_mapset_name}>", ) if self.temp_mapset_lock_set is True: @@ -504,7 +513,7 @@ def _execute_process_list(self, process_list): if ret == 0: raise AsyncProcessError( "Unable to extend lock for " - "temporary mapset <%s>" % self.temp_mapset_name + f"temporary mapset <{self.temp_mapset_name}>", ) if process.exec_type == "grass": @@ -514,8 +523,8 @@ def _execute_process_list(self, process_list): elif process.exec_type == "python": eval(process.executable) - def _execute(self, skip_permission_check=False): - """Overwrite this function in subclasses + def _execute(self, skip_permission_check=False) -> None: + """Overwrite this function in subclasses. 
- Call self._setup() - Analyse the process chain @@ -528,7 +537,6 @@ def _execute(self, skip_permission_check=False): - Cleanup and unlock the mapset """ - # Setup the user credentials and logger self._setup() # check if this is a job resumption @@ -549,7 +557,8 @@ def _execute(self, skip_permission_check=False): interim_result_mapset, interim_result_file_path, ) = self.interim_result.check_interim_result_mapset( - pc_step, self.rdc.iteration - 1 + pc_step, + self.rdc.iteration - 1, ) else: # Create the process chain @@ -583,7 +592,7 @@ def _execute(self, skip_permission_check=False): else: # Init GRASS environment and create the temporary mapset self._create_temporary_grass_environment( - source_mapset_name=self.target_mapset_name + source_mapset_name=self.target_mapset_name, ) self._lock_temp_mapset() @@ -595,10 +604,8 @@ def _execute(self, skip_permission_check=False): # Parse the module sdtout outputs and create the results self._parse_module_outputs() - def _final_cleanup(self): - """ - Final cleanup called in the run function at the very end of processing - """ + def _final_cleanup(self) -> None: + """Final cleanup called in the run function at the very end of processing.""" # Clean up and remove the temporary gisdbase self._cleanup() # Unlock the mapsets diff --git a/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py b/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py index 96ca8bb..aae9d7d 100644 --- a/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- ####### # actinia-core - an open source REST API for scalable, distributed, high # performance processing of geographical data that uses GRASS GIS for @@ -21,14 +20,12 @@ # ####### -""" -Base class for asynchronous processing +"""Base class for asynchronous processing """ import math import os 
import pickle -import requests import shutil import subprocess import sys @@ -37,34 +34,32 @@ import traceback import uuid -from flask import json -from requests.auth import HTTPBasicAuth - -from actinia_core.core.common.config import global_config, DEFAULT_CONFIG_PATH +import requests +from actinia_core.core.common.config import DEFAULT_CONFIG_PATH, global_config +from actinia_core.core.common.exceptions import RsyncError +from actinia_core.core.common.process_chain import ( + ProcessChainConverter, + get_param_stdin_part, +) from actinia_core.core.common.process_object import Process from actinia_core.core.grass_init import GrassInitializer -from actinia_core.core.messages_logger import MessageLogger +from actinia_core.core.interim_results import InterimResult, get_directory_size from actinia_core.core.kvdb_lock import KvdbLockingInterface -from actinia_core.core.resources_logger import ResourceLogger from actinia_core.core.mapset_merge_utils import change_mapsetname -from actinia_core.core.common.process_chain import ( - get_param_stdin_part, - ProcessChainConverter, -) -from actinia_core.core.common.exceptions import RsyncError +from actinia_core.core.messages_logger import MessageLogger +from actinia_core.core.resources_logger import ResourceLogger from actinia_core.models.response_models import ( - ProcessingResponseModel, ExceptionTracebackModel, -) -from actinia_core.models.response_models import ( - create_response_from_model, + ProcessingResponseModel, ProcessLogModel, ProgressInfoModel, + create_response_from_model, ) -from actinia_core.core.interim_results import InterimResult, get_directory_size from actinia_core.rest.base.user_auth import ( check_project_mapset_module_access, ) +from flask import json +from requests.auth import HTTPBasicAuth from actinia_processing_lib.core.common.exceptions import ( AsyncProcessError, @@ -80,7 +75,7 @@ __maintainer__ = "mundialis GmbH & Co. 
KG" -class EphemeralProcessing(object): +class EphemeralProcessing: """This class processes GRASS data on the local machine in a temporary mapset. @@ -148,7 +143,7 @@ def __init__(self, rdc): self.rdc = rdc if os.path.exists(DEFAULT_CONFIG_PATH) is True and os.path.isfile( - DEFAULT_CONFIG_PATH + DEFAULT_CONFIG_PATH, ): self.config = global_config self.rdc.config = self.config @@ -456,7 +451,6 @@ def _send_to_database(self, document, final=False): (no update) to activate the webhook call """ - self.resource_logger.commit( user_id=self.user_id, resource_id=self.resource_id, @@ -482,7 +476,7 @@ def _send_to_database(self, document, final=False): print(str(run_state)) self.message_logger.error( "Unable to send webhook request. Traceback: %s" - % str(run_state) + % str(run_state), ) def _post_to_webhook(self, document, type): @@ -494,10 +488,11 @@ def _post_to_webhook(self, document, type): Args: document (str): The response document type (str): The webhook type: 'finished' or 'update' + """ self.message_logger.info( "Send POST request to %s webhook url: %s" - % (type, self.webhook_finished) + % (type, self.webhook_finished), ) webhook_url = None if type == "finished": @@ -533,7 +528,7 @@ def _post_to_webhook(self, document, type): json=json.dumps(response_model), timeout=10, ) - if not (500 <= resp.status_code and resp.status_code < 600): + if not (resp.status_code >= 500 and resp.status_code < 600): webhook_not_reached = False except Exception: time.sleep(webhook_sleep) @@ -541,7 +536,7 @@ def _post_to_webhook(self, document, type): webhook_not_reached is False and resp.status_code not in [200, 204] ) or webhook_not_reached is True: raise AsyncProcessError( - "Unable to access %s webhook URL %s" % (type, webhook_url) + "Unable to access %s webhook URL %s" % (type, webhook_url), ) def _get_previous_iteration_process_chain(self): @@ -552,6 +547,7 @@ def _get_previous_iteration_process_chain(self): pc_step (int): The number of the step in the process chain where to 
continue old_process_chain (dict): The process chain of the old resource run + """ # check old resource pc_step = 0 @@ -559,11 +555,14 @@ def _get_previous_iteration_process_chain(self): for iter in range(1, self.rdc.iteration): if iter == 1: old_response_data = self.resource_logger.get( - self.user_id, self.resource_id + self.user_id, + self.resource_id, ) else: old_response_data = self.resource_logger.get( - self.user_id, self.resource_id, iter + self.user_id, + self.resource_id, + iter, ) if old_response_data is None: return None @@ -583,8 +582,7 @@ def _validate_process_chain( old_process_chain=None, pc_step=None, ): - """ - Create the process list and check for user permissions. + """Create the process list and check for user permissions. The following permissions are checked: @@ -608,11 +606,11 @@ def _validate_process_chain( Returns: list: The process list - """ + """ if old_process_chain is not None: self.proc_chain_converter.process_chain_to_process_list( - old_process_chain + old_process_chain, ) self.proc_chain_converter.import_descr_list = list() self.proc_chain_converter.resource_export_list = list() @@ -621,14 +619,14 @@ def _validate_process_chain( if process_chain is None: process_list = ( self.proc_chain_converter.process_chain_to_process_list( - self.request_data + self.request_data, ) ) self.process_chain_list.append(self.request_data) else: process_list = ( self.proc_chain_converter.process_chain_to_process_list( - process_chain + process_chain, ) ) self.process_chain_list.append(process_chain) @@ -664,7 +662,7 @@ def _validate_process_chain( raise AsyncProcessError( "Process limit exceeded, a maximum of %i " "processes are allowed in the process chain." 
- % self.process_num_limit + % self.process_num_limit, ) # Check if the module description was correct and if the @@ -685,7 +683,7 @@ def _validate_process_chain( if resp is not None: raise AsyncProcessError( "Module or executable <%s> is not supported" - % process.executable + % process.executable, ) else: message = ( @@ -739,8 +737,7 @@ def _setup(self, init_grass=True): # The setup should only be executed once if self.setup_flag is True: return - else: - self.setup_flag = True + self.setup_flag = True # fluent sender for this subprocess fluent_sender = None @@ -761,7 +758,8 @@ def _setup(self, init_grass=True): ): kwargs["password"] = self.config.KVDB_SERVER_PW self.resource_logger = ResourceLogger( - **kwargs, fluent_sender=fluent_sender + **kwargs, + fluent_sender=fluent_sender, ) self.message_logger = MessageLogger( @@ -774,7 +772,7 @@ def _setup(self, init_grass=True): self.lock_interface.connect(**kwargs) del kwargs self.process_time_limit = int( - self.user_credentials["permissions"]["process_time_limit"] + self.user_credentials["permissions"]["process_time_limit"], ) # Check and create all required paths to global, user and temporary @@ -797,23 +795,26 @@ def _setup(self, init_grass=True): def _setup_paths(self): """Helper method to setup the paths""" self.cell_limit = int( - self.user_credentials["permissions"]["cell_limit"] + self.user_credentials["permissions"]["cell_limit"], ) self.process_num_limit = int( - self.user_credentials["permissions"]["process_num_limit"] + self.user_credentials["permissions"]["process_num_limit"], ) # Setup the required paths self.temp_grass_data_base = os.path.join( - self.grass_temp_database, self.temp_grass_data_base_name + self.grass_temp_database, + self.temp_grass_data_base_name, ) self.temp_file_path = os.path.join(self.temp_grass_data_base, ".tmp") if self.project_name: self.temp_project_path = os.path.join( - self.temp_grass_data_base, self.project_name + self.temp_grass_data_base, + self.project_name, ) 
self.global_project_path = os.path.join( - self.grass_data_base, self.project_name + self.grass_data_base, + self.project_name, ) # Create the user database path if it does not exist if not os.path.exists(self.grass_user_data_base): @@ -821,14 +822,16 @@ def _setup_paths(self): # Create the user group specific path, if it does not exist and set # the grass user database path accordingly self.grass_user_data_base = os.path.join( - self.grass_user_data_base, self.user_group + self.grass_user_data_base, + self.user_group, ) if not os.path.exists(self.grass_user_data_base): os.mkdir(self.grass_user_data_base) # Create the user group specific project path, if it does not # exist self.user_project_path = os.path.join( - self.grass_user_data_base, self.project_name + self.grass_user_data_base, + self.project_name, ) if not os.path.exists(self.user_project_path): os.mkdir(self.user_project_path) @@ -904,7 +907,7 @@ def _create_temp_database(self, mapsets=None): "Unable to link all required mapsets into " "temporary project. 
Missing or un-accessible " f"mapset <{mapset}> in project " - f"<{self.project_name}>" + f"<{self.project_name}>", ) # Link the original mapsets from global and user database into the @@ -922,7 +925,7 @@ def _create_temp_database(self, mapsets=None): except Exception as e: raise AsyncProcessError( "Unable to create a temporary GIS database" - ", Exception: %s" % str(e) + ", Exception: %s" % str(e), ) def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): @@ -937,6 +940,7 @@ def _link_mapsets(self, mapsets, mapsets_to_link, check_all_mapsets): Returns: mapsets (list): List of mapsets in project mapsets_to_link (list): List of mapsets paths to link + """ # Global project mapset linking if self.is_global_database is True: @@ -989,6 +993,7 @@ def _list_all_available_mapsets( Returns: mapsets (list): List of mapsets in project mapsets_to_link (list): List of mapsets paths to link + """ if os.path.isdir(project_path): if check_all_mapsets is True: @@ -996,7 +1001,8 @@ def _list_all_available_mapsets( for mapset in mapsets: mapset_path = os.path.join(project_path, mapset) if os.path.isdir(mapset_path) and os.access( - mapset_path, os.R_OK & os.X_OK + mapset_path, + os.R_OK & os.X_OK, ): # Check if a WIND file exists to be sure it is a mapset if ( @@ -1022,7 +1028,7 @@ def _list_all_available_mapsets( else: raise AsyncProcessError( "Invalid mapset <%s> in project <%s>" - % (mapset, self.project_name) + % (mapset, self.project_name), ) else: if global_db is True: @@ -1035,7 +1041,9 @@ def _list_all_available_mapsets( return mapsets, mapsets_to_link def _create_grass_environment( - self, grass_data_base, mapset_name="PERMANENT" + self, + grass_data_base, + mapset_name="PERMANENT", ): """Sets up the GRASS environment to run modules @@ -1050,7 +1058,7 @@ def _create_grass_environment( """ self.message_logger.info( "Initlialize GRASS grass_data_base: %s; project: %s; mapset: %s" - % (grass_data_base, self.project_name, mapset_name) + % (grass_data_base, 
self.project_name, mapset_name), ) self.ginit = GrassInitializer( @@ -1107,14 +1115,15 @@ def _create_temporary_mapset( """ self.temp_mapset_path = os.path.join( - self.temp_project_path, temp_mapset_name + self.temp_project_path, + temp_mapset_name, ) # if interim_result_mapset is set copy the mapset from the interim # results if interim_result_mapset: self.message_logger.info( - "Rsync interim result mapset to temporary GRASS DB" + "Rsync interim result mapset to temporary GRASS DB", ) # change mapset name for groups, raster VRTs and tgis for directory in ["group", "cell_misc", "tgis"]: @@ -1125,31 +1134,34 @@ def _create_temporary_mapset( os.path.basename(self.temp_mapset_path), ) rsync_status = self.interim_result.rsync_mapsets( - interim_result_mapset, self.temp_mapset_path + interim_result_mapset, + self.temp_mapset_path, ) if rsync_status != "success": raise RsyncError( "Error while rsyncing of interim results to new temporare " - "mapset" + "mapset", ) self.interim_result.rsync_additional_mapsets( - os.path.dirname(self.temp_mapset_path) + os.path.dirname(self.temp_mapset_path), ) if interim_result_file_path: self.message_logger.info( - "Rsync interim result file path to temporary GRASS DB" + "Rsync interim result file path to temporary GRASS DB", ) rsync_status = self.interim_result.rsync_mapsets( - interim_result_file_path, self.temp_file_path + interim_result_file_path, + self.temp_file_path, ) if rsync_status != "success": raise RsyncError( "Error while rsyncing of interim temporary file path to " - "new temporare file path" + "new temporare file path", ) self.ginit.run_module( - "g.mapset", ["-c", "mapset=%s" % temp_mapset_name] + "g.mapset", + ["-c", "mapset=%s" % temp_mapset_name], ) if self.required_mapsets: @@ -1163,7 +1175,7 @@ def _create_temporary_mapset( self.message_logger.info( "Added the following mapsets to the mapset " - "search path: " + ",".join(self.required_mapsets) + "search path: " + ",".join(self.required_mapsets), ) # Set the 
vector database connection to vector map specific databases @@ -1183,7 +1195,8 @@ def _create_temporary_mapset( # to the temporary mapset if source_mapset_name is not None and interim_result_mapset is None: source_mapset_path = os.path.join( - self.temp_project_path, source_mapset_name + self.temp_project_path, + source_mapset_name, ) if os.path.exists(os.path.join(source_mapset_path, "WIND")): shutil.copyfile( @@ -1211,7 +1224,7 @@ def _cleanup(self): os.remove(tmpfile) except Exception as e: self.message_logger.debug( - f"Temporary file {tmpfile} can't be removed: {e}" + f"Temporary file {tmpfile} can't be removed: {e}", ) def _check_pixellimit_rimport(self, process_executable_params): @@ -1241,11 +1254,12 @@ def _check_pixellimit_rimport(self, process_executable_params): if extent_region: # first query region extents errorid, stdout_gregion, _ = self.ginit.run_module( - "g.region", ["-ug"] + "g.region", + ["-ug"], ) if errorid != 0: raise AsyncProcessError( - "Unable to check the computational region size" + "Unable to check the computational region size", ) # parse region extents for creation of vrt (-te flag from gdalbuildvrt) list_out_gregion = stdout_gregion.split("\n") @@ -1267,7 +1281,8 @@ def _check_pixellimit_rimport(self, process_executable_params): # gdalinfo for created vrt gdalinfo_params = [vrt_out] errorid, stdout_gdalinfo, _ = self.ginit.run_module( - "/usr/bin/gdalinfo", gdalinfo_params + "/usr/bin/gdalinfo", + gdalinfo_params, ) # parse "Size" output of gdalinfo rastersize_list = ( @@ -1287,7 +1302,8 @@ def _check_pixellimit_rimport(self, process_executable_params): if rimport_res and (rastersize < self.cell_limit): # determine estimated resolution errorid, _, stderr_estres = self.ginit.run_module( - "r.import", [vrt_out, "-e"] + "r.import", + [vrt_out, "-e"], ) if "Estimated" in stderr_estres: # if data in different projection get rest_est with output of r.import -e @@ -1311,8 +1327,8 @@ def _check_pixellimit_rimport(self, 
process_executable_params): x for x in process_executable_params if "resolution_value=" in x - ][0].split("=")[1] - ) + ][0].split("=")[1], + ), ] * 2 elif resolution == "region": # if already queried above reuse, otherwise execute g.region command @@ -1327,12 +1343,12 @@ def _check_pixellimit_rimport(self, process_executable_params): res_val_ns = float( [x for x in stdout_gregion.split("\n") if "nsres=" in x][ 0 - ].split("=")[1] + ].split("=")[1], ) res_val_ew = float( [x for x in stdout_gregion.split("\n") if "ewres=" in x][ 0 - ].split("=")[1] + ].split("=")[1], ) res_val = [res_val_ns, res_val_ew] if res_val: @@ -1351,7 +1367,7 @@ def _check_pixellimit_rimport(self, process_executable_params): if rastersize > self.cell_limit: raise AsyncProcessError( "Processing pixel limit exceeded for raster import. " - "Please set e.g. region smaller." + "Please set e.g. region smaller.", ) def _check_reset_region(self): @@ -1372,7 +1388,7 @@ def _check_reset_region(self): if errorid != 0: raise AsyncProcessError( - "Unable to check the computational region size" + "Unable to check the computational region size", ) str_list = stdout_buff.split() @@ -1409,17 +1425,18 @@ def _adjust_region_size(self, num_cells, ns_res, ew_res): ns_res = ns_res * fak ew_res = ew_res * fak errorid, stdout_buff, stderr_buff = self.ginit.run_module( - "g.region", ["nsres=%f" % ns_res, "ewres=%f" % ew_res, "-g"] + "g.region", + ["nsres=%f" % ns_res, "ewres=%f" % ew_res, "-g"], ) self.message_logger.info(stdout_buff) if errorid != 0: raise AsyncProcessError( "Unable to adjust the region settings to nsres: " - "%f ewres: %f error: %s" % (ns_res, ew_res, stderr_buff) + "%f ewres: %f error: %s" % (ns_res, ew_res, stderr_buff), ) raise AsyncProcessError( "Region too large, set a coarser resolution to minimum nsres: " - "%f ewres: %f [num_cells: %d]" % (ns_res, ew_res, num_cells) + "%f ewres: %f [num_cells: %d]" % (ns_res, ew_res, num_cells), ) def _increment_progress(self, num=1): @@ -1427,6 +1444,7 @@ 
def _increment_progress(self, num=1): Args: num (int): The number for which the progress should be increased + """ self.progress_steps += num self.progress["step"] = self.progress_steps @@ -1436,6 +1454,7 @@ def _add_actinia_process(self, process: Process): Args: process: The actinia process + """ self.actinia_process_dict[process.id] = process self.actinia_process_list.append(process) @@ -1446,15 +1465,19 @@ def _update_num_of_steps(self, num): Args: num: The number of processes to be added to the total number of processes + """ self.number_of_processes += num self.progress["num_of_steps"] = self.number_of_processes def _wait_for_process( - self, module_name, module_parameter, proc, poll_time + self, + module_name, + module_parameter, + proc, + poll_time, ): - """ - Wait for a specific process. Catch termination requests, process + """Wait for a specific process. Catch termination requests, process time limits and send updates to the user. Args: @@ -1468,7 +1491,6 @@ def _wait_for_process( The run time in seconds """ - start_time = time.time() termination_check_count = 0 @@ -1476,56 +1498,56 @@ def _wait_for_process( while True: if proc.poll() is not None: break - else: - # Sleep some time and update the resource status - time.sleep(poll_time) - termination_check_count += 1 - update_check_count += 1 - - # Check all 10 loops for termination - if termination_check_count == 10: - termination_check_count = 0 - # check if the resource should be terminated - # and kill the current process - if ( - self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration - ) - is True - ): - proc.kill() - raise AsyncProcessTermination( - "Process <%s> was terminated " - "by user request" % module_name - ) + # Sleep some time and update the resource status + time.sleep(poll_time) + termination_check_count += 1 + update_check_count += 1 + + # Check all 10 loops for termination + if termination_check_count == 10: + termination_check_count = 0 + # check if the 
resource should be terminated + # and kill the current process + if ( + self.resource_logger.get_termination( + self.user_id, + self.resource_id, + self.iteration, + ) + is True + ): + proc.kill() + raise AsyncProcessTermination( + "Process <%s> was terminated " + "by user request" % module_name, + ) - # Send all 100 loops a status update - if update_check_count == 100: - update_check_count = 0 - # Check max runtime of process - curr_time = time.time() - if (curr_time - start_time) > self.process_time_limit: - proc.kill() - raise AsyncProcessTimeLimit( - "Time (%i seconds) exceeded to run executable %s" - % (self.process_time_limit, module_name) - ) - # Reduce the length of the command line parameters for - # lesser logging overhead - mparams = str(module_parameter) - if len(mparams) > 100: - mparams = "%s ... %s" % (mparams[0:50], mparams[-50:]) - message = ( - f"Running executable {module_name} with parameters " - f"{mparams} for {curr_time - start_time} seconds" + # Send all 100 loops a status update + if update_check_count == 100: + update_check_count = 0 + # Check max runtime of process + curr_time = time.time() + if (curr_time - start_time) > self.process_time_limit: + proc.kill() + raise AsyncProcessTimeLimit( + "Time (%i seconds) exceeded to run executable %s" + % (self.process_time_limit, module_name), ) - self._send_resource_update(message) + # Reduce the length of the command line parameters for + # lesser logging overhead + mparams = str(module_parameter) + if len(mparams) > 100: + mparams = "%s ... 
%s" % (mparams[0:50], mparams[-50:]) + message = ( + f"Running executable {module_name} with parameters " + f"{mparams} for {curr_time - start_time} seconds" + ) + self._send_resource_update(message) return time.time() - start_time def _run_process(self, process, poll_time=0.05): - """ - Run a process actinia_core.core.common.process_object.Process) with + """Run a process actinia_core.core.common.process_object.Process) with options and send progress updates to the resource database. IMPORTANT: Use this method to run programs that are not GRASS modules. @@ -1553,13 +1575,15 @@ def _run_process(self, process, poll_time=0.05): """ if ( self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration + self.user_id, + self.resource_id, + self.iteration, ) is True ): raise AsyncProcessTermination( "Process <%s> was terminated by " - "user request" % process.executable + "user request" % process.executable, ) return self._run_executable(process, poll_time) @@ -1612,13 +1636,15 @@ def _run_module(self, process, poll_time=0.05): if self.process_count % 20 == 0: if ( self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration + self.user_id, + self.resource_id, + self.iteration, ) is True ): raise AsyncProcessTermination( "Process <%s> was terminated " - "by user request" % process.executable + "by user request" % process.executable, ) message = "Running module %s with parameters %s" % ( @@ -1674,13 +1700,16 @@ def _run_executable(self, process, poll_time=0.005): (returncode, stdout_buff, stderr_buff) """ - # Use temporary files to catch stdout and stderr stdout_buff = tempfile.NamedTemporaryFile( - mode="w+b", delete=True, dir=self.temp_file_path + mode="w+b", + delete=True, + dir=self.temp_file_path, ) stderr_buff = tempfile.NamedTemporaryFile( - mode="w+b", delete=True, dir=self.temp_file_path + mode="w+b", + delete=True, + dir=self.temp_file_path, ) stdin_file = None @@ -1699,7 +1728,7 @@ def _run_executable(self, 
process, poll_time=0.005): # filter stdout/stderr if "::" in val_splitted[j]: filter = get_param_stdin_part( - val_splitted[j][2:] + val_splitted[j][2:], ) if "=" not in par_val: raise AsyncProcessError( @@ -1707,17 +1736,18 @@ def _run_executable(self, process, poll_time=0.005): f"<{process.executable}>: <{filter}> " "cannot be selected. Maybe you have to " "set the '-g' flag for the stdout/stderr " - "module." + "module.", ) filtered_par_value = { x.split("=")[0]: x.split("=")[1] for x in par_val.split() }[filter] filtered_func_name += f"::{filter}" - process.executable_params[i] = ( - process.executable_params[i].replace( - filtered_func_name, filtered_par_value - ) + process.executable_params[ + i + ] = process.executable_params[i].replace( + filtered_func_name, + filtered_par_value, ) if process.stdin_source is not None: @@ -1725,7 +1755,7 @@ def _run_executable(self, process, poll_time=0.005): stdin_file = open(tmp_file, "w") stdin_file.write(process.stdin_source()) stdin_file.close() - stdin_file = open(tmp_file, "r") + stdin_file = open(tmp_file) self._increment_progress(num=1) @@ -1753,7 +1783,10 @@ def _run_executable(self, process, poll_time=0.005): ) run_time = self._wait_for_process( - process.executable, process.executable_params, proc, poll_time + process.executable, + process.executable_params, + proc, + poll_time, ) proc.wait() @@ -1793,7 +1826,7 @@ def _run_executable(self, process, poll_time=0.005): if proc.returncode != 0: raise AsyncProcessError( - "Error while running executable <%s>" % process.executable + "Error while running executable <%s>" % process.executable, ) # save interim results @@ -1802,12 +1835,14 @@ def _run_executable(self, process, poll_time=0.005): and self.temp_mapset_path is not None ): self.interim_result.save_interim_results( - self.progress_steps, self.temp_mapset_path, self.temp_file_path + self.progress_steps, + self.temp_mapset_path, + self.temp_file_path, ) elif self.temp_mapset_path is None: 
self.message_logger.debug( "No temp mapset path set. Because of that no interim results" - " can be saved!" + " can be saved!", ) return proc.returncode, stdout_string, stderr_string @@ -1839,8 +1874,8 @@ def _create_temporary_grass_environment( temporary file path Raises: This method will raise an AsyncProcessError - """ + """ # Create the temp database and link the # required mapsets into it self._create_temp_database(self.required_mapsets) @@ -1848,7 +1883,8 @@ def _create_temporary_grass_environment( # Initialize the GRASS environment and switch into PERMANENT # mapset, which is always linked self._create_grass_environment( - grass_data_base=self.temp_grass_data_base, mapset_name="PERMANENT" + grass_data_base=self.temp_grass_data_base, + mapset_name="PERMANENT", ) # Create the temporary mapset and switch into it @@ -1885,12 +1921,12 @@ def _execute(self, skip_permission_check=False): # Create the process chain if self.rdc.iteration is not None: process_list = self._create_temporary_grass_environment_and_process_list_for_iteration( - skip_permission_check=skip_permission_check + skip_permission_check=skip_permission_check, ) else: process_list = ( self._create_temporary_grass_environment_and_process_list( - skip_permission_check=skip_permission_check + skip_permission_check=skip_permission_check, ) ) @@ -1900,7 +1936,9 @@ def _execute(self, skip_permission_check=False): self._parse_module_outputs() def _create_temporary_grass_environment_and_process_list_for_iteration( - self, process_chain=None, skip_permission_check=False + self, + process_chain=None, + skip_permission_check=False, ): """Helper method to: @@ -1945,7 +1983,8 @@ def _create_temporary_grass_environment_and_process_list_for_iteration( interim_result_mapset, interim_result_file_path, ) = self.interim_result.check_interim_result_mapset( - pc_step, self.iteration - 1 + pc_step, + self.iteration - 1, ) if interim_result_mapset is None: return None @@ -1959,7 +1998,9 @@ def 
_create_temporary_grass_environment_and_process_list_for_iteration( return process_list def _create_temporary_grass_environment_and_process_list( - self, process_chain=None, skip_permission_check=False + self, + process_chain=None, + skip_permission_check=False, ): """Helper method to: @@ -2007,7 +2048,6 @@ def _parse_module_outputs(self): provided id of the StdoutParser. """ - for entry in self.output_parser_list: for process_id, stdout_def in entry.items(): id = stdout_def["id"] @@ -2015,7 +2055,7 @@ def _parse_module_outputs(self): delimiter = stdout_def["delimiter"] if process_id not in self.module_output_dict: raise AsyncProcessError( - "Unable to find process id in module output dictionary" + "Unable to find process id in module output dictionary", ) stdout = self.module_output_dict[process_id]["stdout"] # Split the rows by the \n new line delimiter @@ -2065,8 +2105,7 @@ def _parse_module_outputs(self): self.module_results[id] = result def _execute_process_list(self, process_list): - """ - Run all modules or executables that are specified in the process list + """Run all modules or executables that are specified in the process list Args: process_list: The process list that was generated by @@ -2088,7 +2127,6 @@ def _execute_process_list(self, process_list): def _interim_results(self): """Check if interim results should be saved or cleaned up""" - if ( "error" in self.run_state and self.interim_result.saving_interim_results == "onError" @@ -2133,7 +2171,6 @@ def run(self): e_traceback)] message = pprint.pformat(message) """ - try: # Run the _execute function that does all the work self._execute() @@ -2182,16 +2219,17 @@ def run(self): # After all processing finished, send the final status if "success" in self.run_state: self._send_resource_finished( - message=self.finish_message, results=self.module_results + message=self.finish_message, + results=self.module_results, ) elif "terminated" in self.run_state: # Send an error message if an exception was raised 
self._send_resource_terminated( - message=self.run_state["terminated"] + message=self.run_state["terminated"], ) elif "time limit exceeded" in self.run_state: self._send_resource_time_limit_exceeded( - message=self.run_state["time limit exceeded"] + message=self.run_state["time limit exceeded"], ) elif "error" in self.run_state: # Send an error message if an exception was raised diff --git a/src/actinia_processing_lib/processing/common/utils.py b/src/actinia_processing_lib/processing/common/utils.py index a32ff25..b8a790a 100644 --- a/src/actinia_processing_lib/processing/common/utils.py +++ b/src/actinia_processing_lib/processing/common/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- ####### # actinia-core - an open source REST API for scalable, distributed, high # performance processing of geographical data that uses GRASS GIS for @@ -21,11 +20,10 @@ # ####### -""" -Utils for processing -""" +"""Utils for processing.""" import importlib + from actinia_core.core.common.config import global_config __license__ = "GPLv3" @@ -54,11 +52,11 @@ def try_import(actinia_module, actinia_class): Returns: module: python module used for processing + """ try: imported_module = importlib.import_module(actinia_module, package=None) - imported_class = getattr(imported_module, actinia_class) - return imported_class + return getattr(imported_module, actinia_class) except ImportError as e: # ModuleNotFoundError is a subclass and caught here as well. if global_config.QUEUE_TYPE == "local": @@ -66,4 +64,4 @@ def try_import(actinia_module, actinia_class): " - No actinia_processing found but required " + "for local queue!" 
) - raise e + raise From 777fae9175a26c9f974110f752a4db0b62fcb900 Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Fri, 1 Apr 2022 11:34:10 +0300 Subject: [PATCH 34/46] Refactor rest - part 2 (#322) * reactivate redis queue * rename variable * make queue type configurable * enhance dev setup for redis queue * lint * move user_auth * move user auth * move base_login * move map_layer_base * move renderer_base * move resource_base * fix relative imports * lint * first splitup: raster_colors * splitup download_cache_management * splitup ephemeral_custom_processing * splitup ephemeral_processing_with_export * splitup ephemeral_processing * splitup renderer_base * splitup persistent_processing * splitup location_management * splitup map_layer_management * splitup mapset_management * splitup persistent_mapset_merger * splitup process_validation * splitup raster_export * splitup raster_layer * splitup raster_legend * lint * splitup raster_renderer * splitup resource_storage_management * splitup strds_management * splitup strds_raster_management * splitup strds_renderer * splitup vector_layer * splitup vector_renderer * lint * make inheritance more clear * add readme * fix import --- .../ephemeral_processing_with_export.py | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py new file mode 100644 index 0000000..a06d4d6 --- /dev/null +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -0,0 +1,483 @@ +# -*- coding: utf-8 -*- +####### +# actinia-core - an open source REST API for scalable, distributed, high +# performance processing of geographical data that uses GRASS GIS for +# computational tasks. 
For details, see https://actinia.mundialis.de/ +# +# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. KG +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +####### + +""" +Asynchronous computation in specific temporary generated mapsets +with export of required map layers. +""" +import os +from actinia_core.processing.actinia_processing.ephemeral_processing \ + import EphemeralProcessing +from actinia_core.core.common.process_object import Process +from actinia_core.core.common.exceptions import AsyncProcessTermination +from actinia_core.core.stac_exporter_interface import STACExporter + +__license__ = "GPLv3" +__author__ = "Sören Gebbert" +__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__maintainer__ = "mundialis" +__email__ = "info@mundialis.de" + + +class EphemeralProcessingWithExport(EphemeralProcessing): + """ + This class processes GRASS data on the local machine in an temporary mapset + and copies the exported results to a dedicated storage location. + + The temporary mapset will be removed by this class when the processing finished + and the results are stored in the dedicated storage location. 
+ + TODO: Implement the export of arbitrary files that were generated in the + processing of the process chain + """ + def __init__(self, rdc): + """ + Setup the variables of this class + + Args: + rdc (ResourceDataContainer): The data container that contains all + required variables for processing + + """ + EphemeralProcessing.__init__(self, rdc) + # Create the storage interface to store the exported resources + self.storage_interface = rdc.create_storage_interface() + + def _export_raster(self, raster_name, + format="COG", + additional_options=[], + use_raster_region=False): + """Export a specific raster layer with r.out.gdal as GeoTiff. + + The result is stored in a temporary directory + that is located in the temporary grass database. + + The region of the raster layer can be used for export. In this case a + temporary region will be used for export, so that the original region + of the mapset is not modified. + COG-Driver: https://gdal.org/drivers/raster/cog.html + + Args: + raster_name (str): The name of the raster layer + format (str): COG (default; requires GDAL >= 3.1 on server), GTiff + additional_options (list): Unused + use_raster_region (bool): Use the region of the raster layer for export + + Returns: + tuple: A tuple (file_name, output_path) + + Raises: + AsyncProcessError: If a GRASS module return status is not 0 + + """ + # Export the layer + suffix = ".tif" + # Remove a potential mapset + file_name = raster_name.split("@")[0] + suffix + + if use_raster_region is True: + + p = Process(exec_type="grass", + executable="g.region", + executable_params=["raster=%s" % raster_name, "-g"], + id=f"exporter_region_{raster_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_module(p) + + if format == 'COG': + # check if GDAL has COG driver + from osgeo import gdal + driver_list = [gdal.GetDriver( + i).ShortName for i in range(gdal.GetDriverCount())] + if 'COG' not in driver_list: + format = 'GTiff' + self.message_logger.info("COG driver 
not available, using GTiff driver") + + # Save the file in the temporary directory of the temporary gisdb + output_path = os.path.join(self.temp_file_path, file_name) + + module_name = "r.out.gdal" + args = [ + "-fmt", "input=%s" % raster_name, "format=%s" % + format, "output=%s" % output_path] + create_opts = "createopt=BIGTIFF=YES,COMPRESS=LZW" + + if format == "GTiff": + # generate overviews with compression: + os.environ['COMPRESS_OVERVIEW'] = "LZW" + args.append("overviews=5") + create_opts += ",TILED=YES" + + args.append(create_opts) + # current workaround due to color table export + # COG bug in GDAL, see https://github.com/OSGeo/gdal/issues/2946 + # TODO: DELETE AND TEST ONCE GDAL 3.1.4 HAS BEEN RELEASED + if format == "COG": + args.append("-c") + + if additional_options: + args.extend(additional_options) + + p = Process(exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_raster_{raster_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_module(p) + + return file_name, output_path + + def _export_strds(self, strds_name, format="GTiff"): + """Export a specific strds layer with t.rast.export. + + The result is stored in a temporary directory + that is located in the temporary grass database. 
+ + Args: + strds_name (str): The name of the strds layer + format (str): GTiff (only option) + + Returns: + tuple: A tuple (file_name, output_path) + + """ + suffix = ".tar.gz" + file_name = strds_name.split("@")[0] + suffix + output_path = os.path.join(self.temp_file_path, file_name) + + if format != 'GTiff': + format = 'GTiff' + self.message_logger.info("Only GTiff driver is supported for STRDS export.") + + module_name = "t.rast.export" + args = [ + "input=%s" % strds_name, + "format=%s" % format, + "output=%s" % output_path, + "directory=%s" % self.temp_file_path, + "compression=%s" % "gzip" + ] + # optimized for GTiff + create_opts = "createopt=BIGTIFF=YES,COMPRESS=LZW,TILED=YES" + args.append(create_opts) + os.environ['COMPRESS_OVERVIEW'] = "LZW" + + p = Process(exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_strds_{strds_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_module(p) + + return file_name, output_path + + def _export_vector(self, vector_name, + format="GPKG", + additional_options=[]): + """Export a specific vector layer with v.out.ogr using a specific output format + + The result is stored in a temporary directory + that is located in the temporary grass database. 
+ + The resulting vector file will always be compressed using zip + + Args: + vector_name (str): The name of the raster layer + format (str): GPKG (default), GML, GeoJSON, ESRI_Shapefile, SQLite, CSV + additional_options (list): Unused + + Returns: + tuple: A tuple (file_name, output_path) + + Raises: + AsyncProcessError: If a GRASS module return status is not 0 + + """ + # Export the layer + prefix = "" + if format == "GPKG": + prefix = ".gpkg" + if format == "GML": + prefix = ".gml" + if format == "GeoJSON": + prefix = ".json" + if format == "ESRI_Shapefile": + prefix = "" + if format == "SQLite": + prefix = ".sqlite" + if format == "CSV": + prefix = ".csv" + + # Remove a potential mapset + file_name = vector_name.split("@")[0] + prefix + archive_name = file_name + ".zip" + # switch into the temporary working directory to use relative path for zip + os.chdir(self.temp_file_path) + + module_name = "v.out.ogr" + args = ["-e", "input=%s" % vector_name, "format=%s" % format, + "output=%s" % file_name] + + if additional_options: + args.extend(additional_options) + + # Export + p = Process(exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_vector_{vector_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_module(p) + + # Compression + compressed_output_path = os.path.join(self.temp_file_path, archive_name) + + executable = "/usr/bin/zip" + args = ["-r", archive_name, file_name] + + p = Process(exec_type="exec", + executable=executable, + executable_params=args, + id=f"exporter_zip_{vector_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_process(p) + + return archive_name, compressed_output_path + + def _export_postgis(self, vector_name, dbstring, + output_layer=None, + additional_options=[]): + """Export a specific vector layer with v.out.postgis to a PostGIS database + + Args: + vector_name (str): The name of the raster layer + dbstring (str): The PostgreSQL database string to connect 
to the + output database + output_layer (str): The name of the PostgreSQL database table + additional_options (list): Unused + + Raises: + AsyncProcessError: If a GRASS module return status is not 0 + + """ + + module_name = "v.out.postgis" + args = ["-l", "input=%s" % vector_name, "output=%s" % dbstring] + + if output_layer: + args.append("output_layer=%s" % output_layer) + + if additional_options: + args.extend(additional_options) + + # Export + p = Process(exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_postgis_{vector_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_module(p) + + def _export_file(self, tmp_file, file_name): + """Export a specific file + + The result is stored in a temporary directory + that is located in the temporary grass database. + + The output file will always be compressed using zip + + Args: + tmp_file (str): The name of the temporary file generated by a module + file_name (str): The file name to be used for export + + Returns: + tuple: A tuple (file_name, output_path) + + Raises: + AsyncProcessError: If a GRASS module return status is not 0 + + """ + # Export the file + archive_name = file_name + ".zip" + # switch into the temporary working directory to use relative path for zip + os.chdir(self.temp_file_path) + + # Compression + compressed_output_path = os.path.join(self.temp_file_path, archive_name) + + executable = "/usr/bin/zip" + args = ["-r", archive_name, tmp_file] + + p = Process(exec_type="exec", + executable=executable, + executable_params=args, + id=f"exporter_file_{file_name}", + stdin_source=None) + + self._update_num_of_steps(1) + self._run_process(p) + + return archive_name, compressed_output_path + + def _export_resources(self, use_raster_region=False): + """Export all resources that were listed in the process chain description. + + Save all exported files in a temporary directory first, then copy the + data to its destination after the export is finished. 
+ The temporary data will be finally removed. + + At the moment only raster layer export is supported. + + """ + + for resource in self.resource_export_list: + + # print("Check for termination %i" + # % self.resource_logger.get_termination(self.user_id, self.resource_id)) + + # Check for termination requests between the exports + if bool(self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration)) is True: + raise AsyncProcessTermination( + "Resource export was terminated by user request") + + # Raster export + if resource["export"]["type"] in ["raster", "vector", "file", "strds"]: + + output_type = resource["export"]["type"] + output_path = None + + # Legacy code + if "name" in resource: + file_name = resource["name"] + if "value" in resource: + file_name = resource["value"] + + if output_type == "raster": + message = "Export raster layer <%s> with format %s" % ( + file_name, resource["export"]["format"]) + self._send_resource_update(message) + output_name, output_path = self._export_raster( + raster_name=file_name, + format=resource["export"]["format"], + use_raster_region=use_raster_region) + + elif output_type == "vector": + if "PostgreSQL" in resource["export"]["format"]: + dbstring = resource["export"]["dbstring"] + output_layer = None + if "output_layer" in resource["export"]: + output_layer = resource["export"]["output_layer"] + + message = "Export vector layer <%s> to PostgreSQL database" % ( + file_name) + self._send_resource_update(message) + self._export_postgis( + vector_name=file_name, dbstring=dbstring, + output_layer=output_layer) + # continue + else: + message = "Export vector layer <%s> with format %s" % ( + file_name, resource["export"]["format"]) + self._send_resource_update(message) + output_name, output_path = self._export_vector( + vector_name=file_name, + format=resource["export"]["format"]) + elif output_type == "file": + file_name = resource["file_name"] + tmp_file = resource["tmp_file"] + output_name, 
output_path = self._export_file( + tmp_file=tmp_file, file_name=file_name) + elif output_type == "strds": + message = "Export strds layer <%s> with format %s" % ( + file_name, resource["export"]["format"]) + self._send_resource_update(message) + output_name, output_path = self._export_strds( + strds_name=file_name, + format=resource["export"]["format"]) + else: + raise AsyncProcessTermination( + "Unknown export format %s" % output_type) + + message = "Moving generated resources to final destination" + self._send_resource_update(message) + + # Store the temporary file in the resource storage + # and receive the resource URL + if output_path is not None: + resource_url = self.storage_interface.store_resource(output_path) + self.resource_url_list.append(resource_url) + + if "metadata" in resource: + if resource["metadata"]["format"] == "STAC": + stac = STACExporter() + + stac_catalog = stac.stac_builder(resource_url, file_name, + output_type) + self.resource_url_list.append(stac_catalog) + + def _execute(self, skip_permission_check=False): + """Overwrite this function in subclasses + + Overwrite this function in subclasses + + - Setup user credentials + - Setup the storage interface + - Analyse the process chain + - Initialize and create the temporal database and mapset + - Run the modules + - Export the results + - Cleanup + + """ + # Setup the user credentials and logger + self._setup() + + # Create and check the resource directory + self.storage_interface.setup() + + EphemeralProcessing._execute(self) + + # Export all resources and generate the finish response + self._export_resources() + + def _final_cleanup(self): + """Overwrite this function in subclasses to perform the final cleanup + """ + # Clean up and remove the temporary gisdbase + self._cleanup() + # Remove resource directories + if "error" in self.run_state or "terminated" in self.run_state: + self.storage_interface.remove_resources() From 4b9741f455bc1aeee2e5677e7ba24c4651793955 Mon Sep 17 00:00:00 2001 
From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 22 Sep 2022 15:19:34 +0200 Subject: [PATCH 35/46] Black (#378) * linting tests + black workflow * linting * black * further black * fix unittests * fix test Co-authored-by: anikaweinmann --- .../ephemeral_processing_with_export.py | 274 +++++++++++------- 1 file changed, 175 insertions(+), 99 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py index a06d4d6..6da06ad 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -26,15 +26,18 @@ with export of required map layers. """ import os -from actinia_core.processing.actinia_processing.ephemeral_processing \ - import EphemeralProcessing +from actinia_core.processing.actinia_processing.ephemeral_processing import ( + EphemeralProcessing, +) from actinia_core.core.common.process_object import Process from actinia_core.core.common.exceptions import AsyncProcessTermination from actinia_core.core.stac_exporter_interface import STACExporter __license__ = "GPLv3" __author__ = "Sören Gebbert" -__copyright__ = "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +__copyright__ = ( + "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" +) __maintainer__ = "mundialis" __email__ = "info@mundialis.de" @@ -44,12 +47,13 @@ class EphemeralProcessingWithExport(EphemeralProcessing): This class processes GRASS data on the local machine in an temporary mapset and copies the exported results to a dedicated storage location. - The temporary mapset will be removed by this class when the processing finished - and the results are stored in the dedicated storage location. 
+ The temporary mapset will be removed by this class when the processing + finished and the results are stored in the dedicated storage location. TODO: Implement the export of arbitrary files that were generated in the processing of the process chain """ + def __init__(self, rdc): """ Setup the variables of this class @@ -63,10 +67,13 @@ def __init__(self, rdc): # Create the storage interface to store the exported resources self.storage_interface = rdc.create_storage_interface() - def _export_raster(self, raster_name, - format="COG", - additional_options=[], - use_raster_region=False): + def _export_raster( + self, + raster_name, + format="COG", + additional_options=[], + use_raster_region=False, + ): """Export a specific raster layer with r.out.gdal as GeoTiff. The result is stored in a temporary directory @@ -81,7 +88,8 @@ def _export_raster(self, raster_name, raster_name (str): The name of the raster layer format (str): COG (default; requires GDAL >= 3.1 on server), GTiff additional_options (list): Unused - use_raster_region (bool): Use the region of the raster layer for export + use_raster_region (bool): Use the region of the raster layer for + export Returns: tuple: A tuple (file_name, output_path) @@ -97,36 +105,46 @@ def _export_raster(self, raster_name, if use_raster_region is True: - p = Process(exec_type="grass", - executable="g.region", - executable_params=["raster=%s" % raster_name, "-g"], - id=f"exporter_region_{raster_name}", - stdin_source=None) + p = Process( + exec_type="grass", + executable="g.region", + executable_params=["raster=%s" % raster_name, "-g"], + id=f"exporter_region_{raster_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_module(p) - if format == 'COG': + if format == "COG": # check if GDAL has COG driver from osgeo import gdal - driver_list = [gdal.GetDriver( - i).ShortName for i in range(gdal.GetDriverCount())] - if 'COG' not in driver_list: - format = 'GTiff' - self.message_logger.info("COG driver not 
available, using GTiff driver") + + driver_list = [ + gdal.GetDriver(i).ShortName + for i in range(gdal.GetDriverCount()) + ] + if "COG" not in driver_list: + format = "GTiff" + self.message_logger.info( + "COG driver not available, using GTiff driver" + ) # Save the file in the temporary directory of the temporary gisdb output_path = os.path.join(self.temp_file_path, file_name) module_name = "r.out.gdal" args = [ - "-fmt", "input=%s" % raster_name, "format=%s" % - format, "output=%s" % output_path] + "-fmt", + "input=%s" % raster_name, + "format=%s" % format, + "output=%s" % output_path, + ] create_opts = "createopt=BIGTIFF=YES,COMPRESS=LZW" if format == "GTiff": # generate overviews with compression: - os.environ['COMPRESS_OVERVIEW'] = "LZW" + os.environ["COMPRESS_OVERVIEW"] = "LZW" args.append("overviews=5") create_opts += ",TILED=YES" @@ -140,11 +158,13 @@ def _export_raster(self, raster_name, if additional_options: args.extend(additional_options) - p = Process(exec_type="grass", - executable=module_name, - executable_params=args, - id=f"exporter_raster_{raster_name}", - stdin_source=None) + p = Process( + exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_raster_{raster_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_module(p) @@ -169,9 +189,11 @@ def _export_strds(self, strds_name, format="GTiff"): file_name = strds_name.split("@")[0] + suffix output_path = os.path.join(self.temp_file_path, file_name) - if format != 'GTiff': - format = 'GTiff' - self.message_logger.info("Only GTiff driver is supported for STRDS export.") + if format != "GTiff": + format = "GTiff" + self.message_logger.info( + "Only GTiff driver is supported for STRDS export." 
+ ) module_name = "t.rast.export" args = [ @@ -179,28 +201,32 @@ def _export_strds(self, strds_name, format="GTiff"): "format=%s" % format, "output=%s" % output_path, "directory=%s" % self.temp_file_path, - "compression=%s" % "gzip" + "compression=%s" % "gzip", ] # optimized for GTiff create_opts = "createopt=BIGTIFF=YES,COMPRESS=LZW,TILED=YES" args.append(create_opts) - os.environ['COMPRESS_OVERVIEW'] = "LZW" + os.environ["COMPRESS_OVERVIEW"] = "LZW" - p = Process(exec_type="grass", - executable=module_name, - executable_params=args, - id=f"exporter_strds_{strds_name}", - stdin_source=None) + p = Process( + exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_strds_{strds_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_module(p) return file_name, output_path - def _export_vector(self, vector_name, - format="GPKG", - additional_options=[]): - """Export a specific vector layer with v.out.ogr using a specific output format + def _export_vector( + self, vector_name, format="GPKG", additional_options=[] + ): + """ + Export a specific vector layer with v.out.ogr using a specific output + format The result is stored in a temporary directory that is located in the temporary grass database. 
@@ -209,7 +235,8 @@ def _export_vector(self, vector_name, Args: vector_name (str): The name of the raster layer - format (str): GPKG (default), GML, GeoJSON, ESRI_Shapefile, SQLite, CSV + format (str): GPKG (default), GML, GeoJSON, ESRI_Shapefile, SQLite, + CSV additional_options (list): Unused Returns: @@ -237,47 +264,59 @@ def _export_vector(self, vector_name, # Remove a potential mapset file_name = vector_name.split("@")[0] + prefix archive_name = file_name + ".zip" - # switch into the temporary working directory to use relative path for zip + # switch into the temporary working directory to use relative path for + # zip os.chdir(self.temp_file_path) module_name = "v.out.ogr" - args = ["-e", "input=%s" % vector_name, "format=%s" % format, - "output=%s" % file_name] + args = [ + "-e", + "input=%s" % vector_name, + "format=%s" % format, + "output=%s" % file_name, + ] if additional_options: args.extend(additional_options) # Export - p = Process(exec_type="grass", - executable=module_name, - executable_params=args, - id=f"exporter_vector_{vector_name}", - stdin_source=None) + p = Process( + exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_vector_{vector_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_module(p) # Compression - compressed_output_path = os.path.join(self.temp_file_path, archive_name) + compressed_output_path = os.path.join( + self.temp_file_path, archive_name + ) executable = "/usr/bin/zip" args = ["-r", archive_name, file_name] - p = Process(exec_type="exec", - executable=executable, - executable_params=args, - id=f"exporter_zip_{vector_name}", - stdin_source=None) + p = Process( + exec_type="exec", + executable=executable, + executable_params=args, + id=f"exporter_zip_{vector_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_process(p) return archive_name, compressed_output_path - def _export_postgis(self, vector_name, dbstring, - output_layer=None, - 
additional_options=[]): - """Export a specific vector layer with v.out.postgis to a PostGIS database + def _export_postgis( + self, vector_name, dbstring, output_layer=None, additional_options=[] + ): + """ + Export a specific vector layer with v.out.postgis to a PostGIS database Args: vector_name (str): The name of the raster layer @@ -301,11 +340,13 @@ def _export_postgis(self, vector_name, dbstring, args.extend(additional_options) # Export - p = Process(exec_type="grass", - executable=module_name, - executable_params=args, - id=f"exporter_postgis_{vector_name}", - stdin_source=None) + p = Process( + exec_type="grass", + executable=module_name, + executable_params=args, + id=f"exporter_postgis_{vector_name}", + stdin_source=None, + ) self._update_num_of_steps(1) self._run_module(p) @@ -319,7 +360,8 @@ def _export_file(self, tmp_file, file_name): The output file will always be compressed using zip Args: - tmp_file (str): The name of the temporary file generated by a module + tmp_file (str): The name of the temporary file generated by a + module file_name (str): The file name to be used for export Returns: @@ -331,20 +373,25 @@ def _export_file(self, tmp_file, file_name): """ # Export the file archive_name = file_name + ".zip" - # switch into the temporary working directory to use relative path for zip + # switch into the temporary working directory to use relative path for + # zip os.chdir(self.temp_file_path) # Compression - compressed_output_path = os.path.join(self.temp_file_path, archive_name) + compressed_output_path = os.path.join( + self.temp_file_path, archive_name + ) executable = "/usr/bin/zip" args = ["-r", archive_name, tmp_file] - p = Process(exec_type="exec", - executable=executable, - executable_params=args, - id=f"exporter_file_{file_name}", - stdin_source=None) + p = Process( + exec_type="exec", + executable=executable, + executable_params=args, + id=f"exporter_file_{file_name}", + stdin_source=None, + ) self._update_num_of_steps(1) 
self._run_process(p) @@ -352,7 +399,8 @@ def _export_file(self, tmp_file, file_name): return archive_name, compressed_output_path def _export_resources(self, use_raster_region=False): - """Export all resources that were listed in the process chain description. + """ + Export all resources that were listed in the process chain description. Save all exported files in a temporary directory first, then copy the data to its destination after the export is finished. @@ -364,17 +412,26 @@ def _export_resources(self, use_raster_region=False): for resource in self.resource_export_list: - # print("Check for termination %i" - # % self.resource_logger.get_termination(self.user_id, self.resource_id)) - # Check for termination requests between the exports - if bool(self.resource_logger.get_termination( - self.user_id, self.resource_id, self.iteration)) is True: + if ( + bool( + self.resource_logger.get_termination( + self.user_id, self.resource_id, self.iteration + ) + ) + is True + ): raise AsyncProcessTermination( - "Resource export was terminated by user request") + "Resource export was terminated by user request" + ) # Raster export - if resource["export"]["type"] in ["raster", "vector", "file", "strds"]: + if resource["export"]["type"] in [ + "raster", + "vector", + "file", + "strds", + ]: output_type = resource["export"]["type"] output_path = None @@ -387,12 +444,15 @@ def _export_resources(self, use_raster_region=False): if output_type == "raster": message = "Export raster layer <%s> with format %s" % ( - file_name, resource["export"]["format"]) + file_name, + resource["export"]["format"], + ) self._send_resource_update(message) output_name, output_path = self._export_raster( raster_name=file_name, format=resource["export"]["format"], - use_raster_region=use_raster_region) + use_raster_region=use_raster_region, + ) elif output_type == "vector": if "PostgreSQL" in resource["export"]["format"]: @@ -401,35 +461,47 @@ def _export_resources(self, use_raster_region=False): if 
"output_layer" in resource["export"]: output_layer = resource["export"]["output_layer"] - message = "Export vector layer <%s> to PostgreSQL database" % ( - file_name) + message = ( + "Export vector layer <%s> to PostgreSQL database" + % (file_name) + ) self._send_resource_update(message) self._export_postgis( - vector_name=file_name, dbstring=dbstring, - output_layer=output_layer) + vector_name=file_name, + dbstring=dbstring, + output_layer=output_layer, + ) # continue else: message = "Export vector layer <%s> with format %s" % ( - file_name, resource["export"]["format"]) + file_name, + resource["export"]["format"], + ) self._send_resource_update(message) output_name, output_path = self._export_vector( vector_name=file_name, - format=resource["export"]["format"]) + format=resource["export"]["format"], + ) elif output_type == "file": file_name = resource["file_name"] tmp_file = resource["tmp_file"] output_name, output_path = self._export_file( - tmp_file=tmp_file, file_name=file_name) + tmp_file=tmp_file, file_name=file_name + ) elif output_type == "strds": message = "Export strds layer <%s> with format %s" % ( - file_name, resource["export"]["format"]) + file_name, + resource["export"]["format"], + ) self._send_resource_update(message) output_name, output_path = self._export_strds( strds_name=file_name, - format=resource["export"]["format"]) + format=resource["export"]["format"], + ) else: raise AsyncProcessTermination( - "Unknown export format %s" % output_type) + "Unknown export format %s" % output_type + ) message = "Moving generated resources to final destination" self._send_resource_update(message) @@ -437,15 +509,18 @@ def _export_resources(self, use_raster_region=False): # Store the temporary file in the resource storage # and receive the resource URL if output_path is not None: - resource_url = self.storage_interface.store_resource(output_path) + resource_url = self.storage_interface.store_resource( + output_path + ) 
self.resource_url_list.append(resource_url) if "metadata" in resource: if resource["metadata"]["format"] == "STAC": stac = STACExporter() - stac_catalog = stac.stac_builder(resource_url, file_name, - output_type) + stac_catalog = stac.stac_builder( + resource_url, file_name, output_type + ) self.resource_url_list.append(stac_catalog) def _execute(self, skip_permission_check=False): @@ -474,7 +549,8 @@ def _execute(self, skip_permission_check=False): self._export_resources() def _final_cleanup(self): - """Overwrite this function in subclasses to perform the final cleanup + """ + Overwrite this function in subclasses to perform the final cleanup """ # Clean up and remove the temporary gisdbase self._cleanup() From fff8601d258f8d004e0cebff4ed5ccbf74499e05 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:14:28 +0200 Subject: [PATCH 36/46] Black (#458) * make resource resource delition asynchron * trailing and ending precommit ... 
* do not use pylint * trigger linting * linting * linting --------- Co-authored-by: anikaweinmann --- .../ephemeral/ephemeral_processing_with_export.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py index 6da06ad..258d389 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -104,7 +104,6 @@ def _export_raster( file_name = raster_name.split("@")[0] + suffix if use_raster_region is True: - p = Process( exec_type="grass", executable="g.region", @@ -411,7 +410,6 @@ def _export_resources(self, use_raster_region=False): """ for resource in self.resource_export_list: - # Check for termination requests between the exports if ( bool( @@ -432,7 +430,6 @@ def _export_resources(self, use_raster_region=False): "file", "strds", ]: - output_type = resource["export"]["type"] output_path = None From 7366ec58b0cedf1e5a6fc0c5ec846b9f128a80f7 Mon Sep 17 00:00:00 2001 From: Carmen Date: Fri, 23 Feb 2024 15:14:13 +0100 Subject: [PATCH 37/46] Merge branch 'main' of github.com:actinia-org/actinia-core into main --- .../ephemeral/ephemeral_processing_with_export.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py index 258d389..2d71cf3 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -4,7 +4,7 @@ # performance processing of geographical data that uses GRASS GIS for # computational 
tasks. For details, see https://actinia.mundialis.de/ # -# Copyright (c) 2016-2022 Sören Gebbert and mundialis GmbH & Co. KG +# Copyright (c) 2016-2024 Sören Gebbert and mundialis GmbH & Co. KG # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,7 +36,7 @@ __license__ = "GPLv3" __author__ = "Sören Gebbert" __copyright__ = ( - "Copyright 2016-2022, Sören Gebbert and mundialis GmbH & Co. KG" + "Copyright 2016-2024, Sören Gebbert and mundialis GmbH & Co. KG" ) __maintainer__ = "mundialis" __email__ = "info@mundialis.de" @@ -148,11 +148,6 @@ def _export_raster( create_opts += ",TILED=YES" args.append(create_opts) - # current workaround due to color table export - # COG bug in GDAL, see https://github.com/OSGeo/gdal/issues/2946 - # TODO: DELETE AND TEST ONCE GDAL 3.1.4 HAS BEEN RELEASED - if format == "COG": - args.append("-c") if additional_options: args.extend(additional_options) From 47a30db9a5cd1d8d66f7b488a7db3903b18e7eb1 Mon Sep 17 00:00:00 2001 From: Anika Weinmann <37300249+anikaweinmann@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:59:37 +0100 Subject: [PATCH 38/46] Renamed GRASS GIS locations to projects (#565) * add decorator for deprecated locations * add project endpoints * endpoints for projects * rename location * rename location * unify maintainer * fix GRASS Initialisation * black * fixes for tests and G83 * fixes * add rest of changes * Update tests/test_job_resumption.py * fix endpoints and job resumption tests * Update src/actinia_core/core/common/api_logger.py * Tests for G84 * Test pipelines for G8.3 and G8.4 * Update src/actinia_core/rest/base/resource_base.py * fix test workflow * fix tests for G83 * fix error due to lib update * fix proc name * Update src/actinia_core/endpoints.py Co-authored-by: Carmen Tawalika --------- Co-authored-by: Carmen Tawalika Co-authored-by: Carmen --- .../ephemeral/ephemeral_processing_with_export.py | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py index 2d71cf3..205a0aa 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -34,21 +34,21 @@ from actinia_core.core.stac_exporter_interface import STACExporter __license__ = "GPLv3" -__author__ = "Sören Gebbert" +__author__ = "Sören Gebbert, Anika Weinmann" __copyright__ = ( "Copyright 2016-2024, Sören Gebbert and mundialis GmbH & Co. KG" ) -__maintainer__ = "mundialis" +__maintainer__ = "mundialis GmbH & Co. KG" __email__ = "info@mundialis.de" class EphemeralProcessingWithExport(EphemeralProcessing): """ This class processes GRASS data on the local machine in an temporary mapset - and copies the exported results to a dedicated storage location. + and copies the exported results to a dedicated storage project. The temporary mapset will be removed by this class when the processing - finished and the results are stored in the dedicated storage location. + finished and the results are stored in the dedicated storage project. 
TODO: Implement the export of arbitrary files that were generated in the processing of the process chain From c072d393c64e0dcfbcb45ff986274977754b9ae0 Mon Sep 17 00:00:00 2001 From: Carmen Tawalika Date: Thu, 5 Dec 2024 10:05:17 +0100 Subject: [PATCH 39/46] Introduce ruff (#574) * rename location * fixes for tests and G83 * fix job resumption tests * linting * initial ruff check * update test imports * lint * F841 and Post-PR * lint * test with wip linting workflow * test post-pr * fix ref * trigger build * always-post-pr * post-pr in separate workflow * rename workflow * fix lint error * trigger pipelines * add ruff config * update post-pr workflow * Update .github/workflows/linting.yml * fix tests --------- Co-authored-by: anikaweinmann --- .../ephemeral/ephemeral_processing_with_export.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py index 205a0aa..4aa8e55 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py @@ -440,7 +440,7 @@ def _export_resources(self, use_raster_region=False): resource["export"]["format"], ) self._send_resource_update(message) - output_name, output_path = self._export_raster( + _, output_path = self._export_raster( raster_name=file_name, format=resource["export"]["format"], use_raster_region=use_raster_region, @@ -470,14 +470,14 @@ def _export_resources(self, use_raster_region=False): resource["export"]["format"], ) self._send_resource_update(message) - output_name, output_path = self._export_vector( + _, output_path = self._export_vector( vector_name=file_name, format=resource["export"]["format"], ) elif output_type == "file": file_name = resource["file_name"] tmp_file = 
resource["tmp_file"] - output_name, output_path = self._export_file( + _, output_path = self._export_file( tmp_file=tmp_file, file_name=file_name ) elif output_type == "strds": @@ -486,7 +486,7 @@ def _export_resources(self, use_raster_region=False): resource["export"]["format"], ) self._send_resource_update(message) - output_name, output_path = self._export_strds( + _, output_path = self._export_strds( strds_name=file_name, format=resource["export"]["format"], ) From f42f3f68ae5221deb045fa8530282dc4a48564cd Mon Sep 17 00:00:00 2001 From: Carmen Date: Wed, 4 Jun 2025 11:10:38 +0200 Subject: [PATCH 40/46] simplify folder structure --- src/actinia_processing_lib/core/__init__.py | 0 src/actinia_processing_lib/core/common/__init__.py | 0 .../actinia_processing => }/ephemeral_processing.py | 2 +- .../ephemeral_processing_with_export.py | 6 ++---- src/actinia_processing_lib/{core/common => }/exceptions.py | 0 .../ephemeral => }/persistent_processing.py | 7 +++---- .../{processing/common => }/utils.py | 0 7 files changed, 6 insertions(+), 9 deletions(-) delete mode 100644 src/actinia_processing_lib/core/__init__.py delete mode 100644 src/actinia_processing_lib/core/common/__init__.py rename src/actinia_processing_lib/{processing/actinia_processing => }/ephemeral_processing.py (99%) rename src/{actinia_core/processing/actinia_processing/ephemeral => actinia_processing_lib}/ephemeral_processing_with_export.py (99%) rename src/actinia_processing_lib/{core/common => }/exceptions.py (100%) rename src/actinia_processing_lib/{processing/actinia_processing/ephemeral => }/persistent_processing.py (99%) rename src/actinia_processing_lib/{processing/common => }/utils.py (100%) diff --git a/src/actinia_processing_lib/core/__init__.py b/src/actinia_processing_lib/core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/actinia_processing_lib/core/common/__init__.py b/src/actinia_processing_lib/core/common/__init__.py deleted file mode 100644 index 
e69de29..0000000 diff --git a/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py b/src/actinia_processing_lib/ephemeral_processing.py similarity index 99% rename from src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py rename to src/actinia_processing_lib/ephemeral_processing.py index aae9d7d..dbee9ac 100644 --- a/src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py +++ b/src/actinia_processing_lib/ephemeral_processing.py @@ -61,7 +61,7 @@ from flask import json from requests.auth import HTTPBasicAuth -from actinia_processing_lib.core.common.exceptions import ( +from actinia_processing_lib.exceptions import ( AsyncProcessError, AsyncProcessTermination, AsyncProcessTimeLimit, diff --git a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py b/src/actinia_processing_lib/ephemeral_processing_with_export.py similarity index 99% rename from src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py rename to src/actinia_processing_lib/ephemeral_processing_with_export.py index 4aa8e55..1c9341c 100644 --- a/src/actinia_core/processing/actinia_processing/ephemeral/ephemeral_processing_with_export.py +++ b/src/actinia_processing_lib/ephemeral_processing_with_export.py @@ -26,12 +26,10 @@ with export of required map layers. 
""" import os -from actinia_core.processing.actinia_processing.ephemeral_processing import ( - EphemeralProcessing, -) from actinia_core.core.common.process_object import Process -from actinia_core.core.common.exceptions import AsyncProcessTermination from actinia_core.core.stac_exporter_interface import STACExporter +from actinia_processing_lib.ephemeral_processing import EphemeralProcessing +from actinia_processing_lib.exceptions import AsyncProcessTermination __license__ = "GPLv3" __author__ = "Sören Gebbert, Anika Weinmann" diff --git a/src/actinia_processing_lib/core/common/exceptions.py b/src/actinia_processing_lib/exceptions.py similarity index 100% rename from src/actinia_processing_lib/core/common/exceptions.py rename to src/actinia_processing_lib/exceptions.py diff --git a/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py b/src/actinia_processing_lib/persistent_processing.py similarity index 99% rename from src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py rename to src/actinia_processing_lib/persistent_processing.py index f7509a9..a040062 100644 --- a/src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py +++ b/src/actinia_processing_lib/persistent_processing.py @@ -29,10 +29,9 @@ from actinia_core.core.mapset_merge_utils import change_mapsetname -from actinia_processing_lib.core.common.exceptions import AsyncProcessError -from actinia_processing_lib.processing.actinia_processing.ephemeral_processing import ( - EphemeralProcessing, -) +from actinia_processing_lib.exceptions import AsyncProcessError +from actinia_processing_lib.ephemeral_processing import EphemeralProcessing + __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" diff --git a/src/actinia_processing_lib/processing/common/utils.py b/src/actinia_processing_lib/utils.py similarity index 100% rename from 
src/actinia_processing_lib/processing/common/utils.py rename to src/actinia_processing_lib/utils.py From fec4f4314316e72d9e7434afc36d8c9afaa51e6e Mon Sep 17 00:00:00 2001 From: Carmen Date: Thu, 5 Jun 2025 15:36:49 +0200 Subject: [PATCH 41/46] lint --- .flake8 | 4 +- ruff.toml | 61 ++++++++++++++----- .../persistent_processing.py | 3 +- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/.flake8 b/.flake8 index 80ef952..bea181b 100644 --- a/.flake8 +++ b/.flake8 @@ -6,5 +6,5 @@ exclude = .git,.pycache,build,.eggs per-file-ignores = - ./src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py: E501 - ./src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py: E501 + ./src/actinia_processing_lib/ephemeral_processing.py: E501 + ./src/actinia_processing_lib/persistent_processing.py: E501 diff --git a/ruff.toml b/ruff.toml index 0a46830..a75af4d 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,7 +1,36 @@ lint.ignore = ["D104",] [lint.per-file-ignores] -"src/actinia_processing_lib/processing/actinia_processing/ephemeral_processing.py" = [ +"src/actinia_processing_lib/ephemeral_processing_with_export.py" = [ + "A001", + "A002", + "ANN001", + "ANN202", + "ANN204", + "ARG002", + "B006", + "COM812", + "D200", + "D202", + "D205", + "D212", + "D400", + "D401", + "D404", + "D415", + "FBT002", + "I001", + "PLC0415", + "PLR0912", + "PLR6201", + "PTH118", + "SIM102", + "SLF001", + "TRY003", + "UP009", + "UP031", +] +"src/actinia_processing_lib/ephemeral_processing.py" = [ "A001", "A002", "ANN001", @@ -70,7 +99,13 @@ lint.ignore = ["D104",] "TRY400", "UP031", ] -"src/actinia_processing_lib/processing/actinia_processing/ephemeral/persistent_processing.py" = [ +"src/actinia_processing_lib/exceptions.py" = [ + "ANN001", + "D107", + "D205", + "N818", +] +"src/actinia_processing_lib/persistent_processing.py" = [ "ANN001", "ANN201", "ANN202", @@ -98,6 +133,14 @@ lint.ignore = ["D104",] "TRY301", "UP031", ] 
+"src/actinia_processing_lib/utils.py" = [ + "ANN001", + "ANN201", + "D205", + "D401", + "D417", + "ISC003", +] "tests/*" = [ "PLR0913", "PLR0917", @@ -106,17 +149,3 @@ lint.ignore = ["D104",] "S107", "S606", ] -"src/actinia_processing_lib/core/common/exceptions.py" = [ - "ANN001", - "D107", - "D205", - "N818", -] -"src/actinia_processing_lib/processing/common/utils.py" = [ - "ANN001", - "ANN201", - "D205", - "D401", - "D417", - "ISC003", -] diff --git a/src/actinia_processing_lib/persistent_processing.py b/src/actinia_processing_lib/persistent_processing.py index a040062..c180a99 100644 --- a/src/actinia_processing_lib/persistent_processing.py +++ b/src/actinia_processing_lib/persistent_processing.py @@ -29,9 +29,8 @@ from actinia_core.core.mapset_merge_utils import change_mapsetname -from actinia_processing_lib.exceptions import AsyncProcessError from actinia_processing_lib.ephemeral_processing import EphemeralProcessing - +from actinia_processing_lib.exceptions import AsyncProcessError __license__ = "GPLv3" __author__ = "Sören Gebbert, Guido Riembauer, Anika Weinmann, Lina Krisztian" From bef809e72f95d45568262787cf34fef5f46c3571 Mon Sep 17 00:00:00 2001 From: Carmen Date: Fri, 6 Jun 2025 14:18:05 +0200 Subject: [PATCH 42/46] fix tests --- docker/Dockerfile | 24 - docker/actinia-processing-lib-test/Dockerfile | 27 +- .../actinia-processing-lib-test.cfg | 2 +- docker/actinia.cfg | 28 - docker/docker-compose.yml | 42 - docker/valkey_data/config/.valkey | 1 - docker/valkey_data/config/valkey.conf | 2420 ----------------- .../test_ephemeral_processing.py | 118 + tests/integrationtests/test_helloworld.py | 94 - .../test_projecthelloworld.py | 136 - tests/unittests/test_transformation.py | 38 - tests_with_kvdb.sh | 25 +- 12 files changed, 132 insertions(+), 2823 deletions(-) delete mode 100644 docker/Dockerfile delete mode 100644 docker/actinia.cfg delete mode 100644 docker/docker-compose.yml delete mode 100644 docker/valkey_data/config/.valkey delete mode 100644 
docker/valkey_data/config/valkey.conf create mode 100644 tests/integrationtests/test_ephemeral_processing.py delete mode 100644 tests/integrationtests/test_helloworld.py delete mode 100644 tests/integrationtests/test_projecthelloworld.py delete mode 100644 tests/unittests/test_transformation.py diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 3800f18..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Exception for hadolint-docker-linter: -# DL3007: using latest is prone to errors if the image will ever update. Pin the version explicitly to a release tag -# hadolint ignore=DL3007 -FROM mundialis/actinia:latest - -# pwgen is needed for the tests -RUN pip3 install --no-cache-dir pwgen==0.8.2.post0 - -COPY docker/actinia.cfg /etc/default/actinia -COPY src /src/actinia-processing-lib/src/ -COPY setup.cfg /src/actinia-processing-lib/ -COPY setup.py /src/actinia-processing-lib/ -COPY requirements.txt /src/actinia-processing-lib/ - -RUN pip3 install --no-cache-dir -r /src/actinia-processing-lib/requirements.txt && \ - pip3 uninstall actinia-processing-lib.wsgi -y -# SETUPTOOLS_SCM_PRETEND_VERSION is only needed if in the plugin folder is no -# .git folder -ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.0 - -WORKDIR /src/actinia-processing-lib -RUN pip3 install --no-cache-dir -e . - -WORKDIR /src/actinia_core diff --git a/docker/actinia-processing-lib-test/Dockerfile b/docker/actinia-processing-lib-test/Dockerfile index 89b1dee..c7b3b6c 100644 --- a/docker/actinia-processing-lib-test/Dockerfile +++ b/docker/actinia-processing-lib-test/Dockerfile @@ -1,33 +1,18 @@ # Exception for hadolint-docker-linter: # DL3007: using latest is prone to errors if the image will ever update. 
Pin the version explicitly to a release tag # hadolint ignore=DL3007 -FROM mundialis/actinia:latest AS actinia_test - -LABEL authors="Carmen Tawalika,Anika Weinmann" -LABEL maintainer="tawalika@mundialis.de,weinmann@mundialis.de" +FROM mundialis/actinia:latest ENV ACTINIA_CUSTOM_TEST_CFG=/etc/default/actinia-processing-lib-test - # TODO do not set DEFAULT_CONFIG_PATH if this is fixed ENV DEFAULT_CONFIG_PATH=/etc/default/actinia-processing-lib-test -# install things only for tests -# DL3018: Pin versions in apk add # hadolint ignore=DL3018 -RUN apk add --no-cache valkey && \ - pip3 install --no-cache-dir iniconfig==2.0.0 colorlog==6.8.2 pwgen==0.8.2.post0 pytest==8.3.5 pytest-cov==6.0.0 - -# COPY docker/actinia-processing-lib-test/start.sh /src/start.sh +RUN apk add valkey +RUN pip3 install --no-cache-dir \ + iniconfig==2.0.0 pytest==8.3.5 pytest-cov==6.0.0 -ENTRYPOINT ["/bin/sh"] -CMD ["/src/start.sh"] - -# # add data for tests -# RUN wget --quiet https://grass.osgeo.org/sampledata/north_carolina/nc_spm_08_micro.zip && \ -# unzip nc_spm_08_micro.zip && \ -# rm -f nc_spm_08_micro.zip && \ -# mv nc_spm_08_micro /actinia_core/grassdb/nc_spm_08 -# RUN grass -e -c 'EPSG:4326' /actinia_core/grassdb/latlong_wgs84 +RUN grass -c EPSG:3358 /actinia_core/userdata/user/nc_spm_08 --text # copy needed files and configs for test COPY docker/actinia-processing-lib-test/actinia-processing-lib-test.cfg /etc/default/actinia @@ -37,6 +22,4 @@ COPY . /src/actinia-processing-lib/ WORKDIR /src/actinia-processing-lib/ RUN pip3 install --no-cache-dir -e . 
-RUN chmod a+x tests_with_kvdb.sh && make install - # RUN make test diff --git a/docker/actinia-processing-lib-test/actinia-processing-lib-test.cfg b/docker/actinia-processing-lib-test/actinia-processing-lib-test.cfg index dba529c..13dbbd7 100644 --- a/docker/actinia-processing-lib-test/actinia-processing-lib-test.cfg +++ b/docker/actinia-processing-lib-test/actinia-processing-lib-test.cfg @@ -8,7 +8,7 @@ grass_gis_start_script = /usr/local/bin/grass grass_addon_path = /root/.grass8/addons/ [API] -plugins = ["actinia_processing_lib"] +plugins = [] force_https_urls = True [KVDB] diff --git a/docker/actinia.cfg b/docker/actinia.cfg deleted file mode 100644 index 7f76342..0000000 --- a/docker/actinia.cfg +++ /dev/null @@ -1,28 +0,0 @@ -[GRASS] -grass_database = /actinia_core/grassdb -grass_user_database = /actinia_core/userdata -grass_tmp_database = /actinia_core/workspace/temp_db -grass_resource_dir = /actinia_core/resources -grass_gis_base = /usr/local/grass -grass_gis_start_script = /usr/local/bin/grass -grass_addon_path = /root/.grass8/addons/ - -[API] -plugins = ["actinia_processing_lib"] -force_https_urls = False - -[KVDB] -kvdb_server_url = valkey -kvdb_server_pw = pass -kvdb_resource_expire_time = 864001 -worker_logfile = /actinia_core/workspace/tmp/actinia_worker.log - -[LOGGING] -log_stdout_format = colored -log_level = 3 - -[MISC] -tmp_workdir = /actinia_core/workspace/tmp -download_cache = /actinia_core/workspace/download_cache -secret_key = token_signing_key_changeme -save_interim_results = True diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 23e897b..0000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,42 +0,0 @@ -version: "3" -services: - - actinia: - build: - context: .. - dockerfile: docker/Dockerfile - volumes: - - ..:/src/actinia-processing-lib/. 
- ports: - - "8088:8088" - depends_on: - - valkey - cap_add: - - SYS_PTRACE - networks: - - actinia - - valkey: - image: valkey/valkey:8.1-alpine - volumes: - - ./valkey_data:/data - environment: - - VALKEY_PASS_FILE=/data/config/.valkey - command: [ - "sh", "-c", - ' - docker-entrypoint.sh - "/data/config/valkey.conf" - --requirepass "$$(cat $$VALKEY_PASS_FILE)" - ' - ] - ports: - - "6379:6379" - networks: - - actinia - -networks: - actinia: - ipam: - config: - - subnet: 172.18.0.0/16 diff --git a/docker/valkey_data/config/.valkey b/docker/valkey_data/config/.valkey deleted file mode 100644 index 2ae2839..0000000 --- a/docker/valkey_data/config/.valkey +++ /dev/null @@ -1 +0,0 @@ -pass diff --git a/docker/valkey_data/config/valkey.conf b/docker/valkey_data/config/valkey.conf deleted file mode 100644 index fb0203d..0000000 --- a/docker/valkey_data/config/valkey.conf +++ /dev/null @@ -1,2420 +0,0 @@ -# Valkey configuration file example. -# -# Note that in order to read the configuration file, the server must be -# started with the file path as first argument: -# -# ./valkey-server /path/to/valkey.conf - -# Note on units: when memory size is needed, it is possible to specify -# it in the usual form of 1k 5GB 4M and so forth: -# -# 1k => 1000 bytes -# 1kb => 1024 bytes -# 1m => 1000000 bytes -# 1mb => 1024*1024 bytes -# 1g => 1000000000 bytes -# 1gb => 1024*1024*1024 bytes -# -# units are case insensitive so 1GB 1Gb 1gB are all the same. - -################################## INCLUDES ################################### - -# Include one or more other config files here. This is useful if you -# have a standard template that goes to all servers but also need -# to customize a few per-server settings. Include files can include -# other files, so use this wisely. -# -# Note that option "include" won't be rewritten by command "CONFIG REWRITE" -# from admin or Sentinel. 
Since the server always uses the last processed -# line as value of a configuration directive, you'd better put includes -# at the beginning of this file to avoid overwriting config change at runtime. -# -# If instead you are interested in using includes to override configuration -# options, it is better to use include as the last line. -# -# Included paths may contain wildcards. All files matching the wildcards will -# be included in alphabetical order. -# Note that if an include path contains a wildcards but no files match it when -# the server is started, the include statement will be ignored and no error will -# be emitted. It is safe, therefore, to include wildcard files from empty -# directories. -# -# include /path/to/local.conf -# include /path/to/other.conf -# include /path/to/fragments/*.conf -# - -################################## MODULES ##################################### - -# Load modules at startup. If the server is not able to load modules -# it will abort. It is possible to use multiple loadmodule directives. -# -# loadmodule /path/to/my_module.so -# loadmodule /path/to/other_module.so -# loadmodule /path/to/args_module.so [arg [arg ...]] - -################################## NETWORK ##################################### - -# By default, if no "bind" configuration directive is specified, the server listens -# for connections from all available network interfaces on the host machine. -# It is possible to listen to just one or multiple selected interfaces using -# the "bind" configuration directive, followed by one or more IP addresses. -# Each address can be prefixed by "-", which means that the server will not fail to -# start if the address is not available. Being not available only refers to -# addresses that does not correspond to any network interface. Addresses that -# are already in use will always fail, and unsupported protocols will always be -# silently skipped. 
-# -# Examples: -# -# bind 192.168.1.100 10.0.0.1 # listens on two specific IPv4 addresses -# bind 127.0.0.1 ::1 # listens on loopback IPv4 and IPv6 -# bind * -::* # like the default, all available interfaces -# -# ~~~ WARNING ~~~ If the computer running the server is directly exposed to the -# internet, binding to all the interfaces is dangerous and will expose the -# instance to everybody on the internet. So by default we uncomment the -# following bind directive, that will force the server to listen only on the -# IPv4 and IPv6 (if available) loopback interface addresses (this means the server -# will only be able to accept client connections from the same host that it is -# running on). -# -# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES -# COMMENT OUT THE FOLLOWING LINE. -# -# You will also need to set a password unless you explicitly disable protected -# mode. -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# bind 127.0.0.1 -::1 - -# By default, outgoing connections (from replica to primary, from Sentinel to -# instances, cluster bus, etc.) are not bound to a specific local address. In -# most cases, this means the operating system will handle that based on routing -# and the interface through which the connection goes out. -# -# Using bind-source-addr it is possible to configure a specific address to bind -# to, which may also affect how the connection gets routed. -# -# Example: -# -# bind-source-addr 10.0.0.1 - -# Protected mode is a layer of security protection, in order to avoid that -# the server instances left open on the internet are accessed and exploited. -# -# When protected mode is on and the default user has no password, the server -# only accepts local connections from the IPv4 address (127.0.0.1), IPv6 address -# (::1) or Unix domain sockets. -# -# By default protected mode is enabled. 
You should disable it only if -# you are sure you want clients from other hosts to connect to the server -# even if no authentication is configured. -protected-mode yes - -# The server uses default hardened security configuration directives to reduce the -# attack surface on innocent users. Therefore, several sensitive configuration -# directives are immutable, and some potentially-dangerous commands are blocked. -# -# Configuration directives that control files that the server writes to (e.g., 'dir' -# and 'dbfilename') and that aren't usually modified during runtime -# are protected by making them immutable. -# -# Commands that can increase the attack surface of the server and that aren't usually -# called by users are blocked by default. -# -# These can be exposed to either all connections or just local ones by setting -# each of the configs listed below to either of these values: -# -# no - Block for any connection (remain immutable) -# yes - Allow for any connection (no protection) -# local - Allow only for local connections. Ones originating from the -# IPv4 address (127.0.0.1), IPv6 address (::1) or Unix domain sockets. -# -# enable-protected-configs no -# enable-debug-command no -# enable-module-command no - -# Accept connections on the specified port, default is 6379 (IANA #815344). -# If port 0 is specified the server will not listen on a TCP socket. -port 6379 - -# TCP listen() backlog. -# -# In high requests-per-second environments you need a high backlog in order -# to avoid slow clients connection issues. Note that the Linux kernel -# will silently truncate it to the value of /proc/sys/net/core/somaxconn so -# make sure to raise both the value of somaxconn and tcp_max_syn_backlog -# in order to get the desired effect. -tcp-backlog 511 - -# Unix socket. -# -# Specify the path for the Unix socket that will be used to listen for -# incoming connections. There is no default, so the server will not listen -# on a unix socket when not specified. 
-# -# unixsocket /run/valkey.sock -# unixsocketgroup wheel -# unixsocketperm 700 - -# Close the connection after a client is idle for N seconds (0 to disable) -timeout 0 - -# TCP keepalive. -# -# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence -# of communication. This is useful for two reasons: -# -# 1) Detect dead peers. -# 2) Force network equipment in the middle to consider the connection to be -# alive. -# -# On Linux, the specified value (in seconds) is the period used to send ACKs. -# Note that to close the connection the double of the time is needed. -# On other kernels the period depends on the kernel configuration. -tcp-keepalive 300 - -# Apply OS-specific mechanism to mark the listening socket with the specified -# ID, to support advanced routing and filtering capabilities. -# -# On Linux, the ID represents a connection mark. -# On FreeBSD, the ID represents a socket cookie ID. -# On OpenBSD, the ID represents a route table ID. -# -# The default value is 0, which implies no marking is required. -# socket-mark-id 0 - -################################# TLS/SSL ##################################### - -# By default, TLS/SSL is disabled. To enable it, the "tls-port" configuration -# directive can be used to define TLS-listening ports. To enable TLS on the -# default port, use: -# -# port 0 -# tls-port 6379 - -# Configure a X.509 certificate and private key to use for authenticating the -# server to connected clients, primaries or cluster peers. These files should be -# PEM formatted. -# -# tls-cert-file valkey.crt -# tls-key-file valkey.key -# -# If the key file is encrypted using a passphrase, it can be included here -# as well. -# -# tls-key-file-pass secret - -# Normally the server uses the same certificate for both server functions (accepting -# connections) and client functions (replicating from a primary, establishing -# cluster bus connections, etc.). 
-# -# Sometimes certificates are issued with attributes that designate them as -# client-only or server-only certificates. In that case it may be desired to use -# different certificates for incoming (server) and outgoing (client) -# connections. To do that, use the following directives: -# -# tls-client-cert-file client.crt -# tls-client-key-file client.key -# -# If the key file is encrypted using a passphrase, it can be included here -# as well. -# -# tls-client-key-file-pass secret - -# Configure a DH parameters file to enable Diffie-Hellman (DH) key exchange, -# required by older versions of OpenSSL (<3.0). Newer versions do not require -# this configuration and recommend against it. -# -# tls-dh-params-file valkey.dh - -# Configure a CA certificate(s) bundle or directory to authenticate TLS/SSL -# clients and peers. The server requires an explicit configuration of at least one -# of these, and will not implicitly use the system wide configuration. -# -# tls-ca-cert-file ca.crt -# tls-ca-cert-dir /etc/ssl/certs - -# By default, clients (including replica servers) on a TLS port are required -# to authenticate using valid client side certificates. -# -# If "no" is specified, client certificates are not required and not accepted. -# If "optional" is specified, client certificates are accepted and must be -# valid if provided, but are not required. -# -# tls-auth-clients no -# tls-auth-clients optional - -# By default, a replica does not attempt to establish a TLS connection -# with its primary. -# -# Use the following directive to enable TLS on replication links. -# -# tls-replication yes - -# By default, the cluster bus uses a plain TCP connection. To enable -# TLS for the bus protocol, use the following directive: -# -# tls-cluster yes - -# By default, only TLSv1.2 and TLSv1.3 are enabled and it is highly recommended -# that older formally deprecated versions are kept disabled to reduce the attack surface. -# You can explicitly specify TLS versions to support. 
-# Allowed values are case insensitive and include "TLSv1", "TLSv1.1", "TLSv1.2", -# "TLSv1.3" (OpenSSL >= 1.1.1) or any combination. -# To enable only TLSv1.2 and TLSv1.3, use: -# -# tls-protocols "TLSv1.2 TLSv1.3" - -# Configure allowed ciphers. See the ciphers(1ssl) manpage for more information -# about the syntax of this string. -# -# Note: this configuration applies only to <= TLSv1.2. -# -# tls-ciphers DEFAULT:!MEDIUM - -# Configure allowed TLSv1.3 ciphersuites. See the ciphers(1ssl) manpage for more -# information about the syntax of this string, and specifically for TLSv1.3 -# ciphersuites. -# -# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256 - -# When choosing a cipher, use the server's preference instead of the client -# preference. By default, the server follows the client's preference. -# -# tls-prefer-server-ciphers yes - -# By default, TLS session caching is enabled to allow faster and less expensive -# reconnections by clients that support it. Use the following directive to disable -# caching. -# -# tls-session-caching no - -# Change the default number of TLS sessions cached. A zero value sets the cache -# to unlimited size. The default size is 20480. -# -# tls-session-cache-size 5000 - -# Change the default timeout of cached TLS sessions. The default timeout is 300 -# seconds. -# -# tls-session-cache-timeout 60 - -################################# GENERAL ##################################### - -# By default the server does not run as a daemon. Use 'yes' if you need it. -# Note that the server will write a pid file in /var/run/valkey.pid when daemonized. -# When the server is supervised by upstart or systemd, this parameter has no impact. -daemonize no - -# If you run the server from upstart or systemd, the server can interact with your -# supervision tree. 
Options: -# supervised no - no supervision interaction -# supervised upstart - signal upstart by putting the server into SIGSTOP mode -# requires "expect stop" in your upstart job config -# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET -# on startup, and updating the server status on a regular -# basis. -# supervised auto - detect upstart or systemd method based on -# UPSTART_JOB or NOTIFY_SOCKET environment variables -# Note: these supervision methods only signal "process is ready." -# They do not enable continuous pings back to your supervisor. -# -# The default is "no". To run under upstart/systemd, you can simply uncomment -# the line below: -# -# supervised auto - -# If a pid file is specified, the server writes it where specified at startup -# and removes it at exit. -# -# When the server runs non daemonized, no pid file is created if none is -# specified in the configuration. When the server is daemonized, the pid file -# is used even if not specified, defaulting to "/var/run/valkey.pid". -# -# Creating a pid file is best effort: if the server is not able to create it -# nothing bad happens, the server will start and run normally. -# -# Note that on modern Linux systems "/run/valkey.pid" is more conforming -# and should be used instead. -pidfile /var/run/valkey_6379.pid - -# Specify the server verbosity level. -# This can be one of: -# debug (a lot of information, useful for development/testing) -# verbose (many rarely useful info, but not a mess like the debug level) -# notice (moderately verbose, what you want in production probably) -# warning (only very important / critical messages are logged) -# nothing (nothing is logged) -loglevel notice - -# Specify the logging format. -# This can be one of: -# -# - legacy: the default, traditional log format -# - logfmt: a structured log format; see https://www.brandur.org/logfmt -# -# log-format legacy - -# Specify the timestamp format used in logs using 'log-timestamp-format'. 
-# -# - legacy: default format -# - iso8601: ISO 8601 extended date and time with time zone, on the form -# yyyy-mm-ddThh:mm:ss.sss±hh:mm -# - milliseconds: milliseconds since the epoch -# -# log-timestamp-format legacy - -# Specify the log file name. Also the empty string can be used to force -# the server to log on the standard output. Note that if you use standard -# output for logging but daemonize, logs will be sent to /dev/null -logfile "" - -# To enable logging to the system logger, just set 'syslog-enabled' to yes, -# and optionally update the other syslog parameters to suit your needs. -# syslog-enabled no - -# Specify the syslog identity. -# syslog-ident valkey - -# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. -# syslog-facility local0 - -# To disable the built in crash log, which will possibly produce cleaner core -# dumps when they are needed, uncomment the following: -# -# crash-log-enabled no - -# To disable the fast memory check that's run as part of the crash log, which -# will possibly let the server terminate sooner, uncomment the following: -# -# crash-memcheck-enabled no - -# Set the number of databases. The default database is DB 0, you can select -# a different one on a per-connection basis using SELECT where -# dbid is a number between 0 and 'databases'-1 -databases 16 - -# By default the server shows an ASCII art logo only when started to log to the -# standard output and if the standard output is a TTY and syslog logging is -# disabled. Basically this means that normally a logo is displayed only in -# interactive sessions. -# -# However it is possible to force the pre-4.0 behavior and always show a -# ASCII art logo in startup logs by setting the following option to yes. -always-show-logo no - -# User data, including keys, values, client names, and ACL usernames, can be -# logged as part of assertions and other error cases. 
To prevent sensitive user -# information, such as PII, from being recorded in the server log file, this -# user data is hidden from the log by default. If you need to log user data for -# debugging or troubleshooting purposes, you can disable this feature by -# changing the config value to no. -hide-user-data-from-log yes - -# By default, the server modifies the process title (as seen in 'top' and 'ps') to -# provide some runtime information. It is possible to disable this and leave -# the process name as executed by setting the following to no. -set-proc-title yes - -# When changing the process title, the server uses the following template to construct -# the modified title. -# -# Template variables are specified in curly brackets. The following variables are -# supported: -# -# {title} Name of process as executed if parent, or type of child process. -# {listen-addr} Bind address or '*' followed by TCP or TLS port listening on, or -# Unix socket if only that's available. -# {server-mode} Special mode, i.e. "[sentinel]" or "[cluster]". -# {port} TCP port listening on, or 0. -# {tls-port} TLS port listening on, or 0. -# {unixsocket} Unix domain socket listening on, or "". -# {config-file} Name of configuration file used. -# -proc-title-template "{title} {listen-addr} {server-mode}" - -# Set the local environment which is used for string comparison operations, and -# also affect the performance of Lua scripts. Empty String indicates the locale -# is derived from the environment variables. -locale-collate "" - -# Valkey is largely compatible with Redis OSS, apart from a few cases where -# Valkey identifies itself itself as "Valkey" rather than "Redis". Extended -# Redis OSS compatibility mode makes Valkey pretend to be Redis. Enable this -# only if you have problems with tools or clients. This is a temporary -# configuration added in Valkey 8.0 and is scheduled to have no effect in Valkey -# 9.0 and be completely removed in Valkey 10.0. 
-# -# extended-redis-compatibility no - -################################ SNAPSHOTTING ################################ - -# Save the DB to disk. -# -# save [ ...] -# -# The server will save the DB if the given number of seconds elapsed and it -# surpassed the given number of write operations against the DB. -# -# Snapshotting can be completely disabled with a single empty string argument -# as in following example: -# -# save "" -# -# Unless specified otherwise, by default the server will save the DB: -# * After 3600 seconds (an hour) if at least 1 change was performed -# * After 300 seconds (5 minutes) if at least 100 changes were performed -# * After 60 seconds if at least 10000 changes were performed -# -# You can set these explicitly by uncommenting the following line. -# -# save 3600 1 300 100 60 10000 - -# By default the server will stop accepting writes if RDB snapshots are enabled -# (at least one save point) and the latest background save failed. -# This will make the user aware (in a hard way) that data is not persisting -# on disk properly, otherwise chances are that no one will notice and some -# disaster will happen. -# -# If the background saving process will start working again, the server will -# automatically allow writes again. -# -# However if you have setup your proper monitoring of the server -# and persistence, you may want to disable this feature so that the server will -# continue to work as usual even if there are problems with disk, -# permissions, and so forth. -stop-writes-on-bgsave-error yes - -# Compress string objects using LZF when dump .rdb databases? -# By default compression is enabled as it's almost always a win. -# If you want to save some CPU in the saving child set it to 'no' but -# the dataset will likely be bigger if you have compressible values or keys. -rdbcompression yes - -# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 
-# This makes the format more resistant to corruption but there is a performance -# hit to pay (around 10%) when saving and loading RDB files, so you can disable it -# for maximum performances. -# -# RDB files created with checksum disabled have a checksum of zero that will -# tell the loading code to skip the check. -rdbchecksum yes - -# Enables or disables full sanitization checks for ziplist and listpack etc when -# loading an RDB or RESTORE payload. This reduces the chances of a assertion or -# crash later on while processing commands. -# Options: -# no - Never perform full sanitization -# yes - Always perform full sanitization -# clients - Perform full sanitization only for user connections. -# Excludes: RDB files, RESTORE commands received from the primary -# connection, and client connections which have the -# skip-sanitize-payload ACL flag. -# The default should be 'clients' but since it currently affects cluster -# resharding via MIGRATE, it is temporarily set to 'no' by default. -# -# sanitize-dump-payload no - -# The filename where to dump the DB -dbfilename dump.rdb - -# Remove RDB files used by replication in instances without persistence -# enabled. By default this option is disabled, however there are environments -# where for regulations or other security concerns, RDB files persisted on -# disk by primaries in order to feed replicas, or stored on disk by replicas -# in order to load them for the initial synchronization, should be deleted -# ASAP. Note that this option ONLY WORKS in instances that have both AOF -# and RDB persistence disabled, otherwise is completely ignored. -# -# An alternative (and sometimes better) way to obtain the same effect is -# to use diskless replication on both primary and replicas instances. However -# in the case of replicas, diskless is not always an option. -rdb-del-sync-files no - -# The working directory. 
-# -# The DB will be written inside this directory, with the filename specified -# above using the 'dbfilename' configuration directive. -# -# The Append Only File will also be created inside this directory. -# -# The Cluster config file is written relative this directory, if the -# 'cluster-config-file' configuration directive is a relative path. -# -# Note that you must specify a directory here, not a file name. -dir ./ - -################################# REPLICATION ################################# - -# Master-Replica replication. Use replicaof to make a server a copy of -# another server. A few things to understand ASAP about replication. -# -# +------------------+ +---------------+ -# | Master | ---> | Replica | -# | (receive writes) | | (exact copy) | -# +------------------+ +---------------+ -# -# 1) Replication is asynchronous, but you can configure a primary to -# stop accepting writes if it appears to be not connected with at least -# a given number of replicas. -# 2) Replicas are able to perform a partial resynchronization with the -# primary if the replication link is lost for a relatively small amount of -# time. You may want to configure the replication backlog size (see the next -# sections of this file) with a sensible value depending on your needs. -# 3) Replication is automatic and does not need user intervention. After a -# network partition replicas automatically try to reconnect to primaries -# and resynchronize with them. -# -# replicaof - -# If the primary is password protected (using the "requirepass" configuration -# directive below) it is possible to tell the replica to authenticate before -# starting the replication synchronization process, otherwise the primary will -# refuse the replica request. -# -# primaryauth -# -# However this is not enough if you are using ACLs -# and the default user is not capable of running the PSYNC -# command and/or other commands needed for replication. 
In this case it's -# better to configure a special user to use with replication, and specify the -# primaryuser configuration as such: -# -# primaryuser -# -# When primaryuser is specified, the replica will authenticate against its -# primary using the new AUTH form: AUTH . - -# When a replica loses its connection with the primary, or when the replication -# is still in progress, the replica can act in two different ways: -# -# 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will -# still reply to client requests, possibly with out of date data, or the -# data set may just be empty if this is the first synchronization. -# -# 2) If replica-serve-stale-data is set to 'no' the replica will reply with error -# "MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'" -# to all data access commands, excluding commands such as: -# INFO, REPLICAOF, AUTH, SHUTDOWN, REPLCONF, ROLE, CONFIG, SUBSCRIBE, -# UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, COMMAND, POST, -# HOST and LATENCY. -# -replica-serve-stale-data yes - -# You can configure a replica instance to accept writes or not. Writing against -# a replica instance may be useful to store some ephemeral data (because data -# written on a replica will be easily deleted after resync with the primary) but -# may also cause problems if clients are writing to it because of a -# misconfiguration. -# -# By default, replicas are read-only. -# -# Note: read only replicas are not designed to be exposed to untrusted clients -# on the internet. It's just a protection layer against misuse of the instance. -# Still a read only replica exports by default all the administrative commands -# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve -# security of read only replicas using 'rename-command' to shadow all the -# administrative / dangerous commands. -replica-read-only yes - -# Replication SYNC strategy: disk or socket. 
-# -# New replicas and reconnecting replicas that are not able to continue the -# replication process just receiving differences, need to do what is called a -# "full synchronization". An RDB file is transmitted from the primary to the -# replicas. -# -# The transmission can happen in two different ways: -# -# 1) Disk-backed: The primary creates a new process that writes the RDB -# file on disk. Later the file is transferred by the parent -# process to the replicas incrementally. -# 2) Diskless: The primary creates a new process that directly writes the -# RDB file to replica sockets, without touching the disk at all. -# -# With disk-backed replication, while the RDB file is generated, more replicas -# can be queued and served with the RDB file as soon as the current child -# producing the RDB file finishes its work. With diskless replication instead -# once the transfer starts, new replicas arriving will be queued and a new -# transfer will start when the current one terminates. -# -# When diskless replication is used, the primary waits a configurable amount of -# time (in seconds) before starting the transfer in the hope that multiple -# replicas will arrive and the transfer can be parallelized. -# -# With slow disks and fast (large bandwidth) networks, diskless replication -# works better. -repl-diskless-sync yes - -# When diskless replication is enabled, it is possible to configure the delay -# the server waits in order to spawn the child that transfers the RDB via socket -# to the replicas. -# -# This is important since once the transfer starts, it is not possible to serve -# new replicas arriving, that will be queued for the next RDB transfer, so the -# server waits a delay in order to let more replicas arrive. -# -# The delay is specified in seconds, and by default is 5 seconds. To disable -# it entirely just set it to 0 seconds and the transfer will start ASAP. 
-repl-diskless-sync-delay 5 - -# When diskless replication is enabled with a delay, it is possible to let -# the replication start before the maximum delay is reached if the maximum -# number of replicas expected have connected. Default of 0 means that the -# maximum is not defined and the server will wait the full delay. -repl-diskless-sync-max-replicas 0 - -# ----------------------------------------------------------------------------- -# WARNING: Since in this setup the replica does not immediately store an RDB on -# disk, it may cause data loss during failovers. RDB diskless load + server -# modules not handling I/O reads may cause the server to abort in case of I/O errors -# during the initial synchronization stage with the primary. -# ----------------------------------------------------------------------------- -# -# Replica can load the RDB it reads from the replication link directly from the -# socket, or store the RDB to a file and read that file after it was completely -# received from the primary. -# -# In many cases the disk is slower than the network, and storing and loading -# the RDB file may increase replication time (and even increase the primary's -# Copy on Write memory and replica buffers). -# However, when parsing the RDB file directly from the socket, in order to avoid -# data loss it's only safe to flush the current dataset when the new dataset is -# fully loaded in memory, resulting in higher memory usage. -# For this reason we have the following options: -# -# "disabled" - Don't use diskless load (store the rdb file to the disk first) -# "swapdb" - Keep current db contents in RAM while parsing the data directly -# from the socket. Replicas in this mode can keep serving current -# dataset while replication is in progress, except for cases where -# they can't recognize primary as having a data set from same -# replication history. -# Note that this requires sufficient memory, if you don't have it, -# you risk an OOM kill. 
-# "on-empty-db" - Use diskless load only when current dataset is empty. This is -# safer and avoid having old and new dataset loaded side by side -# during replication. -# "flush-before-load" - [dangerous] Flush all data before parsing. Note that if -# there's a problem before the replication succeeded you may -# lose all your data. -repl-diskless-load disabled - -# This dual channel replication sync feature optimizes the full synchronization process -# between a primary and its replicas. When enabled, it reduces both memory and CPU load -# on the primary server. -# -# How it works: -# 1. During full sync, instead of accumulating replication data on the primary server, -# the data is sent directly to the syncing replica. -# 2. The primary's background save (bgsave) process streams the RDB snapshot directly -# to the replica over a separate connection. -# -# Tradeoff: -# While this approach reduces load on the primary, it shifts the burden of storing -# the replication buffer to the replica. This means the replica must have sufficient -# memory to accommodate the buffer during synchronization. However, this tradeoff is -# generally beneficial as it prevents potential performance degradation on the primary -# server, which is typically handling more critical operations. -# -# When toggling this configuration on or off during an ongoing synchronization process, -# it does not change the already running sync method. The new configuration will take -# effect only for subsequent synchronization processes. - -dual-channel-replication-enabled no - -# Master send PINGs to its replicas in a predefined interval. It's possible to -# change this interval with the repl_ping_replica_period option. The default -# value is 10 seconds. -# -# repl-ping-replica-period 10 - -# The following option sets the replication timeout for: -# -# 1) Bulk transfer I/O during SYNC, from the point of view of replica. -# 2) Master timeout from the point of view of replicas (data, pings). 
-# 3) Replica timeout from the point of view of primaries (REPLCONF ACK pings). -# -# It is important to make sure that this value is greater than the value -# specified for repl-ping-replica-period otherwise a timeout will be detected -# every time there is low traffic between the primary and the replica. The default -# value is 60 seconds. -# -# repl-timeout 60 - -# Disable TCP_NODELAY on the replica socket after SYNC? -# -# If you select "yes", the server will use a smaller number of TCP packets and -# less bandwidth to send data to replicas. But this can add a delay for -# the data to appear on the replica side, up to 40 milliseconds with -# Linux kernels using a default configuration. -# -# If you select "no" the delay for data to appear on the replica side will -# be reduced but more bandwidth will be used for replication. -# -# By default we optimize for low latency, but in very high traffic conditions -# or when the primary and replicas are many hops away, turning this to "yes" may -# be a good idea. -repl-disable-tcp-nodelay no - -# Set the replication backlog size. The backlog is a buffer that accumulates -# replica data when replicas are disconnected for some time, so that when a -# replica wants to reconnect again, often a full resync is not needed, but a -# partial resync is enough, just passing the portion of data the replica -# missed while disconnected. -# -# The bigger the replication backlog, the longer the replica can endure the -# disconnect and later be able to perform a partial resynchronization. -# -# The backlog is only allocated if there is at least one replica connected. -# -# repl-backlog-size 10mb - -# After a primary has no connected replicas for some time, the backlog will be -# freed. The following option configures the amount of seconds that need to -# elapse, starting from the time the last replica disconnected, for the backlog -# buffer to be freed. 
-# -# Note that replicas never free the backlog for timeout, since they may be -# promoted to primaries later, and should be able to correctly "partially -# resynchronize" with other replicas: hence they should always accumulate backlog. -# -# A value of 0 means to never release the backlog. -# -# repl-backlog-ttl 3600 - -# The replica priority is an integer number published by the server in the INFO -# output. It is used by Sentinel in order to select a replica to promote -# into a primary if the primary is no longer working correctly. -# -# A replica with a low priority number is considered better for promotion, so -# for instance if there are three replicas with priority 10, 100, 25 Sentinel -# will pick the one with priority 10, that is the lowest. -# -# However a special priority of 0 marks the replica as not able to perform the -# role of primary, so a replica with priority of 0 will never be selected by -# Sentinel for promotion. -# -# By default the priority is 100. -replica-priority 100 - -# The propagation error behavior controls how the server will behave when it is -# unable to handle a command being processed in the replication stream from a primary -# or processed while reading from an AOF file. Errors that occur during propagation -# are unexpected, and can cause data inconsistency. -# -# If an application wants to ensure there is no data divergence, this configuration -# should be set to 'panic' instead. The value can also be set to 'panic-on-replicas' -# to only panic when a replica encounters an error on the replication stream. One of -# these two panic values will become the default value in the future once there are -# sufficient safety mechanisms in place to prevent false positive crashes. -# -# propagation-error-behavior ignore - -# Replica ignore disk write errors controls the behavior of a replica when it is -# unable to persist a write command received from its primary to disk. 
By default, -# this configuration is set to 'no' and will crash the replica in this condition. -# It is not recommended to change this default. -# -# replica-ignore-disk-write-errors no - -# ----------------------------------------------------------------------------- -# By default, Sentinel includes all replicas in its reports. A replica -# can be excluded from Sentinel's announcements. An unannounced replica -# will be ignored by the 'sentinel replicas <primary-name>' command and won't be -# exposed to Sentinel's clients. -# -# This option does not change the behavior of replica-priority. Even with -# replica-announced set to 'no', the replica can be promoted to primary. To -# prevent this behavior, set replica-priority to 0. -# -# replica-announced yes - -# It is possible for a primary to stop accepting writes if there are fewer than -# N replicas connected, having a lag less than or equal to M seconds. -# -# The N replicas need to be in "online" state. -# -# The lag in seconds, that must be <= the specified value, is calculated from -# the last ping received from the replica, that is usually sent every second. -# -# This option does not GUARANTEE that N replicas will accept the write, but -# will limit the window of exposure for lost writes in case not enough replicas -# are available, to the specified number of seconds. -# -# For example to require at least 3 replicas with a lag <= 10 seconds use: -# -# min-replicas-to-write 3 -# min-replicas-max-lag 10 -# -# Setting one or the other to 0 disables the feature. -# -# By default min-replicas-to-write is set to 0 (feature disabled) and -# min-replicas-max-lag is set to 10. - -# A primary is able to list the address and port of the attached -# replicas in different ways. For example the "INFO replication" section -# offers this information, which is used, among other tools, by -# Sentinel in order to discover replica instances. -# Another place where this info is available is in the output of the -# "ROLE" command of a primary.
-# -# The listed IP address and port normally reported by a replica is -# obtained in the following way: -# -# IP: The address is auto detected by checking the peer address -# of the socket used by the replica to connect with the primary. -# -# Port: The port is communicated by the replica during the replication -# handshake, and is normally the port that the replica is using to -# listen for connections. -# -# However when port forwarding or Network Address Translation (NAT) is -# used, the replica may actually be reachable via different IP and port -# pairs. The following two options can be used by a replica in order to -# report to its primary a specific set of IP and port, so that both INFO -# and ROLE will report those values. -# -# There is no need to use both the options if you need to override just -# the port or the IP address. -# -# replica-announce-ip 5.5.5.5 -# replica-announce-port 1234 - -############################### KEYS TRACKING ################################# - -# The client side caching of values is assisted via server-side support. -# This is implemented using an invalidation table that remembers, using -# a radix key indexed by key name, what clients have which keys. In turn -# this is used in order to send invalidation messages to clients. Please -# check this page to understand more about the feature: -# -# https://valkey.io/topics/client-side-caching -# -# When tracking is enabled for a client, all the read only queries are assumed -# to be cached: this will force the server to store information in the invalidation -# table. When keys are modified, such information is flushed away, and -# invalidation messages are sent to the clients. However if the workload is -# heavily dominated by reads, the server could use more and more memory in order -# to track the keys fetched by many clients. -# -# For this reason it is possible to configure a maximum fill value for the -# invalidation table. 
By default it is set to 1M of keys, and once this limit -# is reached, the server will start to evict keys in the invalidation table -# even if they were not modified, just to reclaim memory: this will in turn -# force the clients to invalidate the cached values. Basically the table -# maximum size is a trade off between the memory you want to spend server -# side to track information about who cached what, and the ability of clients -# to retain cached objects in memory. -# -# If you set the value to 0, it means there are no limits, and the server will -# retain as many keys as needed in the invalidation table. -# In the "stats" INFO section, you can find information about the number of -# keys in the invalidation table at every given moment. -# -# Note: when key tracking is used in broadcasting mode, no memory is used -# in the server side so this setting is useless. -# -# tracking-table-max-keys 1000000 - -################################## SECURITY ################################### - -# Warning: since the server is pretty fast, an outside user can try up to -# 1 million passwords per second against a modern box. This means that you -# should use very strong passwords, otherwise they will be very easy to break. -# Note that because the password is really a shared secret between the client -# and the server, and should not be memorized by any human, the password -# can be easily a long string from /dev/urandom or whatever, so by using a -# long and unguessable password no brute force attack will be possible. - -# ACL users are defined in the following format: -# -# user <username> ... acl rules ... -# -# For example: -# -# user worker +@list +@connection ~jobs:* on >ffa9203c493aa99 -# -# The special username "default" is used for new connections. If this user -# has the "nopass" rule, then new connections will be immediately authenticated -# as the "default" user without the need of any password provided via the -# AUTH command.
Otherwise if the "default" user is not flagged with "nopass" -# the connections will start in not authenticated state, and will require -# AUTH (or the HELLO command AUTH option) in order to be authenticated and -# start to work. -# -# The ACL rules that describe what a user can do are the following: -# -# on Enable the user: it is possible to authenticate as this user. -# off Disable the user: it's no longer possible to authenticate -# with this user, however the already authenticated connections -# will still work. -# skip-sanitize-payload RESTORE dump-payload sanitization is skipped. -# sanitize-payload RESTORE dump-payload is sanitized (default). -# +<command> Allow the execution of that command. -# May be used with `|` for allowing subcommands (e.g "+config|get") -# -<command> Disallow the execution of that command. -# May be used with `|` for blocking subcommands (e.g "-config|set") -# +@<category> Allow the execution of all the commands in such category. -# Valid categories are like @admin, @set, @sortedset, ... -# and so forth, see the full list in the server.c file where -# the server command table is described and defined. -# The special category @all means all the commands, both those currently -# present in the server, and those that will be loaded in the future -# via modules. -# +<command>|first-arg Allow a specific first argument of an otherwise -# disabled command. It is only supported on commands with -# no sub-commands, and is not allowed as negative form -# like -SELECT|1, only additive starting with "+". This -# feature is deprecated and may be removed in the future. -# allcommands Alias for +@all. Note that it implies the ability to execute -# all the future commands loaded via the modules system. -# nocommands Alias for -@all. -# ~<pattern> Add a pattern of keys that can be mentioned as part of -# commands. For instance ~* allows all the keys. The pattern -# is a glob-style pattern like the one of KEYS. -# It is possible to specify multiple patterns.
-# %R~<pattern> Add key read pattern that specifies which keys can be read -# from. -# %W~<pattern> Add key write pattern that specifies which keys can be -# written to. -# allkeys Alias for ~* -# resetkeys Flush the list of allowed key patterns. -# &<pattern> Add a glob-style pattern of Pub/Sub channels that can be -# accessed by the user. It is possible to specify multiple channel -# patterns. -# allchannels Alias for &* -# resetchannels Flush the list of allowed channel patterns. -# ><password> Add this password to the list of valid passwords for the user. -# For example >mypass will add "mypass" to the list. -# This directive clears the "nopass" flag (see later). -# <<password> Remove this password from the list of valid passwords. -# nopass All the set passwords of the user are removed, and the user -# is flagged as requiring no password: it means that every -# password will work against this user. If this directive is -# used for the default user, every new connection will be -# immediately authenticated with the default user without -# any explicit AUTH command required. Note that the "resetpass" -# directive will clear this condition. -# resetpass Flush the list of allowed passwords. Moreover removes the -# "nopass" status. After "resetpass" the user has no associated -# passwords and there is no way to authenticate without adding -# some password (or setting it as "nopass" later). -# reset Performs the following actions: resetpass, resetkeys, resetchannels, -# allchannels (if acl-pubsub-default is set), off, clearselectors, -@all. -# The user returns to the same state it has immediately after its creation. -# (<options>) Create a new selector with the options specified within the -# parentheses and attach it to the user. Each option should be -# space separated. The first character must be ( and the last -# character must be ). -# clearselectors Remove all of the currently attached selectors.
-# Note this does not change the "root" user permissions, -# which are the permissions directly applied onto the -# user (outside the parentheses). -# -# ACL rules can be specified in any order: for instance you can start with -# passwords, then flags, or key patterns. However note that the additive -# and subtractive rules will CHANGE MEANING depending on the ordering. -# For instance see the following example: -# -# user alice on +@all -DEBUG ~* >somepassword -# -# This will allow "alice" to use all the commands with the exception of the -# DEBUG command, since +@all added all the commands to the set of the commands -# alice can use, and later DEBUG was removed. However if we invert the order -# of two ACL rules the result will be different: -# -# user alice on -DEBUG +@all ~* >somepassword -# -# Now DEBUG was removed when alice had yet no commands in the set of allowed -# commands, later all the commands are added, so the user will be able to -# execute everything. -# -# Basically ACL rules are processed left-to-right. -# -# The following is a list of command categories and their meanings: -# * keyspace - Writing or reading from keys, databases, or their metadata -# in a type agnostic way. Includes DEL, RESTORE, DUMP, RENAME, EXISTS, DBSIZE, -# KEYS, EXPIRE, TTL, FLUSHALL, etc. Commands that may modify the keyspace, -# key or metadata will also have `write` category. Commands that only read -# the keyspace, key or metadata will have the `read` category. -# * read - Reading from keys (values or metadata). Note that commands that don't -# interact with keys, will not have either `read` or `write`. -# * write - Writing to keys (values or metadata) -# * admin - Administrative commands. Normal applications will never need to use -# these. Includes REPLICAOF, CONFIG, DEBUG, SAVE, MONITOR, ACL, SHUTDOWN, etc. -# * dangerous - Potentially dangerous (each should be considered with care for -# various reasons). 
This includes FLUSHALL, MIGRATE, RESTORE, SORT, KEYS, -# CLIENT, DEBUG, INFO, CONFIG, SAVE, REPLICAOF, etc. -# * connection - Commands affecting the connection or other connections. -# This includes AUTH, SELECT, COMMAND, CLIENT, ECHO, PING, etc. -# * blocking - Potentially blocking the connection until released by another -# command. -# * fast - Fast O(1) commands. May loop on the number of arguments, but not the -# number of elements in the key. -# * slow - All commands that are not Fast. -# * pubsub - PUBLISH / SUBSCRIBE related -# * transaction - WATCH / MULTI / EXEC related commands. -# * scripting - Scripting related. -# * set - Data type: sets related. -# * sortedset - Data type: zsets related. -# * list - Data type: lists related. -# * hash - Data type: hashes related. -# * string - Data type: strings related. -# * bitmap - Data type: bitmaps related. -# * hyperloglog - Data type: hyperloglog related. -# * geo - Data type: geo related. -# * stream - Data type: streams related. -# -# For more information about ACL configuration please refer to -# the Valkey web site at https://valkey.io/topics/acl - -# ACL LOG -# -# The ACL Log tracks failed commands and authentication events associated -# with ACLs. The ACL Log is useful to troubleshoot failed commands blocked -# by ACLs. The ACL Log is stored in memory. You can reclaim memory with -# ACL LOG RESET. Define the maximum entry length of the ACL Log below. -acllog-max-len 128 - -# Using an external ACL file -# -# Instead of configuring users here in this file, it is possible to use -# a stand-alone file just listing users. The two methods cannot be mixed: -# if you configure users here and at the same time you activate the external -# ACL file, the server will refuse to start. -# -# The format of the external ACL user file is exactly the same as the -# format that is used inside valkey.conf to describe users. 
-# -# aclfile /etc/valkey/users.acl - -# IMPORTANT NOTE: "requirepass" is just a compatibility -# layer on top of the new ACL system. The option effect will be just setting -# the password for the default user. Clients will still authenticate using -# AUTH <password> as usual, or more explicitly with AUTH default <password> -# if they follow the new protocol: both will work. -# -# The requirepass is not compatible with aclfile option and the ACL LOAD -# command, these will cause requirepass to be ignored. -# -# requirepass foobared - -# The default Pub/Sub channels permission for new users is controlled by the -# acl-pubsub-default configuration directive, which accepts one of these values: -# -# allchannels: grants access to all Pub/Sub channels -# resetchannels: revokes access to all Pub/Sub channels -# -# acl-pubsub-default defaults to 'resetchannels' permission. -# -# acl-pubsub-default resetchannels - -# Command renaming (DEPRECATED). -# -# ------------------------------------------------------------------------ -# WARNING: avoid using this option if possible. Instead use ACLs to remove -# commands from the default user, and put them only in some admin user you -# create for administrative purposes. -# ------------------------------------------------------------------------ -# -# It is possible to change the name of dangerous commands in a shared -# environment. For instance the CONFIG command may be renamed into something -# hard to guess so that it will still be available for internal-use tools -# but not available for general clients. -# -# Example: -# -# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 -# -# It is also possible to completely kill a command by renaming it into -# an empty string: -# -# rename-command CONFIG "" -# -# Please note that changing the name of commands that are logged into the -# AOF file or transmitted to replicas may cause problems.
- -################################### CLIENTS #################################### - -# Set the max number of connected clients at the same time. By default -# this limit is set to 10000 clients, however if the server is not -# able to configure the process file limit to allow for the specified limit -# the max number of allowed clients is set to the current file limit -# minus 32 (as the server reserves a few file descriptors for internal uses). -# -# Once the limit is reached the server will close all the new connections sending -# an error 'max number of clients reached'. -# -# IMPORTANT: With a cluster-enabled setup, the max number of connections is also -# shared with the cluster bus: every node in the cluster will use two -# connections, one incoming and another outgoing. It is important to size the -# limit accordingly in case of very large clusters. -# -# maxclients 10000 - -############################## MEMORY MANAGEMENT ################################ - -# Set a memory usage limit to the specified amount of bytes. -# When the memory limit is reached the server will try to remove keys -# according to the eviction policy selected (see maxmemory-policy). -# -# If the server can't remove keys according to the policy, or if the policy is -# set to 'noeviction', the server will start to reply with errors to commands -# that would use more memory, like SET, LPUSH, and so on, and will continue -# to reply to read-only commands like GET. -# -# This option is usually useful when using the server as an LRU or LFU cache, or to -# set a hard memory limit for an instance (using the 'noeviction' policy). 
-# -# WARNING: If you have replicas attached to an instance with maxmemory on, -# the size of the output buffers needed to feed the replicas is subtracted -# from the used memory count, so that network problems / resyncs will -# not trigger a loop where keys are evicted, and in turn the output -# buffer of replicas is full with DELs of keys evicted triggering the deletion -# of more keys, and so forth until the database is completely emptied. -# -# In short... if you have replicas attached it is suggested that you set a lower -# limit for maxmemory so that there is some free RAM on the system for replica -# output buffers (but this is not needed if the policy is 'noeviction'). -# -# maxmemory <bytes> - -# MAXMEMORY POLICY: how the server will select what to remove when maxmemory -# is reached. You can select one from the following behaviors: -# -# volatile-lru -> Evict using approximated LRU, only keys with an expire set. -# allkeys-lru -> Evict any key using approximated LRU. -# volatile-lfu -> Evict using approximated LFU, only keys with an expire set. -# allkeys-lfu -> Evict any key using approximated LFU. -# volatile-random -> Remove a random key having an expire set. -# allkeys-random -> Remove a random key, any key. -# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) -# noeviction -> Don't evict anything, just return an error on write operations. -# -# LRU means Least Recently Used -# LFU means Least Frequently Used -# -# LRU, LFU and volatile-ttl are all implemented using approximated -# randomized algorithms. -# -# Note: with any of the above policies, when there are no suitable keys for -# eviction, the server will return an error on write operations that require -# more memory. These are usually commands that create new keys, add data or -# modify existing keys. A few examples are: SET, INCR, HSET, LPUSH, SUNIONSTORE, -# SORT (due to the STORE argument), and EXEC (if the transaction includes any -# command that requires memory).
-# -# The default is: -# -# maxmemory-policy noeviction - -# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated -# algorithms (in order to save memory), so you can tune it for speed or -# accuracy. By default the server will check five keys and pick the one that was -# used least recently, you can change the sample size using the following -# configuration directive. -# -# The default of 5 produces good enough results. 10 Approximates very closely -# true LRU but costs more CPU. 3 is faster but not very accurate. The maximum -# value that can be set is 64. -# -# maxmemory-samples 5 - -# Eviction processing is designed to function well with the default setting. -# If there is an unusually large amount of write traffic, this value may need to -# be increased. Decreasing this value may reduce latency at the risk of -# eviction processing effectiveness -# 0 = minimum latency, 10 = default, 100 = process without regard to latency -# -# maxmemory-eviction-tenacity 10 - -# By default a replica will ignore its maxmemory setting -# (unless it is promoted to primary after a failover or manually). It means -# that the eviction of keys will be just handled by the primary, sending the -# DEL commands to the replica as keys evict in the primary side. -# -# This behavior ensures that primaries and replicas stay consistent, and is usually -# what you want, however if your replica is writable, or you want the replica -# to have a different memory setting, and you are sure all the writes performed -# to the replica are idempotent, then you may change this default (but be sure -# to understand what you are doing). -# -# Note that since the replica by default does not evict, it may end using more -# memory than the one set via maxmemory (there are certain buffers that may -# be larger on the replica, or data structures may sometimes take more memory -# and so forth). 
So make sure you monitor your replicas and make sure they -# have enough memory to never hit a real out-of-memory condition before the -# primary hits the configured maxmemory setting. -# -# replica-ignore-maxmemory yes - -# The server reclaims expired keys in two ways: upon access when those keys are -# found to be expired, and also in background, in what is called the -# "active expire key". The key space is slowly and incrementally scanned -# looking for expired keys to reclaim, so that it is possible to free memory -# of keys that are expired and will never be accessed again in a short time. -# -# The default effort of the expire cycle will try to avoid having more than -# ten percent of expired keys still in memory, and will try to avoid consuming -# more than 25% of total memory and to add latency to the system. However -# it is possible to increase the expire "effort" that is normally set to -# "1", to a greater value, up to the value "10". At its maximum value the -# system will use more CPU, longer cycles (and technically may introduce -# more latency), and will tolerate fewer already expired keys still present -# in the system. It's a tradeoff between memory, CPU and latency. -# -# active-expire-effort 1 - -############################# LAZY FREEING #################################### - -# When keys are deleted, the server has historically freed their memory using -# blocking operations. It means that the server stopped processing new commands -# in order to reclaim all the memory associated with an object in a synchronous -# way. If the key deleted is associated with a small object, the time needed -# in order to execute the DEL command is very small and comparable to most other -# O(1) or O(log_N) commands in the server. However if the key is associated with an -# aggregated value containing millions of elements, the server can block for -# a long time (even seconds) in order to complete the operation.
-# -# For the above reasons, lazy freeing (or asynchronous freeing), has been -# introduced. With lazy freeing, keys are deleted in constant time. Another -# thread will incrementally free the object in the background as fast as -# possible. -# -# Starting from Valkey 8.0, lazy freeing is enabled by default. It is possible -# to retain the synchronous freeing behaviour by setting the lazyfree related -# configuration directives to 'no'. - -# Commands like DEL, FLUSHALL and FLUSHDB delete keys, but the server can also -# delete keys or flush the whole database as a side effect of other operations. -# Specifically the server deletes objects independently of a user call in the -# following scenarios: -# -# 1) On eviction, because of the maxmemory and maxmemory policy configurations, -# in order to make room for new data, without going over the specified -# memory limit. -# 2) Because of expire: when a key with an associated time to live (see the -# EXPIRE command) must be deleted from memory. -# 3) Because of a side effect of a command that stores data on a key that may -# already exist. For example the RENAME command may delete the old key -# content when it is replaced with another one. Similarly SUNIONSTORE -# or SORT with STORE option may delete existing keys. The SET command -# itself removes any old content of the specified key in order to replace -# it with the specified string. -# 4) During replication, when a replica performs a full resynchronization with -# its primary, the content of the whole database is removed in order to -# load the RDB file just transferred. -# -# In all the above cases, the default is to release memory in a non-blocking -# way. - -lazyfree-lazy-eviction yes -lazyfree-lazy-expire yes -lazyfree-lazy-server-del yes -replica-lazy-flush yes - -# For keys deleted using the DEL command, lazy freeing is controlled by the -# configuration directive 'lazyfree-lazy-user-del'. The default is 'yes'. 
The -# UNLINK command is identical to the DEL command, except that UNLINK always -# frees the memory lazily, regardless of this configuration directive: - -lazyfree-lazy-user-del yes - -# FLUSHDB, FLUSHALL, SCRIPT FLUSH and FUNCTION FLUSH support both asynchronous and synchronous -# deletion, which can be controlled by passing the [SYNC|ASYNC] flags into the -# commands. When neither flag is passed, this directive will be used to determine -# if the data should be deleted asynchronously. -# -# When a replica performs a node reset via CLUSTER RESET, the entire -# database content is removed to allow the node to become an empty primary. -# This directive also determines whether the data should be deleted asynchronously. -# -# There are many problems with running flush synchronously. Even in single CPU -# environments, the thread managers should balance between the freeing and -# serving incoming requests. The default value is yes. - -lazyfree-lazy-user-flush yes - -################################ THREADED I/O ################################# - -# The server is mostly single threaded, however there are certain threaded -# operations such as UNLINK, slow I/O accesses and other things that are -# performed on side threads. -# -# Now it is also possible to handle the server clients socket reads and writes -# in different I/O threads. Since especially writing is so slow, normally -# users use pipelining in order to speed up the server performances per -# core, and spawn multiple instances in order to scale more. Using I/O -# threads it is possible to easily speedup two times the server without resorting -# to pipelining nor sharding of the instance. -# -# By default threading is disabled, we suggest enabling it only in machines -# that have at least 3 or more cores, leaving at least one spare core. 
-# We also recommend using threaded I/O only if you actually have performance problems, with -# instances being able to use a quite big percentage of CPU time, otherwise -# there is no point in using this feature. -# -# So for instance if you have a four-core box, try to use 2 or 3 I/O -# threads; if you have an 8-core box, try to use 6 threads. In order to -# enable I/O threads use the following configuration directive: -# -# io-threads 4 -# -# Setting io-threads to 1 will just use the main thread as usual. -# When I/O threads are enabled, we use threads for reads and writes, that is -# to thread the write and read syscall and transfer the client buffers to the -# socket and to enable threading of reads and protocol parsing. -# -# When multiple commands are parsed by the I/O threads and ready for execution, -# we take advantage of knowing the next set of commands and prefetch their -# required dictionary entries in a batch. This reduces memory access costs. -# -# The optimal batch size depends on the specific workflow of the user. -# The default batch size is 16, which can be modified using the -# 'prefetch-batch-max-size' config. -# -# When the config is set to 0, prefetching is disabled. -# -# prefetch-batch-max-size 16 -# -# NOTE: -# 1. The 'io-threads-do-reads' config is deprecated and has no effect. Please -# avoid using this config if possible. -# -# 2. If you want to test the server speedup using valkey-benchmark, make -# sure you also run the benchmark itself in threaded mode, using the -# --threads option to match the number of server threads, otherwise you'll not -# be able to notice the improvements. - -############################ KERNEL OOM CONTROL ############################## - -# On Linux, it is possible to hint the kernel OOM killer on what processes -# should be killed first when out of memory. -# -# Enabling this feature makes the server actively control the oom_score_adj value -# for all its processes, depending on their role.
The default scores will -# attempt to have background child processes killed before all others, and -# replicas killed before primaries. -# -# The server supports these options: -# -# no: Don't make changes to oom-score-adj (default). -# yes: Alias to "relative" see below. -# absolute: Values in oom-score-adj-values are written as is to the kernel. -# relative: Values are used relative to the initial value of oom_score_adj when -# the server starts and are then clamped to a range of -1000 to 1000. -# Because typically the initial value is 0, they will often match the -# absolute values. -oom-score-adj no - -# When oom-score-adj is used, this directive controls the specific values used -# for primary, replica and background child processes. Values range -2000 to -# 2000 (higher means more likely to be killed). -# -# Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities) -# can freely increase their value, but not decrease it below its initial -# settings. This means that setting oom-score-adj to "relative" and setting the -# oom-score-adj-values to positive values will always succeed. -oom-score-adj-values 0 200 800 - - -#################### KERNEL transparent hugepage CONTROL ###################### - -# Usually the kernel Transparent Huge Pages control is set to "madvise" or -# "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which -# case this config has no effect. On systems in which it is set to "always", -# the server will attempt to disable it specifically for the server process in order -# to avoid latency problems specifically with fork(2) and CoW. -# If for some reason you prefer to keep it enabled, you can set this config to -# "no" and the kernel global to "always". - -disable-thp yes - -############################## APPEND ONLY MODE ############################### - -# By default the server asynchronously dumps the dataset on disk. 
This mode is -# good enough in many applications, but an issue with the server process or -# a power outage may result into a few minutes of writes lost (depending on -# the configured save points). -# -# The Append Only File is an alternative persistence mode that provides -# much better durability. For instance using the default data fsync policy -# (see later in the config file) the server can lose just one second of writes in a -# dramatic event like a server power outage, or a single write if something -# wrong with the process itself happens, but the operating system is -# still running correctly. -# -# AOF and RDB persistence can be enabled at the same time without problems. -# If the AOF is enabled on startup the server will load the AOF, that is the file -# with the better durability guarantees. -# -# Note that changing this value in a config file of an existing database and -# restarting the server can lead to data loss. A conversion needs to be done -# by setting it via CONFIG command on a live server first. -# -# Please check https://valkey.io/topics/persistence for more information. - -appendonly no - -# The base name of the append only file. -# -# The server uses a set of append-only files to persist the dataset -# and changes applied to it. There are two basic types of files in use: -# -# - Base files, which are a snapshot representing the complete state of the -# dataset at the time the file was created. Base files can be either in -# the form of RDB (binary serialized) or AOF (textual commands). -# - Incremental files, which contain additional commands that were applied -# to the dataset following the previous file. -# -# In addition, manifest files are used to track the files and the order in -# which they were created and should be applied. -# -# Append-only file names are created by the server following a specific pattern. 
-# The file name's prefix is based on the 'appendfilename' configuration -# parameter, followed by additional information about the sequence and type. -# -# For example, if appendfilename is set to appendonly.aof, the following file -# names could be derived: -# -# - appendonly.aof.1.base.rdb as a base file. -# - appendonly.aof.1.incr.aof, appendonly.aof.2.incr.aof as incremental files. -# - appendonly.aof.manifest as a manifest file. - -appendfilename "appendonly.aof" - -# For convenience, the server stores all persistent append-only files in a dedicated -# directory. The name of the directory is determined by the appenddirname -# configuration parameter. - -appenddirname "appendonlydir" - -# The fsync() call tells the Operating System to actually write data on disk -# instead of waiting for more data in the output buffer. Some OS will really flush -# data on disk, some other OS will just try to do it ASAP. -# -# The server supports three different modes: -# -# no: don't fsync, just let the OS flush the data when it wants. Faster. -# always: fsync after every write to the append only log. Slow, Safest. -# everysec: fsync only one time every second. Compromise. -# -# The default is "everysec", as that's usually the right compromise between -# speed and data safety. It's up to you to understand if you can relax this to -# "no" that will let the operating system flush the output buffer when -# it wants, for better performances (but if you can live with the idea of -# some data loss consider the default persistence mode that's snapshotting), -# or on the contrary, use "always" that's very slow but a bit safer than -# everysec. -# -# More details please check the following article: -# http://antirez.com/post/redis-persistence-demystified.html -# -# If unsure, use "everysec". 
- -# appendfsync always -appendfsync everysec -# appendfsync no - -# When the AOF fsync policy is set to always or everysec, and a background -# saving process (a background save or AOF log background rewriting) is -# performing a lot of I/O against the disk, in some Linux configurations -# the server may block too long on the fsync() call. Note that there is no fix for -# this currently, as even performing fsync in a different thread will block -# our synchronous write(2) call. -# -# In order to mitigate this problem it's possible to use the following option -# that will prevent fsync() from being called in the main process while a -# BGSAVE or BGREWRITEAOF is in progress. -# -# This means that while another child is saving, the durability of the server is -# the same as "appendfsync no". In practical terms, this means that it is -# possible to lose up to 30 seconds of log in the worst scenario (with the -# default Linux settings). -# -# If you have latency problems turn this to "yes". Otherwise leave it as -# "no" that is the safest pick from the point of view of durability. - -no-appendfsync-on-rewrite no - -# Automatic rewrite of the append only file. -# The server is able to automatically rewrite the log file implicitly calling -# BGREWRITEAOF when the AOF log size grows by the specified percentage. -# -# This is how it works: The server remembers the size of the AOF file after the -# latest rewrite (if no rewrite has happened since the restart, the size of -# the AOF at startup is used). -# -# This base size is compared to the current size. If the current size is -# bigger than the specified percentage, the rewrite is triggered. Also -# you need to specify a minimal size for the AOF file to be rewritten, this -# is useful to avoid rewriting the AOF file even if the percentage increase -# is reached but it is still pretty small. -# -# Specify a percentage of zero in order to disable the automatic AOF -# rewrite feature. 
- -auto-aof-rewrite-percentage 100 -auto-aof-rewrite-min-size 64mb - -# An AOF file may be found to be truncated at the end during the server -# startup process, when the AOF data gets loaded back into memory. -# This may happen when the system where the server is running -# crashes, especially when an ext4 filesystem is mounted without the -# data=ordered option (however this can't happen when the server itself -# crashes or aborts but the operating system still works correctly). -# -# The server can either exit with an error when this happens, or load as much -# data as possible (the default now) and start if the AOF file is found -# to be truncated at the end. The following option controls this behavior. -# -# If aof-load-truncated is set to yes, a truncated AOF file is loaded and -# the server starts emitting a log to inform the user of the event. -# Otherwise if the option is set to no, the server aborts with an error -# and refuses to start. When the option is set to no, the user is required -# to fix the AOF file using the "valkey-check-aof" utility before restarting -# the server. -# -# Note that if the AOF file is found to be corrupted in the middle, -# the server will still exit with an error. This option only applies when -# the server tries to read more data from the AOF file but not enough bytes -# are found. -aof-load-truncated yes - -# The server can create append-only base files in either RDB or AOF formats. Using -# the RDB format is always faster and more efficient, and disabling it is only -# supported for backward compatibility purposes. -aof-use-rdb-preamble yes - -# The server supports recording timestamp annotations in the AOF to support restoring -# the data from a specific point-in-time. However, using this capability changes -# the AOF format in a way that may not be compatible with existing AOF parsers.
-aof-timestamp-enabled no - -################################ SHUTDOWN ##################################### - -# Maximum time to wait for replicas when shutting down, in seconds. -# -# During shut down, a grace period allows any lagging replicas to catch up with -# the latest replication offset before the primary exits. This period can -# prevent data loss, especially for deployments without configured disk backups. -# -# The 'shutdown-timeout' value is the grace period's duration in seconds. It is -# only applicable when the instance has replicas. To disable the feature, set -# the value to 0. -# -# shutdown-timeout 10 - -# When the server receives a SIGINT or SIGTERM, shutdown is initiated and by default -# an RDB snapshot is written to disk in a blocking operation if save points are configured. -# The options used on signaled shutdown can include the following values: -# default: Saves RDB snapshot only if save points are configured. -# Waits for lagging replicas to catch up. -# save: Forces a DB saving operation even if no save points are configured. -# nosave: Prevents DB saving operation even if one or more save points are configured. -# now: Skips waiting for lagging replicas. -# force: Ignores any errors that would normally prevent the server from exiting. -# -# Any combination of values is allowed as long as "save" and "nosave" are not set simultaneously. -# Example: "nosave force now" -# -# shutdown-on-sigint default -# shutdown-on-sigterm default - -################ NON-DETERMINISTIC LONG BLOCKING COMMANDS ##################### - -# Maximum time in milliseconds for EVAL scripts, functions and in some cases -# modules' commands before the server can start processing or rejecting other clients. -# -# If the maximum execution time is reached the server will start to reply to most -# commands with a BUSY error. -# -# In this state the server will only allow a handful of commands to be executed. 
-# For instance, SCRIPT KILL, FUNCTION KILL, SHUTDOWN NOSAVE and possibly some -# module specific 'allow-busy' commands. -# -# SCRIPT KILL and FUNCTION KILL will only be able to stop a script that did not -# yet call any write commands, so SHUTDOWN NOSAVE may be the only way to stop -# the server in the case a write command was already issued by the script when -# the user doesn't want to wait for the natural termination of the script. -# -# The default is 5 seconds. It is possible to set it to 0 or a negative value -# to disable this mechanism (uninterrupted execution). Note that in the past -# this config had a different name, which is now an alias, so both of these do -# the same: -# lua-time-limit 5000 -# busy-reply-threshold 5000 - -################################ VALKEY CLUSTER ############################### - -# Normal server instances can't be part of a cluster; only nodes that are -# started as cluster nodes can. In order to start a server instance as a -# cluster node enable the cluster support uncommenting the following: -# -# cluster-enabled yes - -# Every cluster node has a cluster configuration file. This file is not -# intended to be edited by hand. It is created and updated by each node. -# Every cluster node requires a different cluster configuration file. -# Make sure that instances running in the same system do not have -# overlapping cluster configuration file names. -# -# cluster-config-file nodes-6379.conf - -# Cluster node timeout is the amount of milliseconds a node must be unreachable -# for it to be considered in failure state. -# Most other internal time limits are a multiple of the node timeout. -# -# cluster-node-timeout 15000 - -# The cluster port is the port that the cluster bus will listen for inbound connections on. When set -# to the default value, 0, it will be bound to the command port + 10000. Setting this value requires -# you to specify the cluster bus port when executing cluster meet. 
-# cluster-port 0 - -# A replica of a failing primary will avoid starting a failover if its data -# looks too old. -# -# There is no simple way for a replica to actually have an exact measure of -# its "data age", so the following two checks are performed: -# -# 1) If there are multiple replicas able to failover, they exchange messages -# in order to try to give an advantage to the replica with the best -# replication offset (more data from the primary processed). -# Replicas will try to get their rank by offset, and apply to the start -# of the failover a delay proportional to their rank. -# -# 2) Every single replica computes the time of the last interaction with -# its primary. This can be the last ping or command received (if the primary -# is still in the "connected" state), or the time that elapsed since the -# disconnection with the primary (if the replication link is currently down). -# If the last interaction is too old, the replica will not try to failover -# at all. -# -# Point "2" can be tuned by the user. Specifically a replica will not perform -# the failover if, since the last interaction with the primary, the time -# elapsed is greater than: -# -# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period -# -# So for example if node-timeout is 30 seconds, and the cluster-replica-validity-factor -# is 10, and assuming a default repl-ping-replica-period of 10 seconds, the -# replica will not try to failover if it was not able to talk with the primary -# for longer than 310 seconds. -# -# A large cluster-replica-validity-factor may allow replicas with too old data to failover -# a primary, while a too small value may prevent the cluster from being able to -# elect a replica at all. -# -# For maximum availability, it is possible to set the cluster-replica-validity-factor -# to a value of 0, which means that replicas will always try to failover the -# primary regardless of the last time they interacted with the primary.
-# (However they'll always try to apply a delay proportional to their -# offset rank). -# -# Zero is the only value able to guarantee that when all the partitions heal -# the cluster will always be able to continue. -# -# cluster-replica-validity-factor 10 - -# Cluster replicas are able to migrate to orphaned primaries, that is, primaries -# that are left without working replicas. This improves the cluster's ability -# to resist failures as otherwise an orphaned primary can't be failed over -# in case of failure if it has no working replicas. -# -# Replicas migrate to orphaned primaries only if there are still at least a -# given number of other working replicas for their old primary. This number -# is the "migration barrier". A migration barrier of 1 means that a replica -# will migrate only if there is at least 1 other working replica for its primary -# and so forth. It usually reflects the number of replicas you want for every -# primary in your cluster. -# -# Default is 1 (replicas migrate only if their primaries remain with at least -# one replica). To disable migration just set it to a very large value or -# set cluster-allow-replica-migration to 'no'. -# A value of 0 can be set but is useful only for debugging and dangerous -# in production. -# -# cluster-migration-barrier 1 - -# Turning off this option allows the use of a less automatic cluster configuration. -# It disables migration of replicas to orphaned primaries. Primaries that become -# empty due to losing their last slots to another primary will not automatically -# replicate from the primary that took over their last slots. Instead, they will -# remain as empty primaries without any slots. -# -# Default is 'yes' (allow automatic migrations). -# -# cluster-allow-replica-migration yes - -# By default cluster nodes stop accepting queries if they detect there -# is at least one hash slot uncovered (no available node is serving it).
-# This way if the cluster is partially down (for example a range of hash slots -# are no longer covered) all the cluster becomes, eventually, unavailable. -# It automatically returns available as soon as all the slots are covered again. -# -# However sometimes you want the subset of the cluster which is working, -# to continue to accept queries for the part of the key space that is still -# covered. In order to do so, just set the cluster-require-full-coverage -# option to no. -# -# cluster-require-full-coverage yes - -# This option, when set to yes, prevents replicas from trying to failover its -# primary during primary failures. However the replica can still perform a -# manual failover, if forced to do so. -# -# This is useful in different scenarios, especially in the case of multiple -# data center operations, where we want one side to never be promoted if not -# in the case of a total DC failure. -# -# cluster-replica-no-failover no - -# This option, when set to yes, allows nodes to serve read traffic while the -# cluster is in a down state, as long as it believes it owns the slots. -# -# This is useful for two cases. The first case is for when an application -# doesn't require consistency of data during node failures or network partitions. -# One example of this is a cache, where as long as the node has the data it -# should be able to serve it. -# -# The second use case is for configurations that don't meet the recommended -# three shards but want to enable cluster mode and scale later. A -# primary outage in a 1 or 2 shard configuration causes a read/write outage to the -# entire cluster without this option set, with it set there is only a write outage. -# Without a quorum of primaries, slot ownership will not change automatically. -# -# cluster-allow-reads-when-down no - -# This option, when set to yes, allows nodes to serve pubsub shard traffic while -# the cluster is in a down state, as long as it believes it owns the slots. 
-# -# This is useful if the application would like to use the pubsub feature even when -# the cluster global stable state is not OK. If the application wants to make sure only -# one shard is serving a given channel, this feature should be kept as yes. -# -# cluster-allow-pubsubshard-when-down yes - -# Cluster link send buffer limit is the limit on the memory usage of an individual -# cluster bus link's send buffer in bytes. Cluster links would be freed if they exceed -# this limit. This is to primarily prevent send buffers from growing unbounded on links -# toward slow peers (E.g. PubSub messages being piled up). -# This limit is disabled by default. Enable this limit when 'mem_cluster_links' INFO field -# and/or 'send-buffer-allocated' entries in the 'CLUSTER LINKS` command output continuously increase. -# Minimum limit of 1gb is recommended so that cluster link buffer can fit in at least a single -# PubSub message by default. (client-query-buffer-limit default value is 1gb) -# -# cluster-link-sendbuf-limit 0 - -# Clusters can configure their announced hostname using this config. This is a common use case for -# applications that need to use TLS Server Name Indication (SNI) or dealing with DNS based -# routing. By default this value is only shown as additional metadata in the CLUSTER SLOTS -# command, but can be changed using 'cluster-preferred-endpoint-type' config. This value is -# communicated along the clusterbus to all nodes, setting it to an empty string will remove -# the hostname and also propagate the removal. -# -# cluster-announce-hostname "" - -# Clusters can configure an optional nodename to be used in addition to the node ID for -# debugging and admin information. This name is broadcasted between nodes, so will be used -# in addition to the node ID when reporting cross node events such as node failures. 
-# cluster-announce-human-nodename "" - -# Clusters can advertise how clients should connect to them using either their IP address, -# a user defined hostname, or by declaring they have no endpoint. Which endpoint is -# shown as the preferred endpoint is set by using the cluster-preferred-endpoint-type -# config with values 'ip', 'hostname', or 'unknown-endpoint'. This value controls -# the endpoint returned for MOVED/ASKING requests as well as the first field of CLUSTER SLOTS. -# If the preferred endpoint type is set to hostname, but no announced hostname is set, a '?' -# will be returned instead. -# -# When a cluster advertises itself as having an unknown endpoint, it's indicating that -# the server doesn't know how clients can reach the cluster. This can happen in certain -# networking situations where there are multiple possible routes to the node, and the -# server doesn't know which one the client took. In this case, the server is expecting -# the client to reach out on the same endpoint it used for making the last request, but use -# the port provided in the response. -# -# cluster-preferred-endpoint-type ip - -# The cluster blacklist is used when removing a node from the cluster completely. -# When CLUSTER FORGET is called for a node, that node is put into the blacklist for -# some time so that when gossip messages are received from other nodes that still -# remember it, it is not re-added. This gives time for CLUSTER FORGET to be sent to -# every node in the cluster. The blacklist TTL is 60 seconds by default, which should -# be sufficient for most clusters, but you may consider increasing this if you see -# nodes getting re-added while using CLUSTER FORGET. -# -# cluster-blacklist-ttl 60 - -# Clusters can be configured to track per-slot resource statistics, -# which are accessible by the CLUSTER SLOT-STATS command. -# -# By default, the 'cluster-slot-stats-enabled' is disabled, and only 'key-count' is captured.
-# By enabling the 'cluster-slot-stats-enabled' config, the cluster will begin to capture advanced statistics. -# These statistics can be leveraged to assess general slot usage trends, identify hot / cold slots, -# migrate slots for a balanced cluster workload, and / or re-write application logic to better utilize slots. -# -# cluster-slot-stats-enabled no - -# In order to setup your cluster make sure to read the documentation -# available at https://valkey.io web site. - -########################## CLUSTER DOCKER/NAT support ######################## - -# In certain deployments, cluster node's address discovery fails, because -# addresses are NAT-ted or because ports are forwarded (the typical case is -# Docker and other containers). -# -# In order to make a cluster work in such environments, a static -# configuration where each node knows its public address is needed. The -# following options are used for this scope, and are: -# -# * cluster-announce-ip -# * cluster-announce-client-ipv4 -# * cluster-announce-client-ipv6 -# * cluster-announce-port -# * cluster-announce-tls-port -# * cluster-announce-bus-port -# -# Each instructs the node about its address, possibly other addresses to expose -# to clients, client ports (for connections without and with TLS) and cluster -# message bus port. The information is then published in the bus packets so that -# other nodes will be able to correctly map the address of the node publishing -# the information. -# -# If tls-cluster is set to yes and cluster-announce-tls-port is omitted or set -# to zero, then cluster-announce-port refers to the TLS port. Note also that -# cluster-announce-tls-port has no effect if tls-cluster is set to no. -# -# If cluster-announce-client-ipv4 and cluster-announce-client-ipv6 are omitted, -# then cluster-announce-ip is exposed to clients. -# -# If the above options are not used, the normal cluster auto-detection -# will be used instead. 
-# -# Note that when remapped, the bus port may not be at the fixed offset of -# clients port + 10000, so you can specify any port and bus-port depending -# on how they get remapped. If the bus-port is not set, a fixed offset of -# 10000 will be used as usual. -# -# Example: -# -# cluster-announce-ip 10.1.1.5 -# cluster-announce-client-ipv4 123.123.123.5 -# cluster-announce-client-ipv6 2001:db8::8a2e:370:7334 -# cluster-announce-tls-port 6379 -# cluster-announce-port 0 -# cluster-announce-bus-port 6380 - -################################## SLOW LOG ################################### - -# The server Slow Log is a system to log queries that exceeded a specified -# execution time. The execution time does not include the I/O operations -# like talking with the client, sending the reply and so forth, -# but just the time needed to actually execute the command (this is the only -# stage of command execution where the thread is blocked and can not serve -# other requests in the meantime). -# -# You can configure the slow log with two parameters: one tells the server -# what is the execution time, in microseconds, to exceed in order for the -# command to get logged, and the other parameter is the length of the -# slow log. When a new command is logged the oldest one is removed from the -# queue of logged commands. - -# The following time is expressed in microseconds, so 1000000 is equivalent -# to one second. Note that a negative number disables the slow log, while -# a value of zero forces the logging of every command. -slowlog-log-slower-than 10000 - -# There is no limit to this length. Just be aware that it will consume memory. -# You can reclaim memory used by the slow log with SLOWLOG RESET. -slowlog-max-len 128 - -################################ LATENCY MONITOR ############################## - -# The server latency monitoring subsystem samples different operations -# at runtime in order to collect data related to possible sources of -# latency of a server instance. 
-# -# Via the LATENCY command this information is available to the user, who can -# print graphs and obtain reports. -# -# The system only logs operations that were performed in a time equal to or -# greater than the number of milliseconds specified via the -# latency-monitor-threshold configuration directive. When its value is set -# to zero, the latency monitor is turned off. -# -# By default latency monitoring is disabled since it is mostly not needed -# if you don't have latency issues, and collecting data has a performance -# impact, that while very small, can be measured under big load. Latency -# monitoring can easily be enabled at runtime using the command -# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. -latency-monitor-threshold 0 - -################################ LATENCY TRACKING ############################## - -# The server's extended latency monitoring tracks the per command latencies and enables -# exporting the percentile distribution via the INFO latencystats command, -# and cumulative latency distributions (histograms) via the LATENCY command. -# -# By default, the extended latency monitoring is enabled since the overhead -# of keeping track of the command latency is very small. -# latency-tracking yes - -# By default the exported latency percentiles via the INFO latencystats command -# are the p50, p99, and p999. -# latency-tracking-info-percentiles 50 99 99.9 - -############################# EVENT NOTIFICATION ############################## - -# The server can notify Pub/Sub clients about events happening in the key space.
-# This feature is documented at https://valkey.io/topics/notifications -# -# For instance if keyspace events notification is enabled, and a client -# performs a DEL operation on key "foo" stored in database 0, two -# messages will be published via Pub/Sub: -# -# PUBLISH __keyspace@0__:foo del -# PUBLISH __keyevent@0__:del foo -# -# It is possible to select the events that the server will notify among a set -# of classes. Every class is identified by a single character: -# -# K Keyspace events, published with __keyspace@<db>__ prefix. -# E Keyevent events, published with __keyevent@<db>__ prefix. -# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... -# $ String commands -# l List commands -# s Set commands -# h Hash commands -# z Sorted set commands -# x Expired events (events generated every time a key expires) -# e Evicted events (events generated when a key is evicted for maxmemory) -# n New key events (Note: not included in the 'A' class) -# t Stream commands -# d Module key type events -# m Key-miss events (Note: It is not included in the 'A' class) -# A Alias for g$lshzxetd, so that the "AKE" string means all the events -# (Except key-miss events which are excluded from 'A' due to their -# unique nature). -# -# The "notify-keyspace-events" takes as argument a string that is composed -# of zero or multiple characters. The empty string means that notifications -# are disabled. -# -# Example: to enable list and generic events, from the point of view of the -# event name, use: -# -# notify-keyspace-events Elg -# -# Example 2: to get the stream of the expired keys subscribing to channel -# name __keyevent@0__:expired use: -# -# notify-keyspace-events Ex -# -# By default all notifications are disabled because most users don't need -# this feature and the feature has some overhead. Note that if you don't -# specify at least one of K or E, no events will be delivered.
-notify-keyspace-events "" - -############################### ADVANCED CONFIG ############################### - -# Hashes are encoded using a memory efficient data structure when they have a -# small number of entries, and the biggest entry does not exceed a given -# threshold. These thresholds can be configured using the following directives. -hash-max-listpack-entries 512 -hash-max-listpack-value 64 - -# Lists are also encoded in a special way to save a lot of space. -# The number of entries allowed per internal list node can be specified -# as a fixed maximum size or a maximum number of elements. -# For a fixed maximum size, use -5 through -1, meaning: -# -5: max size: 64 Kb <-- not recommended for normal workloads -# -4: max size: 32 Kb <-- not recommended -# -3: max size: 16 Kb <-- probably not recommended -# -2: max size: 8 Kb <-- good -# -1: max size: 4 Kb <-- good -# Positive numbers mean store up to _exactly_ that number of elements -# per list node. -# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), -# but if your use case is unique, adjust the settings as necessary. -list-max-listpack-size -2 - -# Lists may also be compressed. -# Compress depth is the number of quicklist ziplist nodes from *each* side of -# the list to *exclude* from compression. The head and tail of the list -# are always uncompressed for fast push/pop operations. Settings are: -# 0: disable all list compression -# 1: depth 1 means "don't start compressing until after 1 node into the list, -# going from either the head or tail" -# So: [head]->node->node->...->node->[tail] -# [head], [tail] will always be uncompressed; inner nodes will compress. -# 2: [head]->[next]->node->node->...->node->[prev]->[tail] -# 2 here means: don't compress head or head->next or tail->prev or tail, -# but compress all nodes between them. -# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] -# etc. 
-list-compress-depth 0 - -# Sets have a special encoding when a set is composed -# of just strings that happen to be integers in radix 10 in the range -# of 64 bit signed integers. -# The following configuration setting sets the limit in the size of the -# set in order to use this special memory saving encoding. -set-max-intset-entries 512 - -# Sets containing non-integer values are also encoded using a memory efficient -# data structure when they have a small number of entries, and the biggest entry -# does not exceed a given threshold. These thresholds can be configured using -# the following directives. -set-max-listpack-entries 128 -set-max-listpack-value 64 - -# Similarly to hashes and lists, sorted sets are also specially encoded in -# order to save a lot of space. This encoding is only used when the length and -# elements of a sorted set are below the following limits: -zset-max-listpack-entries 128 -zset-max-listpack-value 64 - -# HyperLogLog sparse representation bytes limit. The limit includes the -# 16 bytes header. When a HyperLogLog using the sparse representation crosses -# this limit, it is converted into the dense representation. -# -# A value greater than 16000 is totally useless, since at that point the -# dense representation is more memory efficient. -# -# The suggested value is ~ 3000 in order to have the benefits of -# the space efficient encoding without slowing down too much PFADD, -# which is O(N) with the sparse encoding. The value can be raised to -# ~ 10000 when CPU is not a concern, but space is, and the data set is -# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. -hll-sparse-max-bytes 3000 - -# Streams macro node max size / items. The stream data structure is a radix -# tree of big nodes that encode multiple items inside. 
Using this configuration -# it is possible to configure how big a single node can be in bytes, and the -# maximum number of items it may contain before switching to a new node when -# appending new stream entries. If any of the following settings are set to -# zero, the limit is ignored, so for instance it is possible to set just a -# max entries limit by setting max-bytes to 0 and max-entries to the desired -# value. -stream-node-max-bytes 4096 -stream-node-max-entries 100 - -# Active rehashing uses 1% of the CPU time to help perform incremental rehashing -# of the main server hash tables, the ones mapping top-level keys to values. -# -# If active rehashing is disabled and rehashing is needed, a hash table is -# rehashed one "step" on every operation performed on the hash table (add, find, -# etc.), so if the server is idle, the rehashing may never complete and some -# more memory is used by the hash tables. Active rehashing helps prevent this. -# -# Active rehashing runs as a background task. Depending on the value of 'hz', -# the frequency at which the server performs background tasks, active rehashing -# can cause the server to freeze for a short time. For example, if 'hz' is set -# to 10, active rehashing runs for up to one millisecond every 100 milliseconds. -# If a freeze of one millisecond is not acceptable, you can increase 'hz' to let -# active rehashing run more often. If instead 'hz' is set to 100, active -# rehashing runs up to only 100 microseconds every 10 milliseconds. The total is -# still 1% of the time. -activerehashing yes - -# The client output buffer limits can be used to force disconnection of clients -# that are not reading data from the server fast enough for some reason (a -# common reason is that a Pub/Sub client can't consume messages as fast as the -# publisher can produce them). 
-# -# The limit can be set differently for the three different classes of clients: -# -# normal -> normal clients including MONITOR clients -# replica -> replica clients -# pubsub -> clients subscribed to at least one pubsub channel or pattern -# -# The syntax of every client-output-buffer-limit directive is the following: -# -# client-output-buffer-limit -# -# A client is immediately disconnected once the hard limit is reached, or if -# the soft limit is reached and remains reached for the specified number of -# seconds (continuously). -# So for instance if the hard limit is 32 megabytes and the soft limit is -# 16 megabytes / 10 seconds, the client will get disconnected immediately -# if the size of the output buffers reach 32 megabytes, but will also get -# disconnected if the client reaches 16 megabytes and continuously overcomes -# the limit for 10 seconds. -# -# By default normal clients are not limited because they don't receive data -# without asking (in a push way), but just after a request, so only -# asynchronous clients may create a scenario where data is requested faster -# than it can read. -# -# Instead there is a default limit for pubsub and replica clients, since -# subscribers and replicas receive data in a push fashion. -# -# Note that it doesn't make sense to set the replica clients output buffer -# limit lower than the repl-backlog-size config (partial sync will succeed -# and then replica will get disconnected). -# Such a configuration is ignored (the size of repl-backlog-size will be used). -# This doesn't have memory consumption implications since the replica client -# will share the backlog buffers memory. -# -# Both the hard or the soft limit can be disabled by setting them to zero. -client-output-buffer-limit normal 0 0 0 -client-output-buffer-limit replica 256mb 64mb 60 -client-output-buffer-limit pubsub 32mb 8mb 60 - -# Client query buffers accumulate new commands. 
They are limited to a fixed -# amount by default in order to avoid that a protocol desynchronization (for -# instance due to a bug in the client) will lead to unbound memory usage in -# the query buffer. However you can configure it here if you have very special -# needs, such as a command with huge argument, or huge multi/exec requests or alike. -# -# client-query-buffer-limit 1gb - -# In some scenarios client connections can hog up memory leading to OOM -# errors or data eviction. To avoid this we can cap the accumulated memory -# used by all client connections (all pubsub and normal clients). Once we -# reach that limit connections will be dropped by the server freeing up -# memory. The server will attempt to drop the connections using the most -# memory first. We call this mechanism "client eviction". -# -# Client eviction is configured using the maxmemory-clients setting as follows: -# 0 - client eviction is disabled (default) -# -# A memory value can be used for the client eviction threshold, -# for example: -# maxmemory-clients 1g -# -# A percentage value (between 1% and 100%) means the client eviction threshold -# is based on a percentage of the maxmemory setting. For example to set client -# eviction at 5% of maxmemory: -# maxmemory-clients 5% - -# In the server protocol, bulk requests, that are, elements representing single -# strings, are normally limited to 512 mb. However you can change this limit -# here, but must be 1mb or greater -# -# proto-max-bulk-len 512mb - -# The server calls an internal function to perform many background tasks, like -# closing connections of clients in timeout, purging expired keys that are -# never requested, and so forth. -# -# Not all tasks are performed with the same frequency, but the server checks for -# tasks to perform according to the specified "hz" value. -# -# By default "hz" is set to 10. 
Raising the value will use more CPU when -# the server is idle, but at the same time will make the server more responsive when -# there are many keys expiring at the same time, and timeouts may be -# handled with more precision. -# -# The range is between 1 and 500, however a value over 100 is usually not -# a good idea. Most users should use the default of 10 and raise this up to -# 100 only in environments where very low latency is required. -hz 10 - -# Normally it is useful to have an HZ value which is proportional to the -# number of clients connected. This is useful in order, for instance, to -# avoid too many clients are processed for each background task invocation -# in order to avoid latency spikes. -# -# Since the default HZ value by default is conservatively set to 10, the server -# offers, and enables by default, the ability to use an adaptive HZ value -# which will temporarily raise when there are many connected clients. -# -# When dynamic HZ is enabled, the actual configured HZ will be used -# as a baseline, but multiples of the configured HZ value will be actually -# used as needed once more clients are connected. In this way an idle -# instance will use very little CPU time while a busy instance will be -# more responsive. -dynamic-hz yes - -# When a child rewrites the AOF file, if the following option is enabled -# the file will be fsync-ed every 4 MB of data generated. This is useful -# in order to commit the file to the disk more incrementally and avoid -# big latency spikes. -aof-rewrite-incremental-fsync yes - -# When the server saves RDB file, if the following option is enabled -# the file will be fsync-ed every 4 MB of data generated. This is useful -# in order to commit the file to the disk more incrementally and avoid -# big latency spikes. -rdb-save-incremental-fsync yes - -# The server's LFU eviction (see maxmemory setting) can be tuned. 
However it is a good -# idea to start with the default settings and only change them after investigating -# how to improve the performances and how the keys LFU change over time, which -# is possible to inspect via the OBJECT FREQ command. -# -# There are two tunable parameters in the server LFU implementation: the -# counter logarithm factor and the counter decay time. It is important to -# understand what the two parameters mean before changing them. -# -# The LFU counter is just 8 bits per key, it's maximum value is 255, so the server -# uses a probabilistic increment with logarithmic behavior. Given the value -# of the old counter, when a key is accessed, the counter is incremented in -# this way: -# -# 1. A random number R between 0 and 1 is extracted. -# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). -# 3. The counter is incremented only if R < P. -# -# The default lfu-log-factor is 10. This is a table of how the frequency -# counter changes with a different number of accesses with different -# logarithmic factors: -# -# +--------+------------+------------+------------+------------+------------+ -# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | -# +--------+------------+------------+------------+------------+------------+ -# | 0 | 104 | 255 | 255 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 1 | 18 | 49 | 255 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 10 | 10 | 18 | 142 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 100 | 8 | 11 | 49 | 143 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# -# NOTE: The above table was obtained by running the following commands: -# -# valkey-benchmark -n 1000000 incr foo -# valkey-cli object freq foo -# -# NOTE 2: The counter initial value is 5 in order to give new objects a chance -# to 
accumulate hits. -# -# The counter decay time is the time, in minutes, that must elapse in order -# for the key counter to be decremented. -# -# The default value for the lfu-decay-time is 1. A special value of 0 means we -# will never decay the counter. -# -# lfu-log-factor 10 -# lfu-decay-time 1 - - -# The maximum number of new client connections accepted per event-loop cycle. This configuration -# is set independently for TLS connections. -# -# By default, up to 10 new connection will be accepted per event-loop cycle for normal connections -# and up to 1 new connection per event-loop cycle for TLS connections. -# -# Adjusting this to a larger number can slightly improve efficiency for new connections -# at the risk of causing timeouts for regular commands on established connections. It is -# not advised to change this without ensuring that all clients have limited connection -# pools and exponential backoff in the case of command/connection timeouts. -# -# If your application is establishing a large number of new connections per second you should -# also consider tuning the value of tcp-backlog, which allows the kernel to buffer more -# pending connections before dropping or rejecting connections. -# -# max-new-connections-per-cycle 10 -# max-new-tls-connections-per-cycle 1 - - -########################### ACTIVE DEFRAGMENTATION ####################### -# -# What is active defragmentation? -# ------------------------------- -# -# Active (online) defragmentation allows a server to compact the -# spaces left between small allocations and deallocations of data in memory, -# thus allowing to reclaim back memory. -# -# Fragmentation is a natural process that happens with every allocator (but -# less so with Jemalloc, fortunately) and certain workloads. Normally a server -# restart is needed in order to lower the fragmentation, or at least to flush -# away all the data and create it again. 
However thanks to this feature -# implemented by Oran Agra, this process can happen at runtime -# in a "hot" way, while the server is running. -# -# Basically when the fragmentation is over a certain level (see the -# configuration options below) the server will start to create new copies of the -# values in contiguous memory regions by exploiting certain specific Jemalloc -# features (in order to understand if an allocation is causing fragmentation -# and to allocate it in a better place), and at the same time, will release the -# old copies of the data. This process, repeated incrementally for all the keys -# will cause the fragmentation to drop back to normal values. -# -# Important things to understand: -# -# 1. This feature is disabled by default, and only works if you compiled the server -# to use the copy of Jemalloc we ship with the source code of the server. -# This is the default with Linux builds. -# -# 2. You never need to enable this feature if you don't have fragmentation -# issues. -# -# 3. Once you experience fragmentation, you can enable this feature when -# needed with the command "CONFIG SET activedefrag yes". -# -# The configuration parameters are able to fine tune the behavior of the -# defragmentation process. If you are not sure about what they mean it is -# a good idea to leave the defaults untouched. 
- -# Active defragmentation is disabled by default -# activedefrag no - -# Minimum amount of fragmentation waste to start active defrag -# active-defrag-ignore-bytes 100mb - -# Minimum percentage of fragmentation to start active defrag -# active-defrag-threshold-lower 10 - -# Maximum percentage of fragmentation at which we use maximum effort -# active-defrag-threshold-upper 100 - -# Minimal effort for defrag in CPU percentage, to be used when the lower -# threshold is reached -# active-defrag-cycle-min 1 - -# Maximal effort for defrag in CPU percentage, to be used when the upper -# threshold is reached -# active-defrag-cycle-max 25 - -# Maximum number of set/hash/zset/list fields that will be processed from -# the main dictionary scan -# active-defrag-max-scan-fields 1000 - -# Jemalloc background thread for purging will be enabled by default -jemalloc-bg-thread yes - -# It is possible to pin different threads and processes of the server to specific -# CPUs in your system, in order to maximize the performances of the server. -# This is useful both in order to pin different server threads in different -# CPUs, but also in order to make sure that multiple server instances running -# in the same host will be pinned to different CPUs. -# -# Normally you can do this using the "taskset" command, however it is also -# possible to do this via the server configuration directly, both in Linux and FreeBSD. -# -# You can pin the server/IO threads, bio threads, aof rewrite child process, and -# the bgsave child process. 
The syntax to specify the cpu list is the same as -# the taskset command: -# -# Set server/io threads to cpu affinity 0,2,4,6: -# server-cpulist 0-7:2 -# -# Set bio threads to cpu affinity 1,3: -# bio-cpulist 1,3 -# -# Set aof rewrite child process to cpu affinity 8,9,10,11: -# aof-rewrite-cpulist 8-11 -# -# Set bgsave child process to cpu affinity 1,10,11 -# bgsave-cpulist 1,10-11 - -# In some cases the server will emit warnings and even refuse to start if it detects -# that the system is in bad state, it is possible to suppress these warnings -# by setting the following config which takes a space delimited list of warnings -# to suppress -# -# ignore-warnings ARM64-COW-BUG - -# Inform Valkey of the availability zone if running in a cloud environment. Currently -# this is only exposed via the info command for clients to use, but in the future we -# we may also use this when making decisions for replication. -# -# availability-zone "zone-name" diff --git a/tests/integrationtests/test_ephemeral_processing.py b/tests/integrationtests/test_ephemeral_processing.py new file mode 100644 index 0000000..f0de588 --- /dev/null +++ b/tests/integrationtests/test_ephemeral_processing.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +First test +""" + +__license__ = "GPLv3" +__author__ = "Anika Weinmann" +__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" +__maintainer__ = "mundialis GmbH & Co. KG" + +from actinia_core.core.resource_data_container import ResourceDataContainer +from actinia_processing_lib.utils import try_import + + +base_url_data = "https://apps.mundialis.de/actinia_test_datasets" +polygon_gml = f"{base_url_data}/polygon.gml" +process_chain_vector_import_info = { + "list": [ + { + "id": "v_info", + "inputs": [ + { + "import_descr": { + "source": polygon_gml, + "type": "vector", + }, + "param": "map", + "value": "polygon", + } + ], + "module": "v.info", + "flags": "g", + } + ], + "version": "1", +} + + +def test_ephemeral_processing(): + """Test basic processing.""" + + EphemeralProcessing = try_import( + "actinia_processing_lib.ephemeral_processing", + "EphemeralProcessing", + ) + + # 'grass_data_base', 'grass_user_data_base', 'grass_base_dir', 'request_data', 'user_id', 'user_group', 'resource_id', 'iteration', 'status_url', 'api_info', 'resource_url_base', 'orig_time', 'orig_datetime', 'user_credentials', 'config', 'project_name', 'mapset_name', and 'map_name' + rdc = ResourceDataContainer( + '/actinia_core/grassdb', + '/actinia_core/userdata', + '/usr/local/grass', + process_chain_vector_import_info, + 'user', 'user', 'resource_id-1234', None, + 'http://localhost:8000/api/v1/status', + { + 'endpoint': 'asyncephemeralexportresource', + 'method': 'POST', + 'path': '/api/v3/locations/nc_spm_08/processing_async_export', + 'request_url': 'http://0.0.0.0:8088/api/v3/locations/nc_spm_08/processing_async_export' + }, + 'http://0.0.0.0:8088/api/v3/resources/actinia-gdi/resource_id-0b2fdafe-6311-4ac8-a139-c4dcb783bada/__None__', + 1749204596.1047864, + '2025-06-06 10:09:56.104791', + { + 'user_id': 'user', + 'password_hash': '1234', + 'user_role': 'user', + 'user_group': 'user', + 'permissions': { + 'process_time_limit': 1800, + 'cell_limit': 1, + 'process_num_limit': 10, + 
'accessible_modules': [ + 'v.info', + ] + } + }, + { + 'LOG_LEVEL': 3, + 'AUTHENTICATION': False, + 'CHECK_CREDENTIALS': False + }, + 'nc_spm_08', None, None + ) + + processing = EphemeralProcessing(rdc) + processing.run() + + assert processing.finish_message == 'Processing successfully finished', \ + f"Expected 'Processing successfully finished', got '{processing.finish_message}'" + assert processing.last_module == 'v.info', \ + f"Expected 'v.info', got '{processing.last_module}'" + assert processing.number_of_processes == 3, \ + f"Expected 3 processes, got {processing.number_of_processes}" + assert processing.progress == {'step': 3, 'num_of_steps': 3}, \ + f"Expected progress to be {{'step': 3, 'num_of_steps': 3}}, got {processing.progress}" + assert processing.progress_steps == 3, \ + f"Expected progress_steps to be 3, got {processing.progress_steps}" + assert processing.run_state == {'success': None}, \ + f"Expected run_state to be {{'success': None}}, got {processing.run_state}" + + + # (Pdb) dir(processing) # removed internal attributes + # ['actinia_process_dict', 'actinia_process_list', 'api_info', 'cell_limit', 'config', 'data', 'finish_message', 'ginit', 'ginit_tmpfiles', 'global_project_path', 'grass_base_dir', 'grass_data_base', 'grass_temp_database', 'grass_user_data_base', 'has_fluent', 'interim_result', 'is_global_database', 'iteration', 'last_module', 'lock_interface', 'map_name', 'mapset_name', 'message_logger', 'module_output_dict', 'module_output_log', 'module_results', 'number_of_processes', 'orig_datetime', 'orig_time', 'output_parser_list', 'proc_chain_converter', 'process_chain_list', 'process_count', 'process_dict', 'process_num_limit', 'process_time_limit', 'progress', 'progress_steps', 'project_name', 'rdc', 'request_data', 'required_mapsets', 'resource_export_list', 'resource_id', 'resource_logger', 'resource_url_list', 'response_model_class', 'run', 'run_state', 'setup_flag', 'skip_region_check', 'status_url', 'temp_file_count', 
'temp_file_path', 'temp_grass_data_base', 'temp_grass_data_base_name', 'temp_mapset_name', 'temp_mapset_path', 'temp_project_path', 'temporary_pc_files', 'unique_id', 'user_credentials', 'user_group', 'user_id', 'user_project_path', 'webhook_auth', 'webhook_finished', 'webhook_update'] diff --git a/tests/integrationtests/test_helloworld.py b/tests/integrationtests/test_helloworld.py deleted file mode 100644 index 383cd64..0000000 --- a/tests/integrationtests/test_helloworld.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2025 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Hello World test -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. 
KG" - - -import json - -import pytest -from actinia_api import URL_PREFIX -from flask import Response - -from tests.testsuite import ActiniaTestCase - - -class ActiniaHelloWorldTest(ActiniaTestCase): - """Actinia hello world test class for hello world endpoint.""" - - @pytest.mark.integrationtest - def test_get_helloworld(self) -> None: - """Test the get method of the /helloworld endpoint.""" - resp = self.app.get(f"{URL_PREFIX}/helloworld") - - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 200, "The status code is not 200" - assert hasattr(resp, "json"), "The response has no attribute 'json'" - assert ( - "message" in resp.json - ), "There is no 'message' inside the response" - assert ( - resp.json["message"] == "Hello world!" - ), "The response message is wrong" - - @pytest.mark.integrationtest - def test_post_helloworld(self) -> None: - """Test the post method of the /helloworld endpoint.""" - postbody = {"name": "test"} - resp = self.app.post( - f"{URL_PREFIX}/helloworld", - headers=self.user_auth_header, - data=json.dumps(postbody), - content_type="application/json", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 200, "The status code is not 200" - assert hasattr(resp, "json"), "The response has no attribute 'json'" - assert ( - "message" in resp.json - ), "There is no 'message' inside the response" - assert ( - resp.json["message"] == "Hello world! Hello world TEST!" 
- ), "The response message is wrong" - - @pytest.mark.integrationtest - def test_post_helloworld_error(self) -> None: - """Test the post method of the /helloworld endpoint.""" - postbody = {"namee": "test"} - resp = self.app.post( - f"{URL_PREFIX}/helloworld", - headers=self.user_auth_header, - data=json.dumps(postbody), - content_type="application/json", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 400, "The status code is not 400" - assert resp.data == b"Missing name in JSON content" diff --git a/tests/integrationtests/test_projecthelloworld.py b/tests/integrationtests/test_projecthelloworld.py deleted file mode 100644 index 954faac..0000000 --- a/tests/integrationtests/test_projecthelloworld.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2025 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -Hello World test -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. 
KG" - - -import json - -import pytest -from actinia_api import URL_PREFIX -from flask import Response - -from tests.testsuite import ActiniaTestCase - - -class ActiniaHelloWorldTest(ActiniaTestCase): - """Actinia hello world test class for hello world endpoint.""" - - @pytest.mark.integrationtest - def test_get_helloworld(self) -> None: - """Test get method of /helloworld/projects/ endpoint.""" - resp = self.app.get( - f"{URL_PREFIX}/helloworld/{self.project_url_part}/project1", - ) - - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 200, "The status code is not 200" - assert hasattr(resp, "json"), "The response has no attribute 'json'" - assert ( - "message" in resp.json - ), "There is no 'message' inside the response" - assert ( - resp.json["message"] == "Project: Hello world! project1" - ), "The response message is wrong" - - @pytest.mark.integrationtest - def test_post_helloworld(self) -> None: - """Test post method of /helloworld/projects/ endpoint.""" - postbody = {"name": "test"} - resp = self.app.post( - f"{URL_PREFIX}/helloworld/{self.project_url_part}/project1", - headers=self.user_auth_header, - data=json.dumps(postbody), - content_type="application/json", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 200, "The status code is not 200" - assert hasattr(resp, "json"), "The response has no attribute 'json'" - assert ( - "message" in resp.json - ), "There is no 'message' inside the response" - assert resp.json["message"] == ( - "Project: Hello world! Hello world TEST! 
project1" - ), "The response message is wrong" - - @pytest.mark.integrationtest - def test_post_helloworld_error(self) -> None: - """Test post method of /helloworld/projects/ endpoint.""" - postbody = {"namee": "test"} - resp = self.app.post( - f"{URL_PREFIX}/helloworld/{self.project_url_part}/project1", - headers=self.user_auth_header, - data=json.dumps(postbody), - content_type="application/json", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 400, "The status code is not 400" - assert resp.data == b"Missing name in JSON content" - - @pytest.mark.integrationtest - def test_redirecting_deprecated_locations_endpoint(self) -> None: - """Test redirecting of deprecated locations to projects endpoint.""" - if self.grass_version >= [8, 4]: - resp = self.app.get( - f"{URL_PREFIX}/helloworld/locations/project1", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - # self.app.get is following redirects - assert resp.status_code == 200, "The status code is not 200" - # remove beginning of URL e.g. http://localhost or http://127.0.0.1 - resp_location = "/" + "/".join(resp.location.split("/")[3:]) - assert ( - resp_location == f"{URL_PREFIX}/helloworld/projects/project1" - ), ( - "The deprecated locations endpoint " - "is not forwarded to projects endpoint" - ) - - @pytest.mark.integrationtest - def test_projects_endpoint_for_lt_g84(self) -> None: - """Test non-supported project endpoint for GRASS versions < g84.""" - if self.grass_version < [8, 4]: - resp = self.app.get( - f"{URL_PREFIX}/helloworld/projects/project1", - ) - assert isinstance( - resp, - Response, - ), "The response is not of type Response" - assert resp.status_code == 404, "The status code is not 404" - assert resp.json["message"] == ( - "Not Found. The requested URL " - "is only available from " - "GRASS GIS version 8.4." 
- ), f"Wrong return message: {resp.data}" diff --git a/tests/unittests/test_transformation.py b/tests/unittests/test_transformation.py deleted file mode 100644 index a85c6b8..0000000 --- a/tests/unittests/test_transformation.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -"""Copyright (c) 2018-2024 mundialis GmbH & Co. KG. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . - -First test -""" - -__license__ = "GPLv3" -__author__ = "Anika Weinmann" -__copyright__ = "Copyright 2022 mundialis GmbH & Co. KG" -__maintainer__ = "mundialis GmbH & Co. 
KG" - -import pytest - -from actinia_processing_lib.core.example import transform_input - - -@pytest.mark.unittest -@pytest.mark.parametrize( - ("inp", "ref_out"), - [("test", "Hello world TEST!"), ("bla23", "Hello world BLA23!")], -) -def test_transform_input(inp: str, ref_out: str) -> None: - """Test for tranform_input function.""" - out = transform_input(inp) - assert out == ref_out, f"Wrong result from transform_input for {inp}" diff --git a/tests_with_kvdb.sh b/tests_with_kvdb.sh index 65d9aa3..2881cfe 100755 --- a/tests_with_kvdb.sh +++ b/tests_with_kvdb.sh @@ -1,35 +1,26 @@ #!/usr/bin/env sh -# start kvdb server +# start valkey server valkey-server & sleep 1 valkey-cli ping -# start webhook server -webhook-server --host "0.0.0.0" --port "5005" & -sleep 10 - -# run tests -echo "${ACTINIA_CUSTOM_TEST_CFG}" -echo "${DEFAULT_CONFIG_PATH}" - +TEST_RES=1 if [ "$1" = "dev" ] then echo "Executing only 'dev' tests ..." - pytest -m "dev" + pytest -m 'dev' + TEST_RES=$? elif [ "$1" = "integrationtest" ] then - pytest -m "integrationtest" -elif [ "$1" = "unittest" ] -then - pytest -m "unittest" + pytest -m 'not unittest' + TEST_RES=$? else pytest + TEST_RES=$? fi -TEST_RES=$? 
- -# stop kvdb server +# stop valkey server valkey-cli shutdown return $TEST_RES From 5511cb7fe331b1611cc14014be2c0665525801d9 Mon Sep 17 00:00:00 2001 From: Carmen Date: Fri, 6 Jun 2025 15:20:50 +0200 Subject: [PATCH 43/46] lint --- .pylintrc | 50 ++++++ ruff.toml | 1 + .../ephemeral_processing.py | 3 + .../ephemeral_processing_with_export.py | 1 + .../test_ephemeral_processing.py | 142 +++++++++++------- 5 files changed, 143 insertions(+), 54 deletions(-) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..0c295bf --- /dev/null +++ b/.pylintrc @@ -0,0 +1,50 @@ +# For documentation about this config, see +# https://pylint.readthedocs.io/en/stable/user_guide/configuration/all-options.html (as of writing, version 2.17.4) + +[MAIN] + +jobs=0 # Default: 1 + +load-plugins= + pylint.extensions.broad_try_clause + +recursive=yes # Default: False + +# reports=yes # Default: False + +# score=no # Default: True + +# To disable more rules, see output of pylint. E.g. +# [...] C0301: Line too long (89/80) (line-too-long) +# can be suppressed with either disable=line-too-long or disable=C +# It is also possible to ignore a specific line by adding +# # pylint: disable=broad-exception-caught +# above the line causing the lint error +disable= + W, ; all Warnings are allowed to fail + import-error, ; To suppress e.g "Unable to import 'grass.script" + missing-module-docstring, ; we use the GRASS GIS header + R, ; refactoring + design recommendations + +[BASIC] +# for global variable setting +good-names=keep_data,download_dir,gisdbase,tgtgisrc,tmploc,srcgisrc + +[BROAD_TRY_CLAUSE] +max-try-statements=4 # Default: 1 + +[FORMAT] +max-line-length=80 # Default: 100 +max-module-lines=800 # Default: 1000 +ignore-long-lines=.*COPYRIGHT:.* |# . 
description:.*|\s*(# )?.*http.:\/\/\S+?|# %%* + +[MESSAGES CONTROL] +# C0209: Formatting a regular string which could be an f-string (consider-using-f-string) +# C0301: Line too long (already captured by other linters) +# C0302: Too many lines in module (2241/800) (too-many-lines) +# E0606 (possibly-used-before-assignment): to set options and flags at the bottom of the file without pre-initialization +disable=C0209,C0301,C0302,E0606 + + +[VARIABLES] +additional-builtins=_ # Default: () diff --git a/ruff.toml b/ruff.toml index a75af4d..b0e0339 100644 --- a/ruff.toml +++ b/ruff.toml @@ -142,6 +142,7 @@ lint.ignore = ["D104",] "ISC003", ] "tests/*" = [ + "N806", "PLR0913", "PLR0917", "PLR2004", diff --git a/src/actinia_processing_lib/ephemeral_processing.py b/src/actinia_processing_lib/ephemeral_processing.py index dbee9ac..107efd8 100644 --- a/src/actinia_processing_lib/ephemeral_processing.py +++ b/src/actinia_processing_lib/ephemeral_processing.py @@ -132,6 +132,7 @@ def __init__(self, rdc): """ # Fluentd hack to work in a multiprocessing environment try: + # pylint: disable=import-outside-toplevel from fluent import sender if sender: @@ -742,6 +743,7 @@ def _setup(self, init_grass=True): # fluent sender for this subprocess fluent_sender = None if self.has_fluent is True: + # pylint: disable=import-outside-toplevel from fluent import sender fluent_sender = sender.FluentSender( @@ -1714,6 +1716,7 @@ def _run_executable(self, process, poll_time=0.005): stdin_file = None if process.param_stdin_sources: + # pylint: disable=consider-using-enumerate for num, func in process.param_stdin_sources.items(): func_name = f"PARAM_STDIN_FUNC_{num}" for i in range(len(process.executable_params)): diff --git a/src/actinia_processing_lib/ephemeral_processing_with_export.py b/src/actinia_processing_lib/ephemeral_processing_with_export.py index 1c9341c..26bf3e0 100644 --- a/src/actinia_processing_lib/ephemeral_processing_with_export.py +++ 
b/src/actinia_processing_lib/ephemeral_processing_with_export.py @@ -115,6 +115,7 @@ def _export_raster( if format == "COG": # check if GDAL has COG driver + # pylint: disable=import-outside-toplevel from osgeo import gdal driver_list = [ diff --git a/tests/integrationtests/test_ephemeral_processing.py b/tests/integrationtests/test_ephemeral_processing.py index f0de588..85884c7 100644 --- a/tests/integrationtests/test_ephemeral_processing.py +++ b/tests/integrationtests/test_ephemeral_processing.py @@ -23,96 +23,130 @@ __maintainer__ = "mundialis GmbH & Co. KG" from actinia_core.core.resource_data_container import ResourceDataContainer -from actinia_processing_lib.utils import try_import +from actinia_processing_lib.utils import try_import -base_url_data = "https://apps.mundialis.de/actinia_test_datasets" -polygon_gml = f"{base_url_data}/polygon.gml" -process_chain_vector_import_info = { +BASE_URL_DATA = "https://apps.mundialis.de/actinia_test_datasets" +POLYGON_GML = f"{BASE_URL_DATA}/polygon.gml" +PROCESS_CHAIN_VECTOR_IMPORT_INFO = { "list": [ { "id": "v_info", "inputs": [ { "import_descr": { - "source": polygon_gml, + "source": POLYGON_GML, "type": "vector", }, "param": "map", "value": "polygon", - } + }, ], "module": "v.info", "flags": "g", - } + }, ], "version": "1", } -def test_ephemeral_processing(): +def test_ephemeral_processing() -> None: """Test basic processing.""" - + # pylint: disable=invalid-name EphemeralProcessing = try_import( "actinia_processing_lib.ephemeral_processing", "EphemeralProcessing", ) - # 'grass_data_base', 'grass_user_data_base', 'grass_base_dir', 'request_data', 'user_id', 'user_group', 'resource_id', 'iteration', 'status_url', 'api_info', 'resource_url_base', 'orig_time', 'orig_datetime', 'user_credentials', 'config', 'project_name', 'mapset_name', and 'map_name' + # 'grass_data_base', 'grass_user_data_base', 'grass_base_dir', + # 'request_data', 'user_id', 'user_group', 'resource_id', 'iteration', + # 'status_url', 'api_info', 
'resource_url_base', 'orig_time', + # 'orig_datetime', 'user_credentials', 'config', 'project_name', + # 'mapset_name', and 'map_name' rdc = ResourceDataContainer( - '/actinia_core/grassdb', - '/actinia_core/userdata', - '/usr/local/grass', - process_chain_vector_import_info, - 'user', 'user', 'resource_id-1234', None, - 'http://localhost:8000/api/v1/status', + "/actinia_core/grassdb", + "/actinia_core/userdata", + "/usr/local/grass", + PROCESS_CHAIN_VECTOR_IMPORT_INFO, + "user", + "user", + "resource_id-1234", + None, + "http://localhost:8000/api/v1/status", { - 'endpoint': 'asyncephemeralexportresource', - 'method': 'POST', - 'path': '/api/v3/locations/nc_spm_08/processing_async_export', - 'request_url': 'http://0.0.0.0:8088/api/v3/locations/nc_spm_08/processing_async_export' + "endpoint": "asyncephemeralexportresource", + "method": "POST", + "path": "/api/v3/locations/nc_spm_08/processing_async_export", + "request_url": "http://0.0.0.0:8088/api/v3/locations/nc_spm_08/" + "processing_async_export", }, - 'http://0.0.0.0:8088/api/v3/resources/actinia-gdi/resource_id-0b2fdafe-6311-4ac8-a139-c4dcb783bada/__None__', + "http://0.0.0.0:8088/api/v1/resource-id-1234/__None__", 1749204596.1047864, - '2025-06-06 10:09:56.104791', + "2025-06-06 10:09:56.104791", { - 'user_id': 'user', - 'password_hash': '1234', - 'user_role': 'user', - 'user_group': 'user', - 'permissions': { - 'process_time_limit': 1800, - 'cell_limit': 1, - 'process_num_limit': 10, - 'accessible_modules': [ - 'v.info', - ] - } + "user_id": "user", + "password_hash": "1234", + "user_role": "user", + "user_group": "user", + "permissions": { + "process_time_limit": 1800, + "cell_limit": 1, + "process_num_limit": 10, + "accessible_modules": [ + "v.info", + ], + }, }, - { - 'LOG_LEVEL': 3, - 'AUTHENTICATION': False, - 'CHECK_CREDENTIALS': False - }, - 'nc_spm_08', None, None + {"LOG_LEVEL": 3, "AUTHENTICATION": False, "CHECK_CREDENTIALS": False}, + "nc_spm_08", + None, + None, ) processing = 
EphemeralProcessing(rdc) processing.run() - assert processing.finish_message == 'Processing successfully finished', \ - f"Expected 'Processing successfully finished', got '{processing.finish_message}'" - assert processing.last_module == 'v.info', \ - f"Expected 'v.info', got '{processing.last_module}'" - assert processing.number_of_processes == 3, \ - f"Expected 3 processes, got {processing.number_of_processes}" - assert processing.progress == {'step': 3, 'num_of_steps': 3}, \ - f"Expected progress to be {{'step': 3, 'num_of_steps': 3}}, got {processing.progress}" - assert processing.progress_steps == 3, \ - f"Expected progress_steps to be 3, got {processing.progress_steps}" - assert processing.run_state == {'success': None}, \ - f"Expected run_state to be {{'success': None}}, got {processing.run_state}" - + assert ( + processing.finish_message == "Processing successfully finished" + ), "Expected 'Processing successfully finished', got " + f"'{processing.finish_message}'" + assert ( + processing.last_module == "v.info" + ), f"Expected 'v.info', got '{processing.last_module}'" + assert ( + processing.number_of_processes == 3 + ), f"Expected 3 processes, got {processing.number_of_processes}" + assert processing.progress == { + "step": 3, + "num_of_steps": 3, + }, "Expected progress to be {{'step': 3, 'num_of_steps': 3}}, got " + f"{processing.progress}" + assert ( + processing.progress_steps == 3 + ), f"Expected progress_steps to be 3, got {processing.progress_steps}" + assert processing.run_state == { + "success": None, + }, "Expected run_state to be {{'success': None}}, got " + f"{processing.run_state}" # (Pdb) dir(processing) # removed internal attributes - # ['actinia_process_dict', 'actinia_process_list', 'api_info', 'cell_limit', 'config', 'data', 'finish_message', 'ginit', 'ginit_tmpfiles', 'global_project_path', 'grass_base_dir', 'grass_data_base', 'grass_temp_database', 'grass_user_data_base', 'has_fluent', 'interim_result', 'is_global_database', 
'iteration', 'last_module', 'lock_interface', 'map_name', 'mapset_name', 'message_logger', 'module_output_dict', 'module_output_log', 'module_results', 'number_of_processes', 'orig_datetime', 'orig_time', 'output_parser_list', 'proc_chain_converter', 'process_chain_list', 'process_count', 'process_dict', 'process_num_limit', 'process_time_limit', 'progress', 'progress_steps', 'project_name', 'rdc', 'request_data', 'required_mapsets', 'resource_export_list', 'resource_id', 'resource_logger', 'resource_url_list', 'response_model_class', 'run', 'run_state', 'setup_flag', 'skip_region_check', 'status_url', 'temp_file_count', 'temp_file_path', 'temp_grass_data_base', 'temp_grass_data_base_name', 'temp_mapset_name', 'temp_mapset_path', 'temp_project_path', 'temporary_pc_files', 'unique_id', 'user_credentials', 'user_group', 'user_id', 'user_project_path', 'webhook_auth', 'webhook_finished', 'webhook_update'] + # ['actinia_process_dict', 'actinia_process_list', 'api_info', + # 'cell_limit', 'config', 'data', 'finish_message', 'ginit', + # 'ginit_tmpfiles', 'global_project_path', 'grass_base_dir', + # 'grass_data_base', 'grass_temp_database', 'grass_user_data_base', + # 'has_fluent', 'interim_result', 'is_global_database', 'iteration', + # 'last_module', 'lock_interface', 'map_name', 'mapset_name', + # 'message_logger', 'module_output_dict', 'module_output_log', + # 'module_results', 'number_of_processes', 'orig_datetime', 'orig_time', + # 'output_parser_list', 'proc_chain_converter', 'process_chain_list', + # 'process_count', 'process_dict', 'process_num_limit', + # 'process_time_limit', 'progress', 'progress_steps', 'project_name', + # 'rdc', 'request_data', 'required_mapsets', 'resource_export_list', + # 'resource_id', 'resource_logger', 'resource_url_list', + # 'response_model_class', 'run', 'run_state', 'setup_flag', + # 'skip_region_check', 'status_url', 'temp_file_count', 'temp_file_path', + # 'temp_grass_data_base', 'temp_grass_data_base_name', 'temp_mapset_name', 
+ # 'temp_mapset_path', 'temp_project_path', 'temporary_pc_files', + # 'unique_id', 'user_credentials', 'user_group', 'user_id', + # 'user_project_path', 'webhook_auth', 'webhook_finished', + # 'webhook_update'] From 358c2982c56439ff21400191365b22d6a896790f Mon Sep 17 00:00:00 2001 From: Carmen Date: Fri, 6 Jun 2025 15:35:03 +0200 Subject: [PATCH 44/46] fix test call --- .github/workflows/test.yml | 86 +++++++++++++++++++------------------- tests_with_kvdb.sh | 4 ++ 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 62df317..4076b8c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,26 +11,26 @@ on: jobs: - unittests-G84: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Replace run only unittest command - run: | - sed -i "s+# RUN make test+RUN make unittest+g" docker/actinia-processing-lib-test/Dockerfile - - name: Unittests of actinia-processing-lib - id: docker_build - uses: docker/build-push-action@v6 - with: - push: false - tags: actinia-processing-lib-tests:alpine - context: . - file: docker/actinia-processing-lib-test/Dockerfile - no-cache: true - # pull: true + # unittests-G84: + # runs-on: ubuntu-latest + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # - name: Replace run only unittest command + # run: | + # sed -i "s+# RUN make test+RUN make unittest+g" docker/actinia-processing-lib-test/Dockerfile + # - name: Unittests of actinia-processing-lib + # id: docker_build + # uses: docker/build-push-action@v6 + # with: + # push: false + # tags: actinia-processing-lib-tests:alpine + # context: . 
+ # file: docker/actinia-processing-lib-test/Dockerfile + # no-cache: true + # # pull: true integration-tests-G84: runs-on: ubuntu-latest @@ -55,29 +55,29 @@ jobs: no-cache: true # pull: true - unittests-G83: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Replace actinia version (including GRASS version) - run: | - sed -i "s+mundialis/actinia:latest+mundialis/actinia:grass8.3+g" docker/actinia-processing-lib-test/Dockerfile - - name: Replace run only unittest command - run: | - sed -i "s+# RUN make test+RUN make unittest+g" docker/actinia-processing-lib-test/Dockerfile - - name: Unittests of actinia-processing-lib - id: docker_build - uses: docker/build-push-action@v6 - with: - push: false - tags: actinia-processing-lib-tests:alpine - context: . - file: docker/actinia-processing-lib-test/Dockerfile - no-cache: true - # pull: true + # unittests-G83: + # runs-on: ubuntu-latest + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # - name: Replace actinia version (including GRASS version) + # run: | + # sed -i "s+mundialis/actinia:latest+mundialis/actinia:grass8.3+g" docker/actinia-processing-lib-test/Dockerfile + # - name: Replace run only unittest command + # run: | + # sed -i "s+# RUN make test+RUN make unittest+g" docker/actinia-processing-lib-test/Dockerfile + # - name: Unittests of actinia-processing-lib + # id: docker_build + # uses: docker/build-push-action@v6 + # with: + # push: false + # tags: actinia-processing-lib-tests:alpine + # context: . 
+ # file: docker/actinia-processing-lib-test/Dockerfile + # no-cache: true + # # pull: true integration-tests-G83: runs-on: ubuntu-latest diff --git a/tests_with_kvdb.sh b/tests_with_kvdb.sh index 2881cfe..f5b7112 100755 --- a/tests_with_kvdb.sh +++ b/tests_with_kvdb.sh @@ -15,6 +15,10 @@ elif [ "$1" = "integrationtest" ] then pytest -m 'not unittest' TEST_RES=$? +elif [ "$1" = "unittest" ] +then + pytest -m 'unittest' + TEST_RES=$? else pytest TEST_RES=$? From a1094bb6bd19f1722b60e565fe7e9fd2313ea467 Mon Sep 17 00:00:00 2001 From: Carmen Date: Fri, 6 Jun 2025 15:38:49 +0200 Subject: [PATCH 45/46] lint --- src/actinia_processing_lib/ephemeral_processing_with_export.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/actinia_processing_lib/ephemeral_processing_with_export.py b/src/actinia_processing_lib/ephemeral_processing_with_export.py index 26bf3e0..02f0016 100644 --- a/src/actinia_processing_lib/ephemeral_processing_with_export.py +++ b/src/actinia_processing_lib/ephemeral_processing_with_export.py @@ -428,6 +428,7 @@ def _export_resources(self, use_raster_region=False): output_path = None # Legacy code + file_name = None if "name" in resource: file_name = resource["name"] if "value" in resource: From 13cc529119c33ae65c32b0f2c6a1973db5c9ec20 Mon Sep 17 00:00:00 2001 From: Carmen Date: Thu, 12 Jun 2025 11:39:19 +0200 Subject: [PATCH 46/46] update README --- README.md | 90 ++----------------------------------------------------- 1 file changed, 3 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index aea289f..a7514f6 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,13 @@ # actinia-processing-lib -This is an example plugin for [actinia-core](https://github.com/mundialis/actinia_core) which adds a "Hello World" endpoint to actinia-core. +This is the processing library for [actinia-core](https://github.com/mundialis/actinia_core). -You can run actinia-processing-lib as an actinia-core plugin. 
+It is a requirement of actinia-core and some actinia plugins and not meant to be used standalone. -## Installation -Use docker-compose for installation: -```bash -docker compose -f docker/docker-compose.yml build -docker compose -f docker/docker-compose.yml up -d -``` - -### Installation hints -* If you get an error like: `ERROR: for docker_kvdb_1 Cannot start service valkey: network xxx not found` you can try the following: -```bash -docker compose -f docker/docker-compose.yml down -# remove all custom networks not used by a container -docker network prune -docker compose -f docker/docker-compose.yml up -d -``` - -### Requesting helloworld endpoint -You can test the plugin and request the `/helloworld` endpoint, e.g. with: -```bash -curl -u actinia-gdi:actinia-gdi -X GET http://localhost:8088/api/v3/helloworld | jq - -curl -u actinia-gdi:actinia-gdi -H 'accept: application/json' -H 'Content-Type: application/json' -X POST http://localhost:8088/api/v3/helloworld -d '{"name": "test"}' | jq -``` ## DEV setup -For a DEV setup you can use the docker/docker-compose.yml: -```bash -docker compose -f docker/docker-compose.yml build -docker compose -f docker/docker-compose.yml run --rm --service-ports --entrypoint sh actinia - -# install the plugin -(cd /src/actinia-processing-lib && python3 setup.py install) -# start actinia-core with your plugin -sh /src/start.sh -# gunicorn -b 0.0.0.0:8088 -w 1 --access-logfile=- -k gthread actinia_core.main:flask_app -``` +For a DEV setup integrated with other actinia components, see [here](https://github.com/actinia-org/actinia-docker#local-dev-setup-for-actinia-core-plugins-with-vscode). -### Hints - -* If you have no `.git` folder in the plugin folder, you need to set the -`SETUPTOOLS_SCM_PRETEND_VERSION` before installing the plugin: -```bash -export SETUPTOOLS_SCM_PRETEND_VERSION=0.0 -``` -Otherwise you will get an error like this -`LookupError: setuptools-scm was unable to detect version for '/src/actinia-processing-lib'.`. 
- -* If you make changes in code and nothing changes you can try to uninstall the plugin: -```bash -pip3 uninstall actinia-processing-lib.wsgi -y -rm -rf /usr/lib/python3.8/site-packages/actinia_processing_lib.wsgi-*.egg -``` ### Running tests You can run the tests in the actinia test docker: @@ -69,45 +21,9 @@ cd /src/actinia-processing-lib/ # run all tests make test -# run only unittests -make unittest # run only integrationtests make integrationtest # run only tests which are marked for development with the decorator '@pytest.mark.dev' make devtest ``` - -## Starting steps for own plugin -If you want to have your own plugin you can use this repo to create it by -executing the `scripts/create_own_plugin.sh`. - -If you want the repo in git then you first have to create an empty git repository -and then run the script. Then follow the last instructions from the script -to upload the initial code to your git repository. - -```bash -bash create_own_plugin.sh actinia-ex2-plugin git -``` - -If you only want your own plugin in a folder and not in git you can execute the -script like this: - -```bash -bash create_own_plugin.sh actinia-ex2-plugin -``` - -## Hint for the development of actinia plugins - -### skip permission check -The parameter [`skip_permission_check`](https://github.com/mundialis/actinia_core/blob/main/src/actinia_core/processing/actinia_processing/ephemeral_processing.py#L1420-L1422) (see [example in actinia-statistic plugin](https://github.com/mundialis/actinia_statistic_plugin/blob/master/src/actinia_statistic_plugin/vector_sampling.py#L207)) -should only be set to `True` if you are sure that you really don't want to check the permissions. 
- -The skip of the permission check leads to a skipping of: -* [the module check](https://github.com/mundialis/actinia_core/blob/main/src/actinia_core/processing/actinia_processing/ephemeral_processing.py#L579-L589) -* [the limit of the number of processes](https://github.com/mundialis/actinia_core/blob/main/src/actinia_core/processing/actinia_processing/ephemeral_processing.py#L566-L570) -* the limit of the processing time - -Not skipped are: -* the limit of the cells -* the mapset/project limitations of the user