diff --git a/Orange/base.py b/Orange/base.py index 07500b96a5b..c840f881201 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -25,18 +25,16 @@ class ReprableWithPreprocessors(Reprable): def _reprable_omit_param(self, name, default, value): - if name == "preprocessors": - default_cls = type(self).preprocessors - if value is default or value is default_cls: - return True - else: - try: - return all(p1 is p2 for p1, p2 in - itertools.zip_longest(value, default_cls)) - except (ValueError, TypeError): - return False - else: + if name != "preprocessors": return super()._reprable_omit_param(name, default, value) + default_cls = type(self).preprocessors + if value is default or value is default_cls: + return True + try: + return all(p1 is p2 for p1, p2 in + itertools.zip_longest(value, default_cls)) + except (ValueError, TypeError): + return False class Learner(ReprableWithPreprocessors): @@ -135,8 +133,9 @@ def __call__(self, data, progress_callback=None): OrangeDeprecationWarning) if len(data.domain.class_vars) > 1 and not self.supports_multiclass: - raise TypeError("%s doesn't support multiple class variables" % - self.__class__.__name__) + raise TypeError( + f"{self.__class__.__name__} doesn't support multiple class variables" + ) progress_callback(0.1, "Fitting...") model = self._fit_model(data) @@ -156,9 +155,8 @@ def __call__(self, data, progress_callback=None): def _fit_model(self, data): if type(self).fit is Learner.fit: return self.fit_storage(data) - else: - X, Y, W = data.X, data.Y, data.W if data.has_weights() else None - return self.fit(X, Y, W) + X, Y, W = data.X, data.Y, data.W if data.has_weights() else None + return self.fit(X, Y, W) def preprocess(self, data, progress_callback=None): """Apply the `preprocessors` to the data""" @@ -219,28 +217,23 @@ class Model(Reprable): def __init__(self, domain=None, original_domain=None): self.domain = domain - if original_domain is not None: - self.original_domain = original_domain - else: - self.original_domain = 
domain + self.original_domain = domain if original_domain is None else original_domain self.used_vals = None def predict(self, X): if type(self).predict_storage is Model.predict_storage: raise TypeError("Descendants of Model must overload method predict") - else: - Y = np.zeros((len(X), len(self.domain.class_vars))) - Y[:] = np.nan - table = Table(self.domain, X, Y) - return self.predict_storage(table) + Y = np.zeros((len(X), len(self.domain.class_vars))) + Y[:] = np.nan + table = Table(self.domain, X, Y) + return self.predict_storage(table) def predict_storage(self, data): if isinstance(data, Storage): return self.predict(data.X) elif isinstance(data, Instance): return self.predict(np.atleast_2d(data.x)) - raise TypeError("Unrecognized argument (instance of '{}')" - .format(type(data).__name__)) + raise TypeError(f"Unrecognized argument (instance of '{type(data).__name__}')") def get_backmappers(self, data): backmappers = [] @@ -519,7 +512,7 @@ def predict(self, X): def __repr__(self): # Params represented as a comment because not passed into constructor - return super().__repr__() + ' # params=' + repr(self.params) + return f'{super().__repr__()} # params={repr(self.params)}' class SklLearner(Learner, metaclass=WrapperMeta): @@ -553,18 +546,14 @@ def params(self, value): def _get_sklparams(self, values): skllearner = self.__wraps__ - if skllearner is not None: - spec = list( - inspect.signature(skllearner.__init__).parameters.keys() - ) - # first argument is 'self' - assert spec[0] == "self" - params = { - name: values[name] for name in spec[1:] if name in values - } - else: + if skllearner is None: raise TypeError("Wrapper does not define '__wraps__'") - return params + spec = list( + inspect.signature(skllearner.__init__).parameters.keys() + ) + # first argument is 'self' + assert spec[0] == "self" + return {name: values[name] for name in spec[1:] if name in values} def preprocess(self, data, progress_callback=None): data = super().preprocess(data, 
progress_callback) @@ -672,10 +661,9 @@ def __init__(self, cat_model, cat_features, domain): self.cat_features = cat_features def __call__(self, data, ret=Model.Value): - if isinstance(data, Table): - with data.force_unlocked(data.X): - return super().__call__(data, ret) - else: + if not isinstance(data, Table): + return super().__call__(data, ret) + with data.force_unlocked(data.X): return super().__call__(data, ret) def predict(self, X): @@ -689,7 +677,7 @@ def predict(self, X): def __repr__(self): # Params represented as a comment because not passed into constructor - return super().__repr__() + ' # params=' + repr(self.params) + return f'{super().__repr__()} # params={repr(self.params)}' class CatGBBaseLearner(Learner, metaclass=WrapperMeta): diff --git a/Orange/canvas/__main__.py b/Orange/canvas/__main__.py index 975874e27d1..1cdefc29d38 100644 --- a/Orange/canvas/__main__.py +++ b/Orange/canvas/__main__.py @@ -203,7 +203,6 @@ def pull_notifications(): # get set of already displayed notification IDs, stored in settings["notifications/displayed"] displayedIDs = literal_eval(settings.value("notifications/displayed", "set()", str)) - # get notification feed from Github class GetNotifFeed(QThread): resultReady = pyqtSignal(str) @@ -259,7 +258,7 @@ def parse_yaml_notification(YAMLnotif: YAMLNotification): # if no custom icon is set, default to notif type icon if YAMLnotif.icon is None and YAMLnotif.type is not None: - YAMLnotif.icon = "canvas/icons/" + YAMLnotif.type + ".png" + YAMLnotif.icon = f"canvas/icons/{YAMLnotif.type}.png" # instantiate and return Notification notif = YAMLnotif.toNotification() @@ -275,6 +274,7 @@ def remember_notification(role): displayedIDs.add(YAMLnotif.id) settings.setValue("notifications/displayed", repr(displayedIDs)) + notif.clicked.connect(remember_notification) # display notification diff --git a/Orange/canvas/config.py b/Orange/canvas/config.py index 05c9fb19d9c..f9d242279c9 100644 --- a/Orange/canvas/config.py +++ 
b/Orange/canvas/config.py @@ -75,15 +75,13 @@ class Config(config.Config): def init(self): super().init() QGuiApplication.setApplicationDisplayName(self.ApplicationName) - widget_settings_dir_cfg = environ.get_path("widget_settings_dir", "") - if widget_settings_dir_cfg: + if widget_settings_dir_cfg := environ.get_path("widget_settings_dir", ""): # widget_settings_dir is configured via config file set_widget_settings_dir_components( widget_settings_dir_cfg, self.ApplicationVersion ) - canvas_settings_dir_cfg = environ.get_path("canvas_settings_dir", "") - if canvas_settings_dir_cfg: + if canvas_settings_dir_cfg := environ.get_path("canvas_settings_dir", ""): # canvas_settings_dir is configured via config file QSettings.setPath( QSettings.IniFormat, QSettings.UserScope, diff --git a/Orange/classification/calibration.py b/Orange/classification/calibration.py index ea1c4773cd1..b7273d2680e 100644 --- a/Orange/classification/calibration.py +++ b/Orange/classification/calibration.py @@ -39,10 +39,7 @@ def __call__(self, data, ret=Model.Value): with np.errstate(invalid="ignore"): # we fix nanx below vals = (class_probs >= self.threshold).astype(float) vals[np.isnan(class_probs)] = np.nan - if ret == Model.Value: - return vals - else: - return vals, probs + return vals if ret == Model.Value else (vals, probs) class ThresholdLearner(Learner): @@ -110,10 +107,7 @@ def __call__(self, data, ret=Model.Value): if ret == Model.Probs: return cal_probs vals = np.argmax(cal_probs, axis=1) - if ret == Model.Value: - return vals - else: - return vals, cal_probs + return vals if ret == Model.Value else (vals, cal_probs) def calibrated_probs(self, probs): if self.calibrators: @@ -171,6 +165,5 @@ def get_model(self, model, ytrue, probabilities): else: fitter = IsotonicRegression(out_of_bounds='clip') probabilities[np.isinf(probabilities)] = 1 - calibrators = [fitter.fit(cls_probs, ytrue) - for cls_idx, cls_probs in enumerate(probabilities.T)] + calibrators = [fitter.fit(cls_probs, 
ytrue) for cls_probs in probabilities.T] return CalibratedClassifier(model, calibrators) diff --git a/Orange/classification/logistic_regression.py b/Orange/classification/logistic_regression.py index aeb4fbfc1cb..1a07d70eb4c 100644 --- a/Orange/classification/logistic_regression.py +++ b/Orange/classification/logistic_regression.py @@ -47,10 +47,7 @@ def _initialize_wrapped(self): # l1 penalty. solver, penalty = params.pop("solver"), params.get("penalty") if solver == "auto": - if penalty == "l1": - solver = "liblinear" - else: - solver = "lbfgs" + solver = "liblinear" if penalty == "l1" else "lbfgs" params["solver"] = solver return self.__wraps__(**params) diff --git a/Orange/classification/majority.py b/Orange/classification/majority.py index 7d33110153f..b2e4dd4d625 100644 --- a/Orange/classification/majority.py +++ b/Orange/classification/majority.py @@ -79,6 +79,6 @@ def predict(self, X): return probs def __str__(self): - return 'ConstantModel {}'.format(self.dist) + return f'ConstantModel {self.dist}' MajorityLearner.__returns__ = ConstantModel diff --git a/Orange/classification/rules.py b/Orange/classification/rules.py index db9baf1395c..02f4058f033 100644 --- a/Orange/classification/rules.py +++ b/Orange/classification/rules.py @@ -108,8 +108,7 @@ def likelihood_ratio_statistic(x, y): x[x == 0] = 1e-5 y[y == 0] = 1e-5 y *= x.sum() / y.sum() - lrs = 2 * (x * np.log(x/y)).sum() - return lrs + return 2 * (x * np.log(x/y)).sum() def get_dist(Y, W, domain): @@ -208,14 +207,12 @@ def evaluate_rule(self, rule): dist_sum, p_dist_sum = dist.sum(), p_dist.sum() d_modus = argmaxrnd(dist) + p_cond = dist_sum / p_dist_sum if tc is not None: - p_cond = dist_sum / p_dist_sum # p_cond = dist[tc] / p_dist[tc] p_true_positive = dist[tc] / dist_sum p_class = p_dist[tc] / p_dist_sum else: - # generality of the rule - p_cond = dist_sum / p_dist_sum # true positives of class c p_true_positive = dist[d_modus] / dist_sum # prior probability of class c @@ -554,11 +551,7 @@ def 
find_new_selectors(self, X, Y, W, domain, existing_selectors): s2 = Selector(column=i, op=">=", value=val) possible_selectors.extend([s1, s2]) - # remove redundant selectors - possible_selectors = [smh for smh in possible_selectors if - smh not in existing_selectors] - - return possible_selectors + return [smh for smh in possible_selectors if smh not in existing_selectors] @staticmethod def discretize(X, Y, W, domain): @@ -784,10 +777,10 @@ def __len__(self): return len(self.selectors) def __str__(self): - attributes = self.domain.attributes class_var = self.domain.class_var if self.selectors: + attributes = self.domain.attributes cond = " AND ".join([attributes[s.column].name + s.op + (str(attributes[s.column].values[int(s.value)]) if attributes[s.column].is_discrete @@ -797,10 +790,11 @@ def __str__(self): # it is possible that prediction is not set yet - use _ for outcome outcome = ( - (class_var.name + "=" + class_var.values[self.prediction]) - if self.prediction is not None else "_" + f"{class_var.name}={class_var.values[self.prediction]}" + if self.prediction is not None + else "_" ) - return "IF {} THEN {} ".format(cond, outcome) + return f"IF {cond} THEN {outcome} " class RuleHuntress: @@ -1281,7 +1275,7 @@ def fit_storage(self, data): Y = Y.astype(dtype=int) rule_list = self.find_rules(X, Y, W, None, self.base_rules, data.domain) # add the default rule, if required - if not rule_list or rule_list and rule_list[-1].length > 0: + if not rule_list or rule_list[-1].length > 0: rule_list.append(self.generate_default_rule(X, Y, W, data.domain)) return CN2Classifier(domain=data.domain, rule_list=rule_list) diff --git a/Orange/classification/simple_tree.py b/Orange/classification/simple_tree.py index 0b894b54f22..f60f5fcadfd 100644 --- a/Orange/classification/simple_tree.py +++ b/Orange/classification/simple_tree.py @@ -102,7 +102,7 @@ def __init__(self, learner, data): self.cls_vars = list(data.domain.class_vars) if len(data.domain.class_vars) != 1: n_cls = 
len(data.domain.class_vars) - raise ValueError("Number of classes should be 1: {}".format(n_cls)) + raise ValueError(f"Number of classes should be 1: {n_cls}") if data.domain.has_discrete_class: self.type = Classification @@ -121,8 +121,7 @@ def __init__(self, learner, data): elif learner.skip_prob == 'log2': skip_prob = 1.0 - np.log2(X.shape[1]) / X.shape[1] else: - raise ValueError( - "skip_prob not valid: {}".format(learner.skip_prob)) + raise ValueError(f"skip_prob not valid: {learner.skip_prob}") attr_vals = [] domain = [] @@ -240,12 +239,10 @@ def dumps_tree(self, node): if n.type == ContinuousNode: xs.append('{:.5f}'.format(n.split)) elif self.type == Classification: - for i in range(self.cls_vals): - xs.append('{:.2f}'.format(n.dist[i])) + xs.extend('{:.2f}'.format(n.dist[i]) for i in range(self.cls_vals)) else: xs.append('{:.5f} {:.5f}'.format(n.n, n.sum)) - for i in range(n.children_size): - xs.append(self.dumps_tree(n.children[i])) + xs.extend(self.dumps_tree(n.children[i]) for i in range(n.children_size)) xs.append('}') return ' '.join(xs) @@ -277,17 +274,16 @@ def to_string(self, node=None, level=0): format_str = format_leaf = format_node = None else: format_str = f"({self.domain.class_var.format_str}: %s)" - format_leaf = " --> " + format_str - format_node = "%s " + format_str + format_leaf = f" --> {format_str}" + format_node = f"%s {format_str}" if n.children_size == 0: - if self.type == Classification: - node_cont = [round(n.dist[i], 1) - for i in range(self.cls_vals)] - index = node_cont.index(max(node_cont)) - major_class = self.cls_vars[0].values[index] - return ' --> %s (%s)' % (major_class, node_cont) - else: + if self.type != Classification: return format_leaf % (n.sum / n.n, n.n) + node_cont = [round(n.dist[i], 1) + for i in range(self.cls_vals)] + index = node_cont.index(max(node_cont)) + major_class = self.cls_vars[0].values[index] + return f' --> {major_class} ({node_cont})' else: attr = self.dom_attr[n.split_attr] node_desc = attr.name 
@@ -295,15 +291,15 @@ def to_string(self, node=None, level=0): if self.type == Classification: node_cont = [round(n.dist[i], 1) for i in range(self.cls_vals)] - ret_str = indent + '%s (%s)' % (node_desc, node_cont) + ret_str = f'{indent}{node_desc} ({node_cont})' else: ret_str = indent + format_node % (node_desc, n.sum / n.n, n.n) for i in range(n.children_size): if attr.is_continuous: split = '<=' if i % 2 == 0 else '>' split += attr.format_str % n.split - ret_str += indent + ': %s' % split + ret_str += f'{indent}: {split}' else: - ret_str += indent + ': %s' % attr.values[i] + ret_str += f'{indent}: {attr.values[i]}' ret_str += self.to_string(n.children[i], level + 1) return ret_str diff --git a/Orange/classification/svm.py b/Orange/classification/svm.py index eb387c37938..c6c6e5a6c34 100644 --- a/Orange/classification/svm.py +++ b/Orange/classification/svm.py @@ -51,4 +51,4 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma="auto", coef0=0.0, learners = [SVMLearner(), NuSVMLearner(), LinearSVMLearner()] res = CrossValidation()(data_, learners) for l, ca in zip(learners, CA()(res)): - print("learner: {}\nCA: {}\n".format(l, ca)) + print(f"learner: {l}\nCA: {ca}\n") diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py index 764e801f53d..68e80535c0b 100644 --- a/Orange/classification/tree.py +++ b/Orange/classification/tree.py @@ -204,9 +204,9 @@ def fit_storage(self, data): for attr in data.domain.attributes): # No fallback in the script; widgets can prevent this error # by providing a fallback and issue a warning about doing so - raise ValueError("Exhaustive binarization does not handle " - "attributes with more than {} values". 
- format(self.MAX_BINARIZATION)) + raise ValueError( + f"Exhaustive binarization does not handle attributes with more than {self.MAX_BINARIZATION} values" + ) active_inst = np.nonzero(~np.isnan(data.Y))[0].astype(np.int32) root = self._build_tree(data, active_inst) @@ -216,8 +216,7 @@ def fit_storage(self, data): distr[:] = 1 root = Node(None, 0, distr) root.subset = active_inst - model = TreeModel(data, root) - return model + return TreeModel(data, root) class SklTreeClassifier(SklModel, TreeModelInterface): diff --git a/Orange/clustering/clustering.py b/Orange/clustering/clustering.py index a7598a03629..21f12f79a1c 100644 --- a/Orange/clustering/clustering.py +++ b/Orange/clustering/clustering.py @@ -23,8 +23,7 @@ def fix_dim(x): if isinstance(data, np.ndarray): one_d = data.ndim == 1 prediction = self.predict(np.atleast_2d(data)) - elif isinstance(data, scipy.sparse.csr_matrix) or \ - isinstance(data, scipy.sparse.csc_matrix): + elif isinstance(data, (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)): prediction = self.predict(data) elif isinstance(data, (Table, Instance)): if isinstance(data, Instance): @@ -32,8 +31,8 @@ def fix_dim(x): one_d = True if data.domain != self.domain: if self.original_domain.attributes != data.domain.attributes \ - and data.X.size \ - and not np.isnan(data.X).all(): + and data.X.size \ + and not np.isnan(data.X).all(): data = data.transform(self.original_domain) if np.isnan(data.X).all(): raise DomainTransformationError( diff --git a/Orange/clustering/hierarchical.py b/Orange/clustering/hierarchical.py index 18ffcc84684..86f46a16691 100644 --- a/Orange/clustering/hierarchical.py +++ b/Orange/clustering/hierarchical.py @@ -131,14 +131,18 @@ def __eq__(self, other): return isinstance(other, Tree) and tuple(self) == tuple(other) def __lt__(self, other): - if not isinstance(other, Tree): - return NotImplemented - return tuple(self) < tuple(other) + return ( + tuple(self) < tuple(other) + if isinstance(other, Tree) + else NotImplemented 
+ ) def __le__(self, other): - if not isinstance(other, Tree): - return NotImplemented - return tuple(self) <= tuple(other) + return ( + tuple(self) <= tuple(other) + if isinstance(other, Tree) + else NotImplemented + ) def __getnewargs__(self): return tuple(self) @@ -278,8 +282,7 @@ def postorder(tree, branches=attrgetter("branches")): while stack: current = stack.popleft() - children = branches(current) - if children: + if children := branches(current): # yield the item on the way up if current in visited: yield current @@ -299,8 +302,7 @@ def preorder(tree, branches=attrgetter("branches")): while stack: current = stack.popleft() yield current - children = branches(current) - if children: + if children := branches(current): stack.extendleft(reversed(children)) @@ -329,7 +331,7 @@ def prune(cluster, level=None, height=None, condition=None): be supplied. """ - if not any(arg is not None for arg in [level, height, condition]): + if all(arg is None for arg in [level, height, condition]): raise ValueError("At least one pruning argument must be supplied") level_check = height_check = condition_check = lambda cl: False @@ -351,10 +353,7 @@ def check_all(cl): for node in postorder(cluster): if check_all(node): - if node.is_leaf: - T[node] = node - else: - T[node] = Tree(node.value, ()) + T[node] = node if node.is_leaf else Tree(node.value, ()) else: T[node] = Tree(node.value, tuple(T[ch] for ch in node.branches)) @@ -369,11 +368,10 @@ def cluster_depths(cluster): :rtype: class:`dict` """ - depths = {} - depths[cluster] = 0 + depths = {cluster: 0} for cluster in preorder(cluster): cl_depth = depths[cluster] - depths.update(dict.fromkeys(cluster.branches, cl_depth + 1)) + depths |= dict.fromkeys(cluster.branches, cl_depth + 1) return depths diff --git a/Orange/data/domain.py b/Orange/data/domain.py index 88e3245d819..c82b141e1e5 100644 --- a/Orange/data/domain.py +++ b/Orange/data/domain.py @@ -175,9 +175,10 @@ def _ensure_indices(self): indices = dict(chain.from_iterable( 
((var, idx), (var.name, idx), (idx, idx)) for idx, var in enumerate(self._variables))) - indices.update(chain.from_iterable( - ((var, -1-idx), (var.name, -1-idx), (-1-idx, -1-idx)) - for idx, var in enumerate(self.metas))) + indices |= chain.from_iterable( + ((var, -1 - idx), (var.name, -1 - idx), (-1 - idx, -1 - idx)) + for idx, var in enumerate(self.metas) + ) self._indices = indices def __setstate__(self, state): @@ -223,8 +224,7 @@ def get_places(max_index): return 0 if max_index == 1 else int(log(max_index, 10)) + 1 def get_name(base, index, places): - return base if not places \ - else "{} {:0{}}".format(base, index + 1, places) + return "{} {:0{}}".format(base, index + 1, places) if places else base if X.ndim != 2: raise ValueError('X must be a 2-dimensional array') @@ -290,10 +290,7 @@ def _get_equivalent(self, var): if isinstance(var, Variable): index = self._indices.get(var.name) if index is not None: - if index >= 0: - myvar = self.variables[index] - else: - myvar = self.metas[-1 - index] + myvar = self.variables[index] if index >= 0 else self.metas[-1 - index] if myvar == var: return myvar return None @@ -318,10 +315,7 @@ def __getitem__(self, idx): if var is not None: return var raise KeyError(idx) - if index >= 0: - return self.variables[index] - else: - return self.metas[-1 - index] + return self.variables[index] if index >= 0 else self.metas[-1 - index] def __contains__(self, item): """ @@ -365,7 +359,7 @@ def index(self, var): if equiv is not None: return self._indices[equiv] - raise ValueError("'%s' is not in domain" % var) + raise ValueError(f"'{var}' is not in domain") def has_discrete_attributes(self, include_class=False, include_metas=False): """ @@ -424,22 +418,38 @@ def convert(self, inst): return inst._x, inst._y, inst._metas c = DomainConversion(inst.domain, self) l = len(inst.domain.attributes) - values = [(inst._x[i] if 0 <= i < l - else inst._y[i - l] if i >= l - else inst._metas[-i - 1]) - if isinstance(i, int) - else (Unknown if not i 
else i(inst)) - for i in c.variables] - metas = [(inst._x[i] if 0 <= i < l - else inst._y[i - l] if i >= l - else inst._metas[-i - 1]) - if isinstance(i, int) - else (Unknown if not i else i(inst)) - for i in c.metas] + values = [ + ( + inst._x[i] + if 0 <= i < l + else inst._y[i - l] + if i >= l + else inst._metas[-i - 1] + ) + if isinstance(i, int) + else i(inst) + if i + else Unknown + for i in c.variables + ] + metas = [ + ( + inst._x[i] + if 0 <= i < l + else inst._y[i - l] + if i >= l + else inst._metas[-i - 1] + ) + if isinstance(i, int) + else i(inst) + if i + else Unknown + for i in c.metas + ] else: nvars = len(self._variables) nmetas = len(self._metas) - if len(inst) != nvars and len(inst) != nvars + nmetas: + if len(inst) not in [nvars, nvars + nmetas]: raise ValueError("invalid data length for domain") values = [var.to_val(val) for var, val in zip(self._variables, inst)] @@ -451,23 +461,22 @@ def convert(self, inst): nattrs = len(self.attributes) # Let np.array decide dtype for values return np.array(values[:nattrs]), np.array(values[nattrs:]),\ - np.array(metas, dtype=object) + np.array(metas, dtype=object) def select_columns(self, col_idx): attributes, col_indices = self._compute_col_indices(col_idx) - if attributes is not None: - n_attrs = len(self.attributes) - r_attrs = [attributes[i] - for i, col in enumerate(col_indices) - if 0 <= col < n_attrs] - r_classes = [attributes[i] - for i, col in enumerate(col_indices) - if col >= n_attrs] - r_metas = [attributes[i] - for i, col in enumerate(col_indices) if col < 0] - return Domain(r_attrs, r_classes, r_metas) - else: + if attributes is None: return self + n_attrs = len(self.attributes) + r_attrs = [attributes[i] + for i, col in enumerate(col_indices) + if 0 <= col < n_attrs] + r_classes = [attributes[i] + for i, col in enumerate(col_indices) + if col >= n_attrs] + r_metas = [attributes[i] + for i, col in enumerate(col_indices) if col < 0] + return Domain(r_attrs, r_classes, r_metas) def 
_compute_col_indices(self, col_idx): if col_idx is ...: @@ -515,12 +524,15 @@ def copy(self): ) def __eq__(self, other): - if not isinstance(other, Domain): - return False - - return (self.attributes == other.attributes and - self.class_vars == other.class_vars and - self.metas == other.metas) + return ( + ( + self.attributes == other.attributes + and self.class_vars == other.class_vars + and self.metas == other.metas + ) + if isinstance(other, Domain) + else False + ) def __hash__(self): if self._hash is None: diff --git a/Orange/data/filter.py b/Orange/data/filter.py index 65c0a44a68e..a89c3e3e940 100644 --- a/Orange/data/filter.py +++ b/Orange/data/filter.py @@ -291,10 +291,7 @@ def __init__(self, column, values): def __call__(self, inst): value = inst[inst.domain.index(self.column)] - if self.values is None: - return not isnan(value) - else: - return value in self.values + return not isnan(value) if self.values is None else value in self.values def __eq__(self, other): return isinstance(other, FilterDiscrete) and \ @@ -381,19 +378,19 @@ def __str__(self): elif isinstance(self.column, Variable): column = self.column.name else: - column = "feature({})".format(self.column) + column = f"feature({self.column})" names = {self.Equal: "=", self.NotEqual: "≠", self.Less: "<", self.LessEqual: "≤", self.Greater: ">", self.GreaterEqual: "≥"} if self.oper in names: - return "{} {} {}".format(column, names[self.oper], self.ref) + return f"{column} {names[self.oper]} {self.ref}" if self.oper == self.Between: - return "{} ≤ {} ≤ {}".format(self.min, column, self.max) + return f"{self.min} ≤ {column} ≤ {self.max}" if self.oper == self.Outside: - return "not {} ≤ {} ≤ {}".format(self.min, column, self.max) + return f"not {self.min} ≤ {column} ≤ {self.max}" if self.oper == self.IsDefined: - return "{} is defined".format(column) + return f"{column} is defined" return "invalid operator" diff --git a/Orange/data/instance.py b/Orange/data/instance.py index 434dbe0ae55..d3fa6276d5c 
100644 --- a/Orange/data/instance.py +++ b/Orange/data/instance.py @@ -102,8 +102,7 @@ def __setitem__(self, key, value): key = self._domain.index(key) value = self._domain[key].to_val(value) if key >= 0 and not isinstance(value, (int, float)): - raise TypeError("Expected primitive value, got '%s'" % - type(value).__name__) + raise TypeError(f"Expected primitive value, got '{type(value).__name__}'") if 0 <= key < len(self._domain.attributes): self._x[key] = value @@ -138,26 +137,25 @@ def __getitem__(self, key): @staticmethod def str_values(data, variables, limit=True): - if limit: - s = ", ".join(var.str_val(val) - for var, val in zip(variables, data[:5])) - if len(data) > 5: - s += ", ..." - return s - else: + if not limit: return ", ".join(var.str_val(val) for var, val in zip(variables, data)) + s = ", ".join(var.str_val(val) + for var, val in zip(variables, data[:5])) + if len(data) > 5: + s += ", ..." + return s def _str(self, limit): - s = "[" + self.str_values(self._x, self._domain.attributes, limit) + s = f"[{self.str_values(self._x, self._domain.attributes, limit)}" if self._domain.class_vars: s += " | " + \ - self.str_values(self._y, self._domain.class_vars, limit) + self.str_values(self._y, self._domain.class_vars, limit) s += "]" if self._domain.metas: s += " {" + \ - self.str_values(self._metas, self._domain.metas, limit) + \ - "}" + self.str_values(self._metas, self._domain.metas, limit) + \ + "}" return s def __str__(self): @@ -232,7 +230,8 @@ def set_class(self, value): classes. 
""" self._check_single_class() - if not isinstance(value, Real): - self._y[0] = self._domain.class_var.to_val(value) - else: - self._y[0] = value + self._y[0] = ( + value + if isinstance(value, Real) + else self._domain.class_var.to_val(value) + ) diff --git a/Orange/data/io.py b/Orange/data/io.py index 07592d72bc0..2fc869102a0 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -134,14 +134,14 @@ def read(self): lambda: (locale.getpreferredencoding(False), None), lambda: (sys.getdefaultencoding(), None), # desperate lambda: ('utf-8', None), # ... - lambda: ('utf-8', 'ignore')): # fallback + lambda: ('utf-8', 'ignore')): # fallback encoding, errors = encoding() # Clear the error flag for all except the last check, because # the error of second-to-last check is stored and shown as warning in owfile if errors != 'ignore': error = '' with self.open(self.filename, mode='rt', newline='', - encoding=encoding, errors=errors) as file: + encoding=encoding, errors=errors) as file: # Sniff the CSV dialect (delimiter, quotes, ...) 
try: dialect = csv.Sniffer().sniff( @@ -174,16 +174,14 @@ def read(self): path.split(self.filename)[-1])[0] if error and isinstance(error, UnicodeDecodeError): pos, endpos = error.args[2], error.args[3] - warning = ('Skipped invalid byte(s) in position ' - '{}{}').format(pos, - ('-' + str(endpos)) if (endpos - pos) > 1 else '') + warning = f"Skipped invalid byte(s) in position {pos}{f'-{str(endpos)}' if endpos - pos > 1 else ''}" warnings.warn(warning) self.set_table_metadata(self.filename, data) return data except Exception as e: error = e continue - raise ValueError('Cannot parse dataset {}: {}'.format(self.filename, error)) from error + raise ValueError(f'Cannot parse dataset {self.filename}: {error}') from error @classmethod def write_file(cls, filename, data, with_annotations=True): @@ -268,7 +266,7 @@ def read(self): if self.sheet and len(self.sheets) > 1: table.name = '-'.join((table.name, self.sheet)) except Exception: - raise IOError("Couldn't load spreadsheet from " + self.filename) + raise IOError(f"Couldn't load spreadsheet from {self.filename}") return table @@ -314,10 +312,7 @@ def str_(x): return filter(any, cells) def _get_active_sheet(self) -> openpyxl.worksheet.worksheet.Worksheet: - if self.sheet: - return self.workbook[self.sheet] - else: - return self.workbook.active + return self.workbook[self.sheet] if self.sheet else self.workbook.active @classmethod def write_file(cls, filename, data, with_annotations=False): @@ -414,7 +409,7 @@ class UrlReader(FileFormat): def __init__(self, filename): filename = filename.strip() if not urlparse(filename).scheme: - filename = 'http://' + filename + filename = f'http://{filename}' # Fully support URL with query or fragment like http://filename.txt?a=1&b=2#c=3 def quote_byte(b): @@ -479,14 +474,14 @@ def _trim_googlesheet(url): r'(?:/.*?gid=(?P<sheet_id>\d+).*|.*)?', url, re.IGNORECASE) try: - workbook, sheet = match.group('workbook_id'), match.group('sheet_id') + workbook, sheet = match.group('workbook_id'), 
match.group('sheet_id') if not workbook: raise ValueError except (AttributeError, ValueError): raise ValueError - url = 'https://docs.google.com/spreadsheets/d/{}/export?format=tsv'.format(workbook) + url = f'https://docs.google.com/spreadsheets/d/{workbook}/export?format=tsv' if sheet: - url += '&gid=' + sheet + url += f'&gid={sheet}' return url @staticmethod @@ -497,7 +492,7 @@ def _trim_googledrive(url): match = re.match(r'/file/d/(?P<id>[^/]+).*', parts.path) if not match: raise ValueError - id_ = match.group("id") + id_ = match["id"] parts = parts._replace(path=f"uc?export=download&id={id_}", query=None) return urlunsplit(parts) diff --git a/Orange/data/io_base.py b/Orange/data/io_base.py index 67722099538..3b4043ee509 100644 --- a/Orange/data/io_base.py +++ b/Orange/data/io_base.py @@ -74,7 +74,7 @@ def __init__(self, flags): setattr(self, flag, True) setattr(self, self.ALL.get(flag, ''), True) elif flag: - warnings.warn('Invalid attribute flag \'{}\''.format(flag)) + warnings.warn(f"Invalid attribute flag \'{flag}\'") @staticmethod def join(iterable, *args): @@ -95,9 +95,9 @@ def split(s): '|'.join(flatten(getattr(vartype, 'TYPE_HEADERS') for vartype in Variable.registry.values())) )) -_RE_FLAGS = re.compile(r'^\s*( |{}|)*\s*$'.format( - '|'.join(flatten(filter(None, i) for i in Flags.ALL.items())) -)) +_RE_FLAGS = re.compile( + rf"^\s*( |{'|'.join(flatten(filter(None, i) for i in Flags.ALL.items()))}|)*\s*$" +) class _ColumnProperties: @@ -107,10 +107,7 @@ def __init__(self, valuemap=None, values=None, orig_values=None, self.values = values self.orig_values = orig_values self.coltype = coltype - if coltype_kwargs is None: - self.coltype_kwargs = {} - else: - self.coltype_kwargs = dict(coltype_kwargs) + self.coltype_kwargs = {} if coltype_kwargs is None else dict(coltype_kwargs) class _TableHeader: @@ -209,10 +206,11 @@ def __init__(self, data: np.ndarray, ncols: int, def create_table(self) -> Table: self.create_columns() - if not self.data.size: - return 
Table.from_domain(self.get_domain(), 0) - else: - return Table.from_numpy(self.get_domain(), *self.get_arrays()) + return ( + Table.from_numpy(self.get_domain(), *self.get_arrays()) + if self.data.size + else Table.from_domain(self.get_domain(), 0) + ) def create_columns(self): names = self.header.names @@ -436,9 +434,7 @@ def parse_headers(cls, data: Iterable[List[str]]) -> Tuple[List, Iterable]: # Try to parse a three-line header lines = [] try: - lines.append(list(next(data))) - lines.append(list(next(data))) - lines.append(list(next(data))) + lines.extend((list(next(data)), list(next(data)), list(next(data)))) except StopIteration: lines, data = [], chain(lines, data) if lines: @@ -553,21 +549,20 @@ def get_reader(cls, filename): # Skip ambiguous, invalid compression-only extensions added on OSX if ext in Compression.all: continue - if fnmatch(path.basename(filename), '*' + ext): + if fnmatch(path.basename(filename), f'*{ext}'): return reader(filename) - raise MissingReaderException('No readers for file "{}"'.format(filename)) + raise MissingReaderException(f'No readers for file "{filename}"') @classmethod def set_table_metadata(cls, filename, table): # pylint: disable=bare-except - if isinstance(filename, str) and path.exists(filename + '.metadata'): + if isinstance(filename, str) and path.exists(f'{filename}.metadata'): try: - with open(filename + '.metadata', 'rb') as f: + with open(f'{filename}.metadata', 'rb') as f: table.attributes = pickle.load(f) - # Unpickling throws different exceptions, not just UnpickleError except: - with open(filename + '.metadata', encoding='utf-8') as f: + with open(f'{filename}.metadata', encoding='utf-8') as f: table.attributes = OrderedDict( (k.strip(), v.strip()) for k, v in (line.split(":", 1) @@ -595,7 +590,7 @@ def write_file(fn): pickle.dump(data.attributes, f, protocol=PICKLE_PROTOCOL) if isinstance(filename, str): - metafile = filename + '.metadata' + metafile = f'{filename}.metadata' if getattr(data, 'attributes', 
None): write_file(metafile) elif path.exists(metafile): @@ -680,15 +675,14 @@ def write_data(cls, write, data): class _FileFormatMeta(Registry): - def __new__(mcs, name, bases, attrs): - newcls = super().__new__(mcs, name, bases, attrs) + def __new__(cls, name, bases, attrs): + newcls = super().__new__(cls, name, bases, attrs) # Optionally add compressed versions of extensions as supported if getattr(newcls, 'SUPPORT_COMPRESSED', False): new_extensions = list(getattr(newcls, 'EXTENSIONS', ())) for compression in Compression.all: - for ext in newcls.EXTENSIONS: - new_extensions.append(ext + compression) + new_extensions.extend(ext + compression for ext in newcls.EXTENSIONS) if sys.platform in ('darwin', 'win32'): # OSX file dialog doesn't support filtering on double # extensions (e.g. .csv.gz) @@ -780,11 +774,9 @@ def locate(cls, filename, search_dirs=('.',)): if path.exists(absolute_filename): break for ext in cls.readers: - if fnmatch(path.basename(filename), '*' + ext): + if fnmatch(path.basename(filename), f'*{ext}'): break - # glob uses fnmatch internally - matching_files = glob(absolute_filename + ext) - if matching_files: + if matching_files := glob(absolute_filename + ext): absolute_filename = matching_files[0] break if path.exists(absolute_filename): @@ -793,7 +785,7 @@ def locate(cls, filename, search_dirs=('.',)): absolute_filename = "" if not path.exists(absolute_filename): - raise IOError('File "{}" was not found.'.format(filename)) + raise IOError(f'File "{filename}" was not found.') return absolute_filename @@ -808,4 +800,4 @@ def open(filename, *args, **kwargs): @classmethod def qualified_name(cls): - return cls.__module__ + '.' 
+ cls.__name__ + return f'{cls.__module__}.{cls.__name__}' diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index fb80b495494..1d41887e548 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -64,7 +64,7 @@ def __init__(self, *args, **kwargs): data = table.metas vars_ = table.domain.metas - index = ['_o' + str(id_) for id_ in table.ids] + index = [f'_o{str(id_)}' for id_ in table.ids] varsdict = {var._name: var for var in vars_} columns = varsdict.keys() @@ -364,7 +364,7 @@ def table_from_frames(xdf, ydf, mdf): ydf = ydf.reset_index(drop=True) dfs = xdf, ydf, mdf - if not all(df.shape[0] == xdf.shape[0] for df in dfs): + if any(df.shape[0] != xdf.shape[0] for df in dfs): raise ValueError(f"Leading dimension mismatch " f"(not {xdf.shape[0]} == {ydf.shape[0]} == {mdf.shape[0]})") @@ -389,7 +389,7 @@ def table_from_frames(xdf, ydf, mdf): W = [df.orange_weights[i] for i in df.index if i in df.orange_weights] if len(W) != len(df.index): W = None - attributes.update(df.orange_attributes) + attributes |= df.orange_attributes else: W = None diff --git a/Orange/data/sql/backend/base.py b/Orange/data/sql/backend/base.py index 208e01a375c..e479b2cb34e 100644 --- a/Orange/data/sql/backend/base.py +++ b/Orange/data/sql/backend/base.py @@ -62,9 +62,11 @@ def list_tables(self, schema=None): with self.execute_sql_query(query) as cur: tables = [] for schema, name in cur.fetchall(): - sql = "{}.{}".format( - self.quote_identifier(schema), - self.quote_identifier(name)) if schema else self.quote_identifier(name) + sql = ( + f"{self.quote_identifier(schema)}.{self.quote_identifier(name)}" + if schema + else self.quote_identifier(name) + ) tables.append(TableDesc(name, schema, sql)) return tables @@ -114,10 +116,7 @@ def get_distinct_values(self, field_name, table_name): query = self.distinct_values_query(field_name, table_name) with self.execute_sql_query(query) as cur: values = cur.fetchall() - if len(values) > 20: - return () - 
else: - return tuple(str(x[0]) for x in values) + return () if len(values) > 20 else tuple(str(x[0]) for x in values) def create_variable(self, field_name, field_metadata, type_hints, inspect_table=None): diff --git a/Orange/data/sql/backend/mssql.py b/Orange/data/sql/backend/mssql.py index e6ae16dd15a..dcf4170b1fb 100644 --- a/Orange/data/sql/backend/mssql.py +++ b/Orange/data/sql/backend/mssql.py @@ -44,7 +44,7 @@ def list_tables_query(self, schema=None): """ def quote_identifier(self, name): - return "[{}]".format(name) + return f"[{name}]" def unquote_identifier(self, quoted_name): return quoted_name[1:-1] @@ -93,12 +93,9 @@ def create_variable(self, field_name, field_metadata, type_hints, inspect_table= inspect_table) field_name_q = self.quote_identifier(field_name) - if var.is_continuous: - if isinstance(var, TimeVariable): - var.to_sql = ToSql("DATEDIFF(s, '1970-01-01 00:00:00', {})".format(field_name_q)) - else: - var.to_sql = ToSql(field_name_q) - else: # discrete or string + if var.is_continuous and isinstance(var, TimeVariable): + var.to_sql = ToSql(f"DATEDIFF(s, '1970-01-01 00:00:00', {field_name_q})") + else: var.to_sql = ToSql(field_name_q) return var @@ -117,11 +114,9 @@ def _guess_variable(self, field_name, field_metadata, inspect_table): tv.have_time = True return tv - if type_code == STRING: - if inspect_table: - values = self.get_distinct_values(field_name, inspect_table) - if values: - return DiscreteVariable(field_name, values) + if type_code == STRING and inspect_table: + if values := self.get_distinct_values(field_name, inspect_table): + return DiscreteVariable(field_name, values) return StringVariable(field_name) @@ -135,16 +130,7 @@ def count_approx(self, query): cur.execute(query) result = cur.fetchone() match = self.EST_ROWS_RE.search(result[0]) - if not match: - # Either StatementEstRows was not found or - # a float is received. 
- # If it is a float then it is most probable - # that the server's statistics are out of date - # and the result is false. In that case - # it is preferable to return None so - # an exact count be used. - return None - return int(match.group(1)) + return int(match.group(1)) if match else None finally: cur.execute("SET SHOWPLAN_XML OFF") except pymssql.Error as ex: diff --git a/Orange/data/sql/backend/postgres.py b/Orange/data/sql/backend/postgres.py index b6ebbae2b51..1e57b50f5a3 100644 --- a/Orange/data/sql/backend/postgres.py +++ b/Orange/data/sql/backend/postgres.py @@ -43,11 +43,11 @@ def _create_connection_pool(self): def _create_extensions(self): for ext in EXTENSIONS: try: - query = "CREATE EXTENSION IF NOT EXISTS {}".format(ext) + query = f"CREATE EXTENSION IF NOT EXISTS {ext}" with self.execute_sql_query(query): pass except BackendError: - warnings.warn("Database is missing extension {}".format(ext)) + warnings.warn(f"Database is missing extension {ext}") self.missing_extension.append(ext) def create_sql_query(self, table_name, fields, filters=(), @@ -88,17 +88,14 @@ def execute_sql_query(self, query, params=None): self.connection_pool.putconn(connection) def quote_identifier(self, name): - return '"%s"' % name + return f'"{name}"' def unquote_identifier(self, quoted_name): - if quoted_name.startswith('"'): - return quoted_name[1:len(quoted_name) - 1] - else: - return quoted_name + return quoted_name[1:-1] if quoted_name.startswith('"') else quoted_name def list_tables_query(self, schema=None): if schema: - schema_clause = "AND n.nspname = '{}'".format(schema) + schema_clause = f"AND n.nspname = '{schema}'" else: schema_clause = "AND pg_catalog.pg_table_is_visible(c.oid)" return """SELECT n.nspname as "Schema", @@ -124,14 +121,11 @@ def create_variable(self, field_name, field_metadata, field_name_q = self.quote_identifier(field_name) if var.is_continuous: if isinstance(var, TimeVariable): - var.to_sql = ToSql("extract(epoch from {})" - 
.format(field_name_q)) + var.to_sql = ToSql(f"extract(epoch from {field_name_q})") else: - var.to_sql = ToSql("({})::double precision" - .format(field_name_q)) + var.to_sql = ToSql(f"({field_name_q})::double precision") else: # discrete or string - var.to_sql = ToSql("({})::text" - .format(field_name_q)) + var.to_sql = ToSql(f"({field_name_q})::text") return var def _guess_variable(self, field_name, field_metadata, inspect_table): @@ -156,26 +150,22 @@ def _guess_variable(self, field_name, field_metadata, inspect_table): if type_code in INT_TYPES: # bigint, int, smallint if inspect_table: - values = self.get_distinct_values(field_name, inspect_table) - if values: + if values := self.get_distinct_values(field_name, inspect_table): return DiscreteVariable.make(field_name, values) return ContinuousVariable.make(field_name) if type_code in BOOLEAN_TYPES: return DiscreteVariable.make(field_name, ['false', 'true']) - if type_code in CHAR_TYPES: - if inspect_table: - values = self.get_distinct_values(field_name, inspect_table) - # remove trailing spaces - values = [v.rstrip() for v in values] - if values: - return DiscreteVariable.make(field_name, values) + if type_code in CHAR_TYPES and inspect_table: + values = self.get_distinct_values(field_name, inspect_table) + if values := [v.rstrip() for v in values]: + return DiscreteVariable.make(field_name, values) return StringVariable.make(field_name) def count_approx(self, query): - sql = "EXPLAIN " + query + sql = f"EXPLAIN {query}" with self.execute_sql_query(sql) as cur: s = ''.join(row[0] for row in cur.fetchall()) return int(re.findall(r'rows=(\d*)', s)[0]) diff --git a/Orange/data/sql/filter.py b/Orange/data/sql/filter.py index 68f8181af7f..357794b9ecc 100644 --- a/Orange/data/sql/filter.py +++ b/Orange/data/sql/filter.py @@ -5,26 +5,23 @@ class IsDefinedSql(filter.IsDefined): InheritEq = True def to_sql(self): - sql = " AND ".join([ - '%s IS NOT NULL' % column - for column in self.columns - ]) + sql = " AND 
".join([f'{column} IS NOT NULL' for column in self.columns]) if self.negate: - sql = 'NOT (%s)' % sql + sql = f'NOT ({sql})' return sql class SameValueSql(filter.SameValue): def to_sql(self): if self.value is None: - sql = '%s IS NULL' % self.column + sql = f'{self.column} IS NULL' else: - sql = "%s = %s" % (self.column, self.value) + sql = f"{self.column} = {self.value}" if self.negate: if self.value is None: - sql = 'NOT (%s)' % sql + sql = f'NOT ({sql})' else: - sql = '(NOT (%s) OR %s is NULL)' % (sql, self.column) + sql = f'(NOT ({sql}) OR {self.column} is NULL)' return sql @@ -33,40 +30,38 @@ def to_sql(self): aggregator = " AND " if self.conjunction else " OR " sql = aggregator.join(c.to_sql() for c in self.conditions) if self.negate: - sql = 'NOT (%s)' % sql - return sql if self.conjunction else '({})'.format(sql) + sql = f'NOT ({sql})' + return sql if self.conjunction else f'({sql})' class FilterDiscreteSql(filter.FilterDiscrete): def to_sql(self): if self.values is not None: - return "%s IN (%s)" % (self.column, ','.join(self.values)) + return f"{self.column} IN ({','.join(self.values)})" else: - return "%s IS NOT NULL" % self.column + return f"{self.column} IS NOT NULL" class FilterContinuousSql(filter.FilterContinuous): def to_sql(self): if self.oper == self.Equal: - return "%s = %s" % (self.column, self.ref) + return f"{self.column} = {self.ref}" elif self.oper == self.NotEqual: - return "%s <> %s OR %s IS NULL" % (self.column, self.ref, self.column) + return f"{self.column} <> {self.ref} OR {self.column} IS NULL" elif self.oper == self.Less: - return "%s < %s" % (self.column, self.ref) + return f"{self.column} < {self.ref}" elif self.oper == self.LessEqual: - return "%s <= %s" % (self.column, self.ref) + return f"{self.column} <= {self.ref}" elif self.oper == self.Greater: - return "%s > %s" % (self.column, self.ref) + return f"{self.column} > {self.ref}" elif self.oper == self.GreaterEqual: - return "%s >= %s" % (self.column, self.ref) + return 
f"{self.column} >= {self.ref}" elif self.oper == self.Between: - return "%s >= %s AND %s <= %s" % (self.column, self.ref, - self.column, self.max) + return f"{self.column} >= {self.ref} AND {self.column} <= {self.max}" elif self.oper == self.Outside: - return "(%s < %s OR %s > %s)" % (self.column, self.ref, - self.column, self.max) + return f"({self.column} < {self.ref} OR {self.column} > {self.max})" elif self.oper == self.IsDefined: - return "%s IS NOT NULL" % self.column + return f"{self.column} IS NOT NULL" else: raise ValueError("Invalid operator") @@ -74,31 +69,31 @@ def to_sql(self): class FilterString(filter.FilterString): def to_sql(self): if self.oper == self.IsDefined: - return "%s IS NOT NULL" % self.column + return f"{self.column} IS NOT NULL" if self.case_sensitive: field = self.column value = self.ref else: - field = 'LOWER(%s)' % self.column + field = f'LOWER({self.column})' value = self.ref.lower() if self.oper == self.Equal: - return "%s = %s" % (field, quote(value)) + return f"{field} = {quote(value)}" elif self.oper == self.NotEqual: - return "%s <> %s OR %s IS NULL" % (field, quote(value), field) + return f"{field} <> {quote(value)} OR {field} IS NULL" elif self.oper == self.Less: - return "%s < %s" % (field, quote(value)) + return f"{field} < {quote(value)}" elif self.oper == self.LessEqual: - return "%s <= %s" % (field, quote(value)) + return f"{field} <= {quote(value)}" elif self.oper == self.Greater: - return "%s > %s" % (field, quote(value)) + return f"{field} > {quote(value)}" elif self.oper == self.GreaterEqual: - return "%s >= %s" % (field, quote(value)) + return f"{field} >= {quote(value)}" elif self.oper == self.Between: high = quote(self.max if self.case_sensitive else self.max.lower()) - return "%s >= %s AND %s <= %s" % (field, quote(value), field, high) + return f"{field} >= {quote(value)} AND {field} <= {high}" elif self.oper == self.Outside: high = quote(self.max if self.case_sensitive else self.max.lower()) - return "(%s < %s OR 
%s > %s)" % (field, quote(value), field, high) + return f"({field} < {quote(value)} OR {field} > {high})" elif self.oper == self.Contains: return "%s LIKE '%%%s%%'" % (field, value) elif self.oper == self.StartsWith: @@ -121,10 +116,7 @@ def to_sql(self): def quote(value): - if isinstance(value, str): - return "'%s'" % value - else: - return value + return f"'{value}'" if isinstance(value, str) else value class CustomFilterSql(filter.Filter): @@ -133,7 +125,4 @@ def __init__(self, where_sql, negate=False): self.sql = where_sql def to_sql(self): - if not self.negate: - return "(" + self.sql + ")" - else: - return "NOT (" + self.sql + ")" + return f"NOT ({self.sql})" if self.negate else f"({self.sql})" diff --git a/Orange/data/sql/table.py b/Orange/data/sql/table.py index 55ebcacf31f..18fb952e384 100644 --- a/Orange/data/sql/table.py +++ b/Orange/data/sql/table.py @@ -1,6 +1,7 @@ """ Support for example tables wrapping data stored on a PostgreSQL server. """ + import functools import logging import threading @@ -20,7 +21,7 @@ AUTO_DL_LIMIT = 10000 DEFAULT_SAMPLE_TIME = 1 sql_log = logging.getLogger('sql_log') -sql_log.debug("Logging started: {}".format(strftime("%Y-%m-%d %H:%M:%S"))) +sql_log.debug(f'Logging started: {strftime("%Y-%m-%d %H:%M:%S")}') class SqlTable(Table): @@ -86,7 +87,7 @@ def __init__( if isinstance(table_or_sql, TableDesc): table = table_or_sql.sql elif "select" in table_or_sql.lower(): - table = "(%s) as my_table" % table_or_sql.strip("; ") + table = f'({table_or_sql.strip("; ")}) as my_table' else: table = self.backend.quote_identifier(table_or_sql) self.table_name = table @@ -110,15 +111,12 @@ def get_domain(self, type_hints=None, inspect_values=False): var = self.backend.create_variable(field_name, field_metadata, type_hints, inspect_table) - if var.is_string: + if not var.is_string and var in type_hints.class_vars: + class_vars.append(var) + elif not var.is_string and var in type_hints.metas or var.is_string: metas.append(var) else: - if var 
in type_hints.class_vars: - class_vars.append(var) - elif var in type_hints.metas: - metas.append(var) - else: - attrs.append(var) + attrs.append(var) return Domain(attrs, class_vars, metas) @@ -154,7 +152,7 @@ def __getitem__(self, key): except TypeError: pass - elif not (row_idx is Ellipsis or row_idx == slice(None)): + elif row_idx is not Ellipsis and row_idx != slice(None): # TODO if row_idx specify multiple rows, one of the following must # happen # - the new table remembers which rows are selected (implement @@ -174,11 +172,10 @@ def __getitem__(self, key): def _fetch_row(self, row_index): attributes = self.domain.variables + self.domain.metas rows = [row_index] - values = list(self._query(attributes, rows=rows)) - if not values: - raise IndexError('Could not retrieve row {} from table {}'.format( - row_index, self.name)) - return Instance(self.domain, values[0]) + if values := list(self._query(attributes, rows=rows)): + return Instance(self.domain, values[0]) + else: + raise IndexError(f'Could not retrieve row {row_index} from table {self.name}') def __iter__(self): """ Iterating through the rows executes the query using a cursor and @@ -194,8 +191,8 @@ def _query(self, attributes=None, filters=(), rows=None): fields = [] for attr in attributes: assert hasattr(attr, 'to_sql'), \ - "Cannot use ordinary attributes with sql backend" - field_str = '(%s) AS "%s"' % (attr.to_sql(), attr.name) + "Cannot use ordinary attributes with sql backend" + field_str = f'({attr.to_sql()}) AS "{attr.name}"' fields.append(field_str) if not fields: raise ValueError("No fields selected.") @@ -370,7 +367,7 @@ def _get_stats(self, columns): results = cur.fetchone() stats = [] i = 0 - for ci, (field_name, continuous) in enumerate(columns): + for field_name, continuous in columns: if continuous: stats.append(results[i:i+6]) i += 6 @@ -393,11 +390,13 @@ def _get_distributions(self, columns): dists = [] for col in columns: field_name = col.to_sql() - fields = field_name, "COUNT(%s)" % 
field_name - query = self._sql_query(fields, - filters=['%s IS NOT NULL' % field_name], - group_by=[field_name], - order_by=[field_name]) + fields = field_name, f"COUNT({field_name})" + query = self._sql_query( + fields, + filters=[f'{field_name} IS NOT NULL'], + group_by=[field_name], + order_by=[field_name], + ) with self.backend.execute_sql_query(query) as cur: dist = np.array(cur.fetchall()) if col.is_continuous: @@ -434,21 +433,20 @@ def _compute_contingency(self, col_vars=None, row_var=None): all_contingencies = [None] * len(columns) for i, column in enumerate(columns): column_field = column.to_sql() - fields = [row_field, column_field, "COUNT(%s)" % column_field] + fields = [row_field, column_field, f"COUNT({column_field})"] group_by = [row_field, column_field] order_by = [column_field] - filters = ['%s IS NOT NULL' % f - for f in (row_field, column_field)] + filters = [f'{f} IS NOT NULL' for f in (row_field, column_field)] query = self._sql_query(fields, filters=filters, group_by=group_by, order_by=order_by) with self.backend.execute_sql_query(query) as cur: data = list(cur.fetchall()) if column.is_continuous: all_contingencies[i] = \ - (self._continuous_contingencies(data, row), [], [], 0) + (self._continuous_contingencies(data, row), [], [], 0) else: all_contingencies[i] =\ - (self._discrete_contingencies(data, row, column), [], + (self._discrete_contingencies(data, row, column), [], [], 0) return all_contingencies @@ -458,13 +456,11 @@ def _continuous_contingencies(self, data, row): last = None i = -1 for row_value, column_value, count in data: - if column_value == last: - counts[row.to_val(row_value), i] += count - else: + if column_value != last: i += 1 last = column_value values[i] = column_value - counts[row.to_val(row_value), i] += count + counts[row.to_val(row_value), i] += count return (values, counts) def _discrete_contingencies(self, data, row, column): @@ -506,12 +502,10 @@ def _filter_same_value(self, column, value, negate=False): pass elif 
var.is_discrete: value = var.to_val(value) - value = "'%s'" % var.repr_val(value) - else: - pass + value = f"'{var.repr_val(value)}'" t2 = self.copy() t2.row_filters += \ - (sql_filter.SameValueSql(var.to_sql(), value, negate),) + (sql_filter.SameValueSql(var.to_sql(), value, negate),) return t2 def _filter_values(self, f): @@ -522,8 +516,7 @@ def _filter_values(self, f): if cond.values is None: values = None else: - values = ["'%s'" % var.repr_val(var.to_val(v)) - for v in cond.values] + values = [f"'{var.repr_val(var.to_val(v))}'" for v in cond.values] new_condition = sql_filter.FilterDiscreteSql( column=var.to_sql(), values=values) @@ -547,7 +540,7 @@ def _filter_values(self, f): values=cond.values, case_sensitive=cond.case_sensitive) else: - raise ValueError('Invalid condition %s' % type(cond)) + raise ValueError(f'Invalid condition {type(cond)}') conditions.append(new_condition) t2 = self.copy() t2.row_filters += (sql_filter.ValuesSql(conditions=conditions, @@ -606,25 +599,19 @@ def _sample(self, method, parameter, no_cache=False): parameter = str(parameter) if "." 
in self.table_name: schema, name = self.table_name.split(".") - sample_name = '__%s_%s_%s' % ( - self.backend.unquote_identifier(name), - method, - parameter.replace('.', '_').replace('-', '_')) + sample_name = f"__{self.backend.unquote_identifier(name)}_{method}_{parameter.replace('.', '_').replace('-', '_')}" sample_table_q = ".".join([schema, self.backend.quote_identifier(sample_name)]) else: - sample_table = '__%s_%s_%s' % ( - self.backend.unquote_identifier(self.table_name), - method, - parameter.replace('.', '_').replace('-', '_')) + sample_table = f"__{self.backend.unquote_identifier(self.table_name)}_{method}_{parameter.replace('.', '_').replace('-', '_')}" sample_table_q = self.backend.quote_identifier(sample_table) create = False try: - query = "SELECT * FROM " + sample_table_q + " LIMIT 0;" + query = f"SELECT * FROM {sample_table_q} LIMIT 0;" with self.backend.execute_sql_query(query): pass if no_cache: - query = "DROP TABLE " + sample_table_q + query = f"DROP TABLE {sample_table_q}" with self.backend.execute_sql_query(query): pass create = True diff --git a/Orange/data/table.py b/Orange/data/table.py index f8e08944fc7..e1a743839a6 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -110,9 +110,7 @@ def __init__(self, table, row_index): @property def weight(self): - if not self.table.has_weights(): - return 1 - return self.table.W[self.row_index] + return self.table.W[self.row_index] if self.table.has_weights() else 1 @weight.setter def weight(self, weight): @@ -140,20 +138,18 @@ def __setitem__(self, key, value): value = var.to_val(value) if key >= 0: if not isinstance(value, Real): - raise TypeError("Expected primitive value, got '%s'" % - type(value).__name__) + raise TypeError(f"Expected primitive value, got '{type(value).__name__}'") if key < len(self._x): # write to self.table.X to support table unlocking for live instances self.table.X[self.row_index, key] = value if self.sparse_x is not None: self._x[key] = value + elif self.sparse_y 
is None: + self.table._Y[self.row_index] = value + if self.table._Y.ndim == 1: # if _y is not a view + self._y[0] = value else: - if self.sparse_y is not None: - self.table._Y[self.row_index, key - len(self._x)] = value - else: - self.table._Y[self.row_index] = value - if self.table._Y.ndim == 1: # if _y is not a view - self._y[0] = value + self.table._Y[self.row_index, key - len(self._x)] = value else: self.table.metas[self.row_index, -1 - key] = value if self.sparse_metas is not None: @@ -166,14 +162,10 @@ def sp_values(row, variables, sparsity=None): # row is sparse row_entries, idx = [], 0 - while idx < len(variables): - # Make sure to stop printing variables if we limit the output - if limit and len(row_entries) >= 5: - break - + while idx < len(variables) and not (limit and len(row_entries) >= 5): var = variables[idx] if var.is_discrete or row[idx]: - row_entries.append("%s=%s" % (var.name, var.str_val(row[idx]))) + row_entries.append(f"{var.name}={var.str_val(row[idx])}") idx += 1 @@ -185,9 +177,9 @@ def sp_values(row, variables, sparsity=None): return s domain = self._domain - s = "[" + sp_values(self._x, domain.attributes, self.sparse_x) + s = f"[{sp_values(self._x, domain.attributes, self.sparse_x)}" if domain.class_vars: - s += " | " + sp_values(self._y, domain.class_vars, self.sparse_y) + s += f" | {sp_values(self._y, domain.class_vars, self.sparse_y)}" s += "]" if domain.metas: s += " {" + sp_values(self._metas, domain.metas, self.sparse_metas) + "}" @@ -210,11 +202,10 @@ def _compute_column(func, *args, **kwargs): col = func(*args, **kwargs) if isinstance(col, np.ndarray) and col.ndim != 1: err = f"{type(col)} must return a column, not {col.ndim}d array" - if col.ndim == 2: - warnings.warn(err) - col = col.reshape(-1) - else: + if col.ndim != 2: raise ValueError(err) + warnings.warn(err) + col = col.reshape(-1) return col @@ -292,11 +283,7 @@ def get_columns(self, source, row_indices, out=None, target_indices=None): Y = csc_matrix(Y) if 
self.row_selection_needed: - if row_indices is ...: - sourceri = source - else: - sourceri = source[row_indices] - + sourceri = source if row_indices is ... else source[row_indices] shared_cache = _thread_local.conversion_cache for i, col in enumerate(self.src_cols): if col is None: @@ -329,10 +316,7 @@ def get_columns(self, source, row_indices, out=None, target_indices=None): else: data.append(col_array) - if self.results_inplace: - return out - else: - return self.join_columns(data) + return out if self.results_inplace else self.join_columns(data) def join_columns(self, data): if self.is_sparse: @@ -355,19 +339,14 @@ def join_columns(self, data): return out.tocsr() def join_partial_results(self, parts): - if self.is_sparse: - return sp.vstack(parts) - else: - return parts + return sp.vstack(parts) if self.is_sparse else parts def init_partial_results(self, n_rows): - if not self.results_inplace: - return [] # list to store partial results - else: # a dense numpy array - # F-order enables faster writing to the array while accessing and - # matrix operations work with same speed (e.g. 
dot) - return np.zeros((n_rows, len(self.src_cols)), - order="F", dtype=self.dtype) + return ( + np.zeros((n_rows, len(self.src_cols)), order="F", dtype=self.dtype) + if self.results_inplace + else [] + ) def add_partial_result(self, parts, part): if not self.results_inplace: @@ -409,11 +388,10 @@ def convert(self, source, row_indices, clear_cache_after_part): out = array_conv.get_subarray(source, row_indices) res[array_conv.target] = out - parts = {} - - for array_conv in self.columnwise: - parts[array_conv.target] = array_conv.init_partial_results(n_rows) - + parts = { + array_conv.target: array_conv.init_partial_results(n_rows) + for array_conv in self.columnwise + } if n_rows <= self.max_rows_at_once: for array_conv in self.columnwise: out = array_conv.get_columns(source, row_indices, @@ -443,7 +421,7 @@ def convert(self, source, row_indices, clear_cache_after_part): for array_conv in self.columnwise: res[array_conv.target] = \ - array_conv.join_partial_results(parts[array_conv.target]) + array_conv.join_partial_results(parts[array_conv.target]) return res["X"], res["Y"], res["metas"] @@ -540,9 +518,7 @@ def __setstate__(self, state): def no_view(x): # Some arrays can be unpickled as views; ensure they are not - if isinstance(x, np.ndarray) and x.base is not None: - return x.copy() - return x + return x.copy() if isinstance(x, np.ndarray) and x.base is not None else x self._initialize_unlocked() # __dict__ seems to be cleared before calling __setstate__ with self.unlocked_reference(): @@ -558,8 +534,8 @@ def __getstate__(self): # return the same state as before table lock state = self.__dict__.copy() for k in ["X", "metas", "W"]: - if "_" + k in state: # Check existence; SQL tables do not contain them - state[k] = state.pop("_" + k) + if f"_{k}" in state: # Check existence; SQL tables do not contain them + state[k] = state.pop(f"_{k}") # before locking, _Y was always a 2d array: save it as such if "_Y" in state: y = state.pop("_Y") @@ -762,10 +738,7 @@ def 
from_domain(cls, domain, n_rows=0, weights=False): self.Y = np.zeros((n_rows, len(domain.class_vars))) else: self.Y = np.zeros(n_rows) - if weights: - self.W = np.ones(n_rows) - else: - self.W = np.empty((n_rows, 0)) + self.W = np.ones(n_rows) if weights else np.empty((n_rows, 0)) self.metas = np.empty((n_rows, len(self.domain.metas)), object) cls._init_ids(self) self.attributes = {} @@ -940,8 +913,7 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, if X.shape[1] != len(domain.attributes): raise ValueError( - "Invalid number of variable columns ({} != {})".format( - X.shape[1], len(domain.attributes)) + f"Invalid number of variable columns ({X.shape[1]} != {len(domain.attributes)})" ) if Y.ndim == 1: if not domain.class_var: @@ -950,13 +922,11 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, f"(1 != {len(domain.class_vars)})") elif Y.shape[1] != len(domain.class_vars): raise ValueError( - "Invalid number of class columns ({} != {})".format( - Y.shape[1], len(domain.class_vars)) + f"Invalid number of class columns ({Y.shape[1]} != {len(domain.class_vars)})" ) if metas.shape[1] != len(domain.metas): raise ValueError( - "Invalid number of meta attribute columns ({} != {})".format( - metas.shape[1], len(domain.metas)) + f"Invalid number of meta attribute columns ({metas.shape[1]} != {len(domain.metas)})" ) if not X.shape[0] == Y.shape[0] == metas.shape[0] == W.shape[0]: raise ValueError( @@ -1072,10 +1042,8 @@ def save(self, filename): from Orange.data.io import FileFormat writer = FileFormat.writers.get(ext) if not writer: - desc = FileFormat.names.get(ext) - if desc: - raise IOError( - "Writing of {}s is not supported".format(desc.lower())) + if desc := FileFormat.names.get(ext): + raise IOError(f"Writing of {desc.lower()}s is not supported") else: raise IOError("Unknown file name extension.") writer.write_file(filename, self) @@ -1237,7 +1205,7 @@ def __setitem__(self, key, value): else: col_idx, values = [col_idx], [value] if 
isinstance(col_idx, DiscreteVariable) \ - and self.domain[col_idx] != col_idx: + and self.domain[col_idx] != col_idx: values = self.domain[col_idx].get_mapper_from(col_idx)(values) for val, col_idx in zip(values, col_idx): if not isinstance(val, Integral): @@ -1284,25 +1252,22 @@ def __setitem__(self, key, value): value = Unknown if not isinstance(value, (Real, np.ndarray)) and \ - (len(attr_cols) or len(class_cols)): + (len(attr_cols) or len(class_cols)): raise TypeError( "Ordinary attributes can only have primitive values") - if len(attr_cols): - if self.X.size: - self.X[row_idx, attr_cols] = value - if len(class_cols): - if self._Y.size: - if self._Y.ndim == 1 and np.all(class_cols == 0): - if isinstance(value, np.ndarray): - yshape = self._Y[row_idx].shape - if value.shape != yshape: - value = value.reshape(yshape) - self._Y[row_idx] = value - else: - self._Y[row_idx, class_cols] = value - if len(meta_cols): - if self._metas.size: - self.metas[row_idx, meta_cols] = value + if len(attr_cols) and self.X.size: + self.X[row_idx, attr_cols] = value + if len(class_cols) and self._Y.size: + if self._Y.ndim == 1 and np.all(class_cols == 0): + if isinstance(value, np.ndarray): + yshape = self._Y[row_idx].shape + if value.shape != yshape: + value = value.reshape(yshape) + self._Y[row_idx] = value + else: + self._Y[row_idx, class_cols] = value + if len(meta_cols) and self._metas.size: + self.metas[row_idx, meta_cols] = value def __len__(self): return self.X.shape[0] @@ -1356,10 +1321,9 @@ def concatenate(cls, tables, axis=0): # TODO: Add attributes = {} to __init__ conc.attributes = getattr(conc, "attributes", {}) for table in reversed(tables): - conc.attributes.update(table.attributes) + conc.attributes |= table.attributes - names = [table.name for table in tables if table.name != "untitled"] - if names: + if names := [table.name for table in tables if table.name != "untitled"]: conc.name = names[0] return conc @@ -1462,10 +1426,7 @@ def ensure_copy(self): """ def 
is_view(x): - if not sp.issparse(x): - return x.base is not None - else: - return x.data.base is not None + return x.data.base is not None if sp.issparse(x) else x.base is not None if is_view(self._X): self._X = self._X.copy() @@ -1525,9 +1486,7 @@ def total_weight(self): Return the total weight of instances in the table, or their number if they are unweighted. """ - if self.W.shape[-1]: - return sum(self.W) - return len(self) + return sum(self.W) if self.W.shape[-1] else len(self) def has_missing(self): """Return `True` if there are any missing attribute or class values.""" @@ -1546,7 +1505,7 @@ def has_missing_class(self): def __get_nan_frequency(data): if data.size == 0: return 0 - dense = data if not sp.issparse(data) else data.data + dense = data.data if sp.issparse(data) else data return np.isnan(dense).sum() / np.prod(data.shape) def get_nan_frequency_attribute(self): @@ -1595,10 +1554,7 @@ def get_column_view(self, index: Union[Integral, Variable]) -> np.ndarray: :type index: int, str or Orange.data.Variable :return: (one-dimensional numpy array, sparse) """ - if isinstance(index, Integral): - col_index = index - else: - col_index = self.domain.index(index) + col_index = index if isinstance(index, Integral) else self.domain.index(index) col = self._get_column_view(col_index) sparse = sp.issparse(col) @@ -1609,7 +1565,7 @@ def get_column_view(self, index: Union[Integral, Variable]) -> np.ndarray: col = np.asarray(col.todense())[:, 0] if isinstance(index, DiscreteVariable) \ - and index.values != self.domain[col_index].values: + and index.values != self.domain[col_index].values: col = index.get_mapper_from(self.domain[col_index])(col) col.flags.writeable = False warnings.warn("get_column_view is returning a mapped copy of " @@ -1617,15 +1573,14 @@ def get_column_view(self, index: Union[Integral, Variable]) -> np.ndarray: return col, sparse def _get_column_view(self, index: Integral) -> np.ndarray: - if index >= 0: - if index < self.X.shape[1]: - return 
self.X[:, index] - elif self._Y.ndim == 1 and index == self._X.shape[1]: - return self._Y - else: - return self._Y[:, index - self.X.shape[1]] - else: + if index < 0: return self.metas[:, -1 - index] + if index < self.X.shape[1]: + return self.X[:, index] + elif self._Y.ndim == 1 and index == self._X.shape[1]: + return self._Y + else: + return self._Y[:, index - self.X.shape[1]] def get_column(self, index, copy=False): """ @@ -1733,10 +1688,7 @@ def _filter_has_class(self, negate=False): retain = (self._Y.indptr[1:] == self._Y.indptr[-1:] + self._Y.shape[1]) else: - if self._Y.ndim == 1: - retain = np.isnan(self._Y) - else: - retain = bn.anynan(self._Y, axis=1) + retain = np.isnan(self._Y) if self._Y.ndim == 1 else bn.anynan(self._Y, axis=1) if not negate: retain = np.logical_not(retain) return self.from_table_rows(self, retain) @@ -1812,11 +1764,7 @@ def _filter_to_indicator(self, filter): def get_col_indices(): cols = chain(self.domain.variables, self.domain.metas) if isinstance(filter, IsDefined): - if filter.columns is not None: - return list(filter.columns) - else: - return list(cols) - + return list(filter.columns) if filter.columns is not None else list(cols) if filter.column is not None: return [filter.column] @@ -2036,20 +1984,19 @@ def _compute_distributions(self, columns=None): elif not x.shape[0]: dist, unknowns = np.zeros((2, 0)), 0 else: - if W is not None: - if sp.issparse(x): - arg_sort = np.argsort(x.data) - ranks = x.indices[arg_sort] - vals = np.vstack((x.data[arg_sort], W[ranks])) - else: - ranks = np.argsort(x) - vals = np.vstack((x[ranks], W[ranks])) - else: + if W is None: x_values = x.data if sp.issparse(x) else x vals = np.ones((2, x_values.shape[0])) vals[0, :] = x_values vals[0, :].sort() + elif sp.issparse(x): + arg_sort = np.argsort(x.data) + ranks = x.indices[arg_sort] + vals = np.vstack((x.data[arg_sort], W[ranks])) + else: + ranks = np.argsort(x) + vals = np.vstack((x[ranks], W[ranks])) dist = np.array(_valuecount.valuecount(vals)) # 
If sparse, then 0s will not be counted with `valuecount`, so # we have to add them to the result manually. @@ -2076,8 +2023,8 @@ def _compute_contingency(self, col_vars=None, row_var=None): col_vars = [self.domain.index(var) for var in col_vars] if row_var is None: row_var = self.domain.class_var - if row_var is None: - raise ValueError("No row variable") + if row_var is None: + raise ValueError("No row variable") row_desc = self.domain[row_var] if not row_desc.is_discrete: @@ -2120,8 +2067,7 @@ def _compute_contingency(self, col_vars=None, row_var=None): arr_indi = [e for e, ind in enumerate(col_indi) if f_cond(ind)] vars = [(e, f_ind(col_indi[e]), col_desc[e]) for e in arr_indi] - disc_vars = [v for v in vars if v[2].is_discrete] - if disc_vars: + if disc_vars := [v for v in vars if v[2].is_discrete]: if sp.issparse(arr): max_vals = max(len(v[2].values) for v in disc_vars) disc_indi = {i for _, i, _ in disc_vars} @@ -2140,8 +2086,7 @@ def _compute_contingency(self, col_vars=None, row_var=None): col.astype(float), row_data, len(var.values) - 1, n_rows - 1, W) - cont_vars = [v for v in vars if v[2].is_continuous] - if cont_vars: + if cont_vars := [v for v in vars if v[2].is_continuous]: W_ = None if W is not None: W_ = W.astype(dtype=np.float64) diff --git a/Orange/data/tests/test_io.py b/Orange/data/tests/test_io.py index 0490a9671d3..b9633b09ed4 100644 --- a/Orange/data/tests/test_io.py +++ b/Orange/data/tests/test_io.py @@ -43,7 +43,7 @@ def test_guess_data_type_discrete(self): np.testing.assert_array_equal(['1', '2', '1', '2', 'a'], values) # just below the threshold for string variable - in_values = list(map(lambda x: str(x) + "a", range(24))) + ["a"] * 76 + in_values = list(map(lambda x: f"{str(x)}a", range(24))) + ["a"] * 76 valuemap, values, coltype = guess_data_type(in_values) self.assertEqual(DiscreteVariable, coltype) self.assertEqual(natural_sorted(set(in_values)), valuemap) @@ -52,14 +52,14 @@ def test_guess_data_type_discrete(self): def 
test_guess_data_type_string(self): # should be StringVariable # too many different values for discrete - in_values = list(map(lambda x: str(x) + "a", range(90))) + in_values = list(map(lambda x: f"{str(x)}a", range(90))) valuemap, values, coltype = guess_data_type(in_values) self.assertEqual(StringVariable, coltype) self.assertIsNone(valuemap) np.testing.assert_array_equal(in_values, values) # more than len(values)**0.7 - in_values = list(map(lambda x: str(x) + "a", range(25))) + ["a"] * 75 + in_values = list(map(lambda x: f"{str(x)}a", range(25))) + ["a"] * 75 valuemap, values, coltype = guess_data_type(in_values) self.assertEqual(StringVariable, coltype) self.assertIsNone(valuemap) @@ -68,7 +68,7 @@ def test_guess_data_type_string(self): # more than 100 different values - exactly 101 # this is the case when len(values)**0.7 rule would vote for the # DiscreteVariable - in_values = list(map(lambda x: str(x) + "a", range(100))) + ["a"] * 999 + in_values = list(map(lambda x: f"{str(x)}a", range(100))) + ["a"] * 999 valuemap, values, coltype = guess_data_type(in_values) self.assertEqual(StringVariable, coltype) self.assertIsNone(valuemap) diff --git a/Orange/data/tests/test_pandas.py b/Orange/data/tests/test_pandas.py index 22cbe313b02..000ca11d473 100644 --- a/Orange/data/tests/test_pandas.py +++ b/Orange/data/tests/test_pandas.py @@ -88,8 +88,8 @@ def test_table_to_frame(self): self.assertEqual(sorted(table_column_names), sorted(frame_column_names)) self.assertEqual(type(df['iris'].dtype), pd.api.types.CategoricalDtype) - self.assertEqual(list(df['sepal length'])[0:4], [5.1, 4.9, 4.7, 4.6]) - self.assertEqual(list(df['iris'])[0:2], ['Iris-setosa', 'Iris-setosa']) + self.assertEqual(list(df['sepal length'])[:4], [5.1, 4.9, 4.7, 4.6]) + self.assertEqual(list(df['iris'])[:2], ['Iris-setosa', 'Iris-setosa']) def test_table_to_frame_object_dtype(self): from Orange.data.pandas_compat import table_to_frame @@ -620,7 +620,7 @@ def test_concat_table(self): table2.W) attrs = 
{} - attrs.update(self.table.attributes) + attrs |= self.table.attributes attrs.update(table2.attributes) self.assertEqual(table3.attributes, attrs) diff --git a/Orange/data/tests/test_variable.py b/Orange/data/tests/test_variable.py index c8d9b027cd9..59739947d1f 100644 --- a/Orange/data/tests/test_variable.py +++ b/Orange/data/tests/test_variable.py @@ -37,10 +37,7 @@ def is_on_path(name): ------- found : bool """ - for _, name_, _ in pkgutil.iter_modules(sys.path): - if name == name_: - return True - return False + return any(name == name_ for _, name_, _ in pkgutil.iter_modules(sys.path)) # noinspection PyPep8Naming,PyUnresolvedReferences @@ -861,7 +858,7 @@ def test_additional_formats(self): for f in TimeVariable.ADDITIONAL_FORMATS[k][0] ] # test any equal to expected - self.assertTrue(any(d == expected for d in parsed)) + self.assertTrue(expected in parsed) # test that no other equal to any other date - only nan or expected self.assertTrue(any(d == expected or pd.isnull(d) for d in parsed)) diff --git a/Orange/data/util.py b/Orange/data/util.py index bf5588abd57..92ce99a4933 100644 --- a/Orange/data/util.py +++ b/Orange/data/util.py @@ -230,9 +230,9 @@ def get_unique_names(names, proposed, equal_numbers=True): indices = {name: get_indices(names, name) for name in proposed} indices = {name: max(ind) + 1 for name, ind in indices.items() if ind} - duplicated_proposed = {name for name, count in Counter(proposed).items() - if count > 1} - if duplicated_proposed: + if duplicated_proposed := { + name for name, count in Counter(proposed).items() if count > 1 + }: # This could be merged with the code below, but it would make it slower # because it can't be done within list comprehension if equal_numbers: @@ -240,8 +240,7 @@ def get_unique_names(names, proposed, equal_numbers=True): indices = {name: max_index for name in chain(indices, duplicated_proposed)} else: - indices.update({name: 1 - for name in duplicated_proposed - set(indices)}) + indices |= {name: 1 for name 
in duplicated_proposed - set(indices)} names = [] for name in proposed: if name in indices: @@ -251,15 +250,14 @@ def get_unique_names(names, proposed, equal_numbers=True): names.append(name) return names - if not (set(proposed) & set(names) or indices): + if not set(proposed) & set(names) and not indices: return proposed - if equal_numbers: - max_index = max(indices.values()) - return [f"{name} ({max_index})" for name in proposed] - else: + if not equal_numbers: return [f"{name} ({indices[name]})" if name in indices else name for name in proposed] + max_index = max(indices.values()) + return [f"{name} ({max_index})" for name in proposed] def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list: @@ -275,9 +273,7 @@ def get_unique_names_duplicates(proposed: list, return_duplicated=False) -> list if name and cnt > 1} new_names = [f"{name} ({next(indices[name])})" if name in indices else name for name in proposed] - if return_duplicated: - return new_names, list(indices) - return new_names + return (new_names, list(indices)) if return_duplicated else new_names def get_unique_names_domain(attributes, class_vars=(), metas=()): @@ -321,7 +317,7 @@ def sanitized_name(name: str) -> str: """ sanitized = re.sub(r"\W", "_", name) if sanitized[0].isdigit(): - sanitized = "_" + sanitized + sanitized = f"_{sanitized}" return sanitized diff --git a/Orange/data/variable.py b/Orange/data/variable.py index f7b322501de..977ecaa2923 100644 --- a/Orange/data/variable.py +++ b/Orange/data/variable.py @@ -36,12 +36,11 @@ def make_variable(cls, compute_value, *args): if compute_value is not None: return cls(*args, compute_value=compute_value) - else: - # For compatibility with old pickles: remove the second arg if it's - # bool `compute_value` (args[3]) can't be bool, so this should be safe - if len(args) > 2 and isinstance(args[2], bool): - args = args[:2] + args[3:] - return cls(*args) + # For compatibility with old pickles: remove the second arg if it's + # bool 
`compute_value` (args[3]) can't be bool, so this should be safe + if len(args) > 2 and isinstance(args[2], bool): + args = args[:2] + args[3:] + return cls(*args) def is_discrete_values(values): @@ -77,9 +76,12 @@ def is_discrete_values(values): return False # Strip NaN from unique - unique = {i for i in unique - if (not i in MISSING_VALUES and - not (isinstance(i, Number) and np.isnan(i)))} + unique = { + i + for i in unique + if i not in MISSING_VALUES + and not (isinstance(i, Number) and np.isnan(i)) + } # All NaNs => indeterminate if not unique: @@ -214,8 +216,7 @@ def __init__(self, _, __=Unknown): pass def __repr__(self): - return "Value('%s', %s)" % (self.variable.name, - self.variable.repr_val(self)) + return f"Value('{self.variable.name}', {self.variable.repr_val(self)})" def __str__(self): return self.variable.str_val(self) @@ -237,15 +238,15 @@ def __ne__(self, other): def __lt__(self, other): if self.variable.is_primitive(): - if isinstance(other, str): - return super().__lt__(self.variable.to_val(other)) - else: - return super().__lt__(other) + return ( + super().__lt__(self.variable.to_val(other)) + if isinstance(other, str) + else super().__lt__(other) + ) + if isinstance(other, str): + return self.value < other else: - if isinstance(other, str): - return self.value < other - else: - return self.value < other.value + return self.value < other.value def __le__(self, other): return self.__lt__(other) or self.__eq__(other) @@ -272,18 +273,13 @@ def __hash__(self): # == hash("green") # User should hash directly ids or domain values instead. 
raise TypeError("unhashable type - cannot hash values of discrete variables!") - if self._value is None: - return super().__hash__() - else: - return hash(self._value) + return super().__hash__() if self._value is None else hash(self._value) @property def value(self): if self.variable.is_discrete: return Unknown if isnan(self) else self.variable.values[int(self)] - if self.variable.is_string: - return self._value - return float(self) + return self._value if self.variable.is_string else float(self) def __getnewargs__(self): return self.variable, float(self) @@ -316,10 +312,7 @@ class _predicatedescriptor(property): False """ def __get__(self, instance, objtype=None): - if instance is None: - return self.fget - else: - return super().__get__(instance, objtype) + return self.fget if instance is None else super().__get__(instance, objtype) class Variable(Reprable, metaclass=VariableMeta): @@ -611,7 +604,7 @@ def number_of_decimals(self, x): self._max_round_diff = 10 ** (-x - 6) self.adjust_decimals = 0 if self._number_of_decimals <= MAX_NUM_OF_DECIMALS: - self._format_str = "%.{}f".format(self.number_of_decimals) + self._format_str = f"%.{self.number_of_decimals}f" else: self._format_str = "%g" @@ -619,9 +612,7 @@ def to_val(self, s): """ Convert a value, given as an instance of an arbitrary type, to a float. 
""" - if s in self.unknown_str: - return Unknown - return float(s) + return Unknown if s in self.unknown_str else float(s) def val_from_str_add(self, s): """ @@ -747,8 +738,7 @@ def mapper(value, col_idx=None): if isinstance(value, str): return mapping[other.values.index(value)] if isinstance(value, np.ndarray): - if not (value.ndim == 1 - or value.ndim != 2 and min(value.shape) != 1): + if value.ndim != 1 and (value.ndim == 2 or min(value.shape) == 1): raise ValueError( f"Column mapping can't map {value.ndim}-d objects") @@ -802,8 +792,7 @@ def to_val(self, s): if s in self.unknown_str: return ValueUnknown if not isinstance(s, str): - raise TypeError('Cannot convert {} to value of "{}"'.format( - type(s).__name__, self.name)) + raise TypeError(f'Cannot convert {type(s).__name__} to value of "{self.name}"') if s not in self._value_index: raise ValueError(f"Value {s} does not exist") return self._value_index[s] @@ -845,9 +834,7 @@ def repr_val(self, val): :type val: float (should be whole number) :rtype: str """ - if isnan(val): - return "?" - return '{}'.format(self.values[int(val)]) + return "?" 
if isnan(val) else f'{self.values[int(val)]}' str_val = repr_val @@ -887,9 +874,7 @@ def to_val(self, s): """ if s is None: return "" - if isinstance(s, str): - return s - return str(s) + return s if isinstance(s, str) else str(s) val_from_str_add = to_val @@ -906,7 +891,7 @@ def str_val(val): def repr_val(self, val): """Return a string representation of the value.""" - return '"{}"'.format(self.str_val(val)) + return f'"{self.str_val(val)}"' class TimeVariable(ContinuousVariable): @@ -1145,10 +1130,8 @@ def parse(self, datestr): if not self._matches_iso_format(datestr): try: - # If it is a number, assume it is a unix timestamp - value = float(datestr) self.have_date = self.have_time = 1 - return value + return float(datestr) except ValueError: raise self.InvalidDateTimeFormatError(datestr) @@ -1206,7 +1189,4 @@ def to_val(self, s): """ Convert a value, given as an instance of an arbitrary type, to a float. """ - if isinstance(s, str): - return self.parse(s) - else: - return super().to_val(s) + return self.parse(s) if isinstance(s, str) else super().to_val(s) diff --git a/Orange/datasets/list_update.py b/Orange/datasets/list_update.py index 4899e3f1a63..a65f841c617 100644 --- a/Orange/datasets/list_update.py +++ b/Orange/datasets/list_update.py @@ -34,11 +34,9 @@ def data_info(name, location): } if __name__ == "__main__": - info = dict() - - for name, location in external_datasets: - info[name] = data_info(name, location) - + info = { + name: data_info(name, location) for name, location in external_datasets + } for fname in os.listdir('.'): if not os.path.isfile(fname): continue diff --git a/Orange/distance/base.py b/Orange/distance/base.py index 02ebb96d755..5f15d43e04a 100644 --- a/Orange/distance/base.py +++ b/Orange/distance/base.py @@ -36,9 +36,14 @@ def remove_discrete_features(data, to_metas=False): new_domain = Domain( [a for a in data.domain.attributes if a.is_continuous], data.domain.class_vars, - data.domain.metas - + (() if not to_metas - else tuple(a 
for a in data.domain.attributes if not a.is_continuous)) + ( + data.domain.metas + + ( + tuple(a for a in data.domain.attributes if not a.is_continuous) + if to_metas + else () + ) + ), ) return data.transform(new_domain) @@ -558,8 +563,8 @@ def __call__(self, e1, e2=None, axis=1, impute=False): x1, x2, metric=self.metric) if impute and np.isnan(dist).any(): dist = np.nan_to_num(dist) - if isinstance(e1, (Table, RowInstance)): - dist_matrix = DistMatrix(dist, e1, e2, axis) - else: - dist_matrix = DistMatrix(dist) - return dist_matrix + return ( + DistMatrix(dist, e1, e2, axis) + if isinstance(e1, (Table, RowInstance)) + else DistMatrix(dist) + ) diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py index 51fdc0fe450..a965903825f 100644 --- a/Orange/distance/distance.py +++ b/Orange/distance/distance.py @@ -116,10 +116,7 @@ def compute_distances(self, x1, x2=None): # adapted from sklearn.metric.euclidean_distances xx = row_norms(data1, squared=True)[:, np.newaxis] - if x2 is not None: - yy = row_norms(data2, squared=True)[np.newaxis, :] - else: - yy = xx.T + yy = row_norms(data2, squared=True)[np.newaxis, :] if x2 is not None else xx.T distances = _safe_sparse_dot(data1, data2.T, dense_output=True, callback=callbacks.next()) distances *= -2 @@ -130,7 +127,7 @@ def compute_distances(self, x1, x2=None): if x2 is None: distances.flat[::distances.shape[0] + 1] = 0.0 fixer = _distance.fix_euclidean_rows_normalized if self.normalize \ - else _distance.fix_euclidean_rows + else _distance.fix_euclidean_rows fixer(distances, data1, data2, self.means, self.vars, self.dist_missing2_cont, x2 is not None, callbacks.next()) @@ -534,8 +531,7 @@ def _compute_sparse(self, x1, x2=None): callback(i * 100 / n) xi_ind = set(x1[i].indices) for j in range(i if symmetric else m): - union = len(xi_ind.union(x2[j].indices)) - if union: + if union := len(xi_ind.union(x2[j].indices)): jacc = 1 - len(xi_ind.intersection(x2[j].indices)) / union else: jacc = 0 @@ -581,15 +577,9 
@@ def __init__(self, absolute, axis=1, impute=False, *, similarity=False): def compute_distances(self, x1, x2): rho = self.compute_correlation(x1, x2) if self.similarity: - if self.absolute: - return np.abs(rho) - else: - return rho + return np.abs(rho) if self.absolute else rho else: - if self.absolute: - return 1. - np.abs(rho) - else: - return 0.5 - rho / 2 + return 1. - np.abs(rho) if self.absolute else 0.5 - rho / 2 def compute_correlation(self, x1, x2): raise NotImplementedError() @@ -597,22 +587,21 @@ def compute_correlation(self, x1, x2): class SpearmanModel(CorrelationDistanceModel): def compute_correlation(self, x1, x2): - if x2 is None: - n1 = x1.shape[1 - self.axis] - if n1 == 1: - rho = 1.0 - elif n1 == 2: - # Special case to properly fill degenerate self correlations - # (nan, inf on the diagonals) - rho = stats.spearmanr(x1, x1, axis=self.axis)[0] - assert rho.shape == (4, 4) - rho = rho[:2, :2].copy() - else: - # scalar if n1 == 1 - rho = stats.spearmanr(x1, axis=self.axis)[0] - return np.atleast_2d(rho) - else: + if x2 is not None: return _spearmanr2(x1, x2, axis=self.axis) + n1 = x1.shape[1 - self.axis] + if n1 == 1: + rho = 1.0 + elif n1 == 2: + # Special case to properly fill degenerate self correlations + # (nan, inf on the diagonals) + rho = stats.spearmanr(x1, x1, axis=self.axis)[0] + assert rho.shape == (4, 4) + rho = rho[:2, :2].copy() + else: + # scalar if n1 == 1 + rho = stats.spearmanr(x1, axis=self.axis)[0] + return np.atleast_2d(rho) def _spearmanr2(a, b, axis=0): @@ -677,7 +666,7 @@ def _corrcoef2(a, b, axis=0): """ a, b = np.atleast_2d(a, b) if axis not in (0, 1): - raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis)) + raise ValueError(f"Invalid axis {axis} (only 0 or 1 accepted)") mean_a = np.mean(a, axis=axis, keepdims=True) mean_b = np.mean(b, axis=axis, keepdims=True) @@ -731,11 +720,10 @@ def fit(self, _): class PearsonModel(CorrelationDistanceModel): def compute_correlation(self, x1, x2): - if x2 is None: - 
c = np.corrcoef(x1, rowvar=self.axis == 1) - return np.atleast_2d(c) - else: + if x2 is not None: return _corrcoef2(x1, x2, axis=self.axis) + c = np.corrcoef(x1, rowvar=self.axis == 1) + return np.atleast_2d(c) class PearsonR(CorrelationDistance): @@ -850,9 +838,7 @@ class MahalanobisDistance: is provided in this class. """ def __new__(cls, data=None, axis=1, _='Mahalanobis'): - if data is None: - return cls - return Mahalanobis(axis=axis).fit(data) + return cls if data is None else Mahalanobis(axis=axis).fit(data) class Hamming(Distance): diff --git a/Orange/distance/tests/test_distance.py b/Orange/distance/tests/test_distance.py index a90b062481d..7950986a102 100644 --- a/Orange/distance/tests/test_distance.py +++ b/Orange/distance/tests/test_distance.py @@ -218,8 +218,7 @@ def test_euclidean_disc(self): [[1, 0, 1, 1], [2/3, 2/3, 1, 2/3], [2/3, 1/3, 1, 1]]) - assert_almost_equal(model.dist_missing2_disc, - [1 - 1, 1 - 3/9, 1 - 5/9]) + assert_almost_equal(model.dist_missing2_disc, [0, 1 - 3/9, 1 - 5/9]) dist = model(data) assert_almost_equal(dist, @@ -503,8 +502,7 @@ def test_manhattan_disc(self): [[1, 0, 1, 1], [2/3, 2/3, 1, 2/3], [2/3, 1/3, 1, 1]]) - assert_almost_equal(model.dist_missing2_disc, - [1 - 1, 1 - 3/9, 1 - 5/9]) + assert_almost_equal(model.dist_missing2_disc, [0, 1 - 3/9, 1 - 5/9]) dist = model(data) assert_almost_equal(dist, diff --git a/Orange/ensembles/stack.py b/Orange/ensembles/stack.py index 17e22362c78..d5d52c8b9f5 100644 --- a/Orange/ensembles/stack.py +++ b/Orange/ensembles/stack.py @@ -78,9 +78,10 @@ def fit_storage(self, data): else: X = res.predicted.T use_prob = False - dom = Domain([ContinuousVariable('f{}'.format(i + 1)) - for i in range(X.shape[1])], - data.domain.class_var) + dom = Domain( + [ContinuousVariable(f'f{i + 1}') for i in range(X.shape[1])], + data.domain.class_var, + ) stacked_data = data.transform(dom).copy() with stacked_data.unlocked_reference(): stacked_data.X = X diff --git a/Orange/evaluation/clustering.py 
b/Orange/evaluation/clustering.py index acb81c2a3e8..abbaba9fd67 100644 --- a/Orange/evaluation/clustering.py +++ b/Orange/evaluation/clustering.py @@ -142,8 +142,7 @@ def graph_silhouette(X, y, xlim=None, colors=None, figsize=None, filename=None): s = s[np.argsort(y)] # Sort by clusters parts = [] # Within clusters sort by silhouette scores - for label, (i, j) in enumerate([(sum(y == c1), sum(y == c1) + sum(y == c2)) - for c1, c2 in zip(range(-1, N-1), range(0, N))]): + for label, (i, j) in enumerate((sum(y == c1), sum(y == c1) + sum(y == c2)) for c1, c2 in zip(range(-1, N-1), range(N))): scores = sorted(s[i:j]) parts.append((scores, label)) diff --git a/Orange/evaluation/scoring.py b/Orange/evaluation/scoring.py index 9c6f7102719..974fabb3729 100644 --- a/Orange/evaluation/scoring.py +++ b/Orange/evaluation/scoring.py @@ -43,8 +43,8 @@ def __new__(mcs, name, bases, dict_, **kwargs): cls.registry = {} return cls - def __init__(cls, *args, **_): - WrapperMeta.__init__(cls, *args) + def __init__(self, *args, **_): + WrapperMeta.__init__(self, *args) class Score(metaclass=ScoreMetaType): @@ -86,14 +86,12 @@ def __call__(self, results, **kwargs): return self.compute_score(results, **kwargs) def average(self, scores): - if self.is_scalar: - return np.mean(scores, axis=0) - return NotImplementedError + return np.mean(scores, axis=0) if self.is_scalar else NotImplementedError def scores_by_folds(self, results, **kwargs): nfolds = len(results.folds) - nmodels = len(results.predicted) if self.is_scalar: + nmodels = len(results.predicted) scores = np.empty((nfolds, nmodels), dtype=np.float64) else: scores = [None] * nfolds @@ -103,8 +101,7 @@ def scores_by_folds(self, results, **kwargs): return scores def compute_score(self, results): - wraps = type(self).__wraps__ # self.__wraps__ is invisible - if wraps: + if wraps := type(self).__wraps__: return self.from_predicted(results, wraps) else: return NotImplementedError @@ -347,25 +344,24 @@ def multi_class_specificity(self, 
results): return np.sum(scores.T * weights, axis=1) def compute_score(self, results, target=None, average="binary"): + if target is not None: + return self.single_class_specificity(results, target) domain = results.domain n_classes = len(domain.class_var.values) - if target is None: - if average == "weighted": - return self.multi_class_specificity(results) - elif average == "binary": # average is binary - if n_classes != 2: - raise ValueError( - "Binary averaging needs two classes in data: " - "specify target class or use " - "weighted averaging.") - return self.single_class_specificity(results, 1) - else: + if average == "weighted": + return self.multi_class_specificity(results) + elif average == "binary": # average is binary + if n_classes != 2: raise ValueError( - "Wrong parameters: For averaging select one of the " - "following values: ('weighted', 'binary')") - elif target is not None: - return self.single_class_specificity(results, target) + "Binary averaging needs two classes in data: " + "specify target class or use " + "weighted averaging.") + return self.single_class_specificity(results, 1) + else: + raise ValueError( + "Wrong parameters: For averaging select one of the " + "following values: ('weighted', 'binary')") class MatthewsCorrCoefficient(ClassificationScore): diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 34a798241b6..a3887dd7f8a 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -264,15 +264,13 @@ def get_augmented_data(self, model_names, data = self.data[self.row_indices] domain = data.domain class_var = domain.class_var - classification = class_var and class_var.is_discrete - new_meta_attr = [] new_meta_vals = np.empty((len(data), 0)) names = [var.name for var in chain(domain.attributes, domain.metas, domain.class_vars)] - if classification: + if classification := class_var and class_var.is_discrete: # predictions if include_predictions: uniq_new, names = 
self.create_unique_vars(names, model_names, class_var.values) @@ -409,10 +407,7 @@ def __new__(cls, # Explicitly call __init__ because Python won't self.__init__(store_data=store_data, store_models=store_models, **kwargs) - if test_data is not None: - test_data_kwargs = {"test_data": test_data} - else: - test_data_kwargs = {} + test_data_kwargs = {"test_data": test_data} if test_data is not None else {} return self(data, learners=learners, preprocessor=preprocessor, callback=callback, **test_data_kwargs) diff --git a/Orange/evaluation/tests/test_performance_curves.py b/Orange/evaluation/tests/test_performance_curves.py index a73d7165557..745fca5692b 100644 --- a/Orange/evaluation/tests/test_performance_curves.py +++ b/Orange/evaluation/tests/test_performance_curves.py @@ -43,18 +43,60 @@ def test_curves(self): 13, 12, 11, 10, 11, 10]) / 19) precision = np.array( - [9 / 19, 9 / 18, 8 / 17, 8 / 16, 7 / 15, 7 / 14, 7 / 13, - 7 / 12, 6 / 11, 6 / 10, 5 / 9, 5 / 8, 4 / 7, 4 / 6, - 4 / 5, 3 / 4, 2 / 3, 1 / 2, 1 / 1, 1]) + [ + 9 / 19, + 9 / 18, + 8 / 17, + 8 / 16, + 7 / 15, + 7 / 14, + 7 / 13, + 7 / 12, + 6 / 11, + 6 / 10, + 5 / 9, + 5 / 8, + 4 / 7, + 4 / 6, + 4 / 5, + 3 / 4, + 2 / 3, + 1 / 2, + 1, + 1, + ] + ) np.testing.assert_almost_equal(curves.precision(), precision) np.testing.assert_almost_equal(curves.recall(), tp / 9) np.testing.assert_almost_equal(curves.ppv(), precision) np.testing.assert_almost_equal( curves.npv(), - np.array([1, 1 / 1, 1 / 2, 2 / 3, 2 / 4, 3 / 5, 4 / 6, 5 / 7, - 5 / 8, 6 / 9, 6 / 10, 7 / 11, 7 / 12, 8 / 13, 9 / 14, - 9 / 15, 9 / 16, 9 / 17, 10 / 18, 10 / 19])) + np.array( + [ + 1, + 1, + 1 / 2, + 2 / 3, + 2 / 4, + 3 / 5, + 4 / 6, + 5 / 7, + 5 / 8, + 6 / 9, + 6 / 10, + 7 / 11, + 7 / 12, + 8 / 13, + 9 / 14, + 9 / 15, + 9 / 16, + 9 / 17, + 10 / 18, + 10 / 19, + ] + ), + ) np.testing.assert_almost_equal(curves.tpr(), tp / 9) np.testing.assert_almost_equal(curves.fpr(), (10 - tn) / 10) diff --git a/Orange/misc/__init__.py b/Orange/misc/__init__.py 
index c88817330d2..51c45e20051 100644 --- a/Orange/misc/__init__.py +++ b/Orange/misc/__init__.py @@ -7,8 +7,12 @@ def import_late_warning(name): try: return import_module(name) except ImportError: + + + class Warn: def __getattr__(self, val): - raise ImportError("Install package '" + name - + "' to use this functionality.") + raise ImportError(f"Install package '{name}' to use this functionality.") + + return Warn() diff --git a/Orange/misc/cache.py b/Orange/misc/cache.py index ee165934a3f..4b2e9ba6987 100644 --- a/Orange/misc/cache.py +++ b/Orange/misc/cache.py @@ -12,10 +12,12 @@ def single_cache(func): @wraps(func) def _cached(*args, **kwargs): nonlocal last_args, last_kwargs, last_result - if len(last_args) != len(args) or \ - not all(x is y for x, y in zip(args, last_args)) or \ - last_kwargs != set(kwargs) or \ - any(last_kwargs[k] != kwargs[k] for k in last_kwargs): + if ( + len(last_args) != len(args) + or any(x is not y for x, y in zip(args, last_args)) + or last_kwargs != set(kwargs) + or any(last_kwargs[k] != kwargs[k] for k in last_kwargs) + ): last_result = func(*args, **kwargs) last_args, last_kwargs = args, kwargs return last_result diff --git a/Orange/misc/distmatrix.py b/Orange/misc/distmatrix.py index 5e63c22b2cd..4be5a69e3e7 100644 --- a/Orange/misc/distmatrix.py +++ b/Orange/misc/distmatrix.py @@ -68,7 +68,7 @@ def __setstate__(self, state): self.row_items = state[-3] self.col_items = state[-2] self.axis = state[-1] - super().__setstate__(state[0:-3]) + super().__setstate__(state[:-3]) @property @deprecated @@ -195,10 +195,7 @@ def _from_dst(cls, filename): symmetric = False else: flag_data = flag.split("=") - if len(flag_data) == 2: - name, value = map(str.strip, flag_data) - else: - name, value = "", None + name, value = map(str.strip, flag_data) if len(flag_data) == 2 else ("", None) if name == "axis" and value.isdigit(): axis = int(value) else: @@ -340,14 +337,13 @@ def get_labels(self, items): if not self._trivial_labels(items): return 
None if isinstance(items, (list, tuple)) \ - and all(isinstance(x, str) for x in items): + and all(isinstance(x, str) for x in items): return items if self.axis == 0: return [attr.name for attr in items.domain.attributes] - else: - string_var = next(var for var in items.domain.metas - if isinstance(var, StringVariable)) - return items.get_column(string_var) + string_var = next(var for var in items.domain.metas + if isinstance(var, StringVariable)) + return items.get_column(string_var) def save(self, filename): if os.path.splitext(filename)[1] == ".xlsx": diff --git a/Orange/misc/environ.py b/Orange/misc/environ.py index f2522052843..f0ed0fc10f7 100644 --- a/Orange/misc/environ.py +++ b/Orange/misc/environ.py @@ -93,14 +93,13 @@ def get_path(name: str, default: Optional[str] = None) -> Optional[str]: def _default_data_dir_base(): if sys.platform == "darwin": - base = os.path.expanduser("~/Library/Application Support") + return os.path.expanduser("~/Library/Application Support") elif sys.platform == "win32": - base = os.getenv("APPDATA", os.path.expanduser("~/AppData/Local")) + return os.getenv("APPDATA", os.path.expanduser("~/AppData/Local")) elif os.name == "posix": - base = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) + return os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) else: - base = os.path.expanduser("~/.local/share") - return base + return os.path.expanduser("~/.local/share") def data_dir_base(): @@ -155,12 +154,7 @@ def _default_cache_dir(): base = os.path.expanduser("~/.cache") base = os.path.join(base, "Orange", Orange.__version__) - if sys.platform == "win32": - # On Windows cache and data dir are the same. 
- # Microsoft suggest using a Cache subdirectory - return os.path.join(base, "Cache") - else: - return base + return os.path.join(base, "Cache") if sys.platform == "win32" else base def cache_dir(*args): diff --git a/Orange/misc/lazy_module.py b/Orange/misc/lazy_module.py index 80456d85b3b..7d1a7c9c4ae 100644 --- a/Orange/misc/lazy_module.py +++ b/Orange/misc/lazy_module.py @@ -5,7 +5,7 @@ def __init__(self, name): def _do_import(self): import Orange from importlib import import_module - mod = import_module('Orange.' + self.__name, package='Orange') + mod = import_module(f'Orange.{self.__name}', package='Orange') setattr(Orange, self.__name, mod) return mod diff --git a/Orange/misc/server_embedder.py b/Orange/misc/server_embedder.py index c8f9d795258..006ee0b65ed 100644 --- a/Orange/misc/server_embedder.py +++ b/Orange/misc/server_embedder.py @@ -347,15 +347,14 @@ def _parse_response(response: Response) -> Optional[List[float]]: ------- Embedding. For items that are not successfully embedded returns None. 
""" - if response.content: - try: - cont = json.loads(response.content.decode("utf-8")) - return cont.get("embedding", None) - except JSONDecodeError: - # in case that embedding was not successful response is not - # valid JSON - return None - else: + if not response.content: + return None + try: + cont = json.loads(response.content.decode("utf-8")) + return cont.get("embedding", None) + except JSONDecodeError: + # in case that embedding was not successful response is not + # valid JSON return None def clear_cache(self): diff --git a/Orange/misc/utils/embedder_utils.py b/Orange/misc/utils/embedder_utils.py index f19b8ee04dc..4169fd1db92 100644 --- a/Orange/misc/utils/embedder_utils.py +++ b/Orange/misc/utils/embedder_utils.py @@ -68,9 +68,7 @@ def persist_cache(self): self.save_pickle(self._cache_dict, self._cache_file_path) def get_cached_result_or_none(self, cache_key): - if cache_key in self._cache_dict: - return self._cache_dict[cache_key] - return None + return self._cache_dict[cache_key] if cache_key in self._cache_dict else None def add(self, cache_key, value): self._cache_dict[cache_key] = value @@ -87,11 +85,7 @@ def get_proxies() -> Optional[Dict[str, str]]: they not set. """ def add_scheme(url: Optional[str]) -> Optional[str]: - if url is not None and "://" not in url: - # if no scheme default to http - as other libraries do (e.g. 
requests) - return f"http://{url}" - else: - return url + return f"http://{url}" if url is not None and "://" not in url else url http_proxy = add_scheme(environ.get("http_proxy")) https_proxy = add_scheme(environ.get("https_proxy")) @@ -100,4 +94,4 @@ def add_scheme(url: Optional[str]) -> Optional[str]: proxy_dict["http://"] = http_proxy if https_proxy: proxy_dict["https://"] = https_proxy - return proxy_dict if proxy_dict else None + return proxy_dict or None diff --git a/Orange/misc/wrapper_meta.py b/Orange/misc/wrapper_meta.py index 12061984628..64263359a96 100644 --- a/Orange/misc/wrapper_meta.py +++ b/Orange/misc/wrapper_meta.py @@ -47,8 +47,7 @@ def __new__(cls, name, bases, dict_): ${skldoc} """ - sklname = "{}.{}".format(inspect.getmodule(wrapped).__name__, - wrapped.__name__) + sklname = f"{inspect.getmodule(wrapped).__name__}.{wrapped.__name__}" skldoc = inspect.getdoc(wrapped) or '' # FIXME: make sure skl-extended classes are API-compatible if "Attributes\n---------" in skldoc: diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index b7c2a24e10f..692ce0e8b8d 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -37,9 +37,8 @@ def _fit_model(self, data): if type(self).fit is Learner.fit: return learner.fit_storage(data) - else: - X, Y, W = data.X, data.Y, data.W if data.has_weights() else None - return learner.fit(X, Y, W) + X, Y, W = data.X, data.Y, data.W if data.has_weights() else None + return learner.fit(X, Y, W) def preprocess(self, data, progress_callback=None): return self.get_learner(data).preprocess(data, progress_callback) @@ -72,7 +71,7 @@ def get_learner(self, problem_type): None) # Prevent trying to access the learner when problem type is None if problem_type not in self.__fits__: - raise TypeError("No learner to handle '{}'".format(problem_type)) + raise TypeError(f"No learner to handle '{problem_type}'") if self.__learners[problem_type] is None: learner = 
self.__fits__[problem_type](**self.__kwargs(problem_type)) learner.use_default_preprocessors = self.use_default_preprocessors diff --git a/Orange/preprocess/discretize.py b/Orange/preprocess/discretize.py index e0db93fd4d6..c3156044d1d 100644 --- a/Orange/preprocess/discretize.py +++ b/Orange/preprocess/discretize.py @@ -31,14 +31,13 @@ def __init__(self, variable, points): @staticmethod def digitize(x, bins): - if sp.issparse(x): - if len(bins): - x.data = np.digitize(x.data, bins) - else: - x = sp.csr_matrix(x.shape) - return x - else: + if not sp.issparse(x): return np.digitize(x, bins) if len(bins) else [0]*len(x) + if len(bins): + x.data = np.digitize(x.data, bins) + else: + x = sp.csr_matrix(x.shape) + return x def transform(self, c): if sp.issparse(c): @@ -64,9 +63,8 @@ def create_discretized_var(cls, var, points, ndigits=None): def fmt(val): sval = var.str_val(val) # For decimal numbers, remove trailing 0's and . if no decimals left - if re.match(r"^\d+\.\d+", sval): - return sval.rstrip("0").rstrip(".") - return sval + return sval.rstrip("0").rstrip(".") if re.match(r"^\d+\.\d+", sval) else sval + else: def fmt(val): return f"{val:.{ndigits}f}" @@ -110,7 +108,7 @@ def __init__(self, value): self.value = value def __call__(self): - return "'%s'" % self.value + return f"'{self.value}'" class Discretization(Reprable): @@ -145,8 +143,8 @@ def __call__(self, data, attribute): att = attribute.to_sql() quantiles = [(i + 1) / self.n for i in range(self.n - 1)] query = data._sql_query( - ['quantile(%s, ARRAY%s)' % (att, str(quantiles))], - use_time_sample=1000) + [f'quantile({att}, ARRAY{quantiles})'], use_time_sample=1000 + ) with data._execute_sql_query(query) as cur: points = sorted(set(cur.fetchone()[0])) else: @@ -172,17 +170,16 @@ def __call__(self, data: Table, attribute, fixed=None): if fixed: mn, mx = fixed[attribute.name] points = self._split_eq_width(mn, mx) + elif type(data) == SqlTable: + stats = BasicStats(data, attribute) + points = 
self._split_eq_width(stats.min, stats.max) else: - if type(data) == SqlTable: - stats = BasicStats(data, attribute) - points = self._split_eq_width(stats.min, stats.max) + values = data.get_column(attribute) + if values.size: + mn, mx = ut.nanmin(values), ut.nanmax(values) + points = self._split_eq_width(mn, mx) else: - values = data.get_column(attribute) - if values.size: - mn, mx = ut.nanmin(values), ut.nanmax(values) - points = self._split_eq_width(mn, mx) - else: - points = [] + points = [] return Discretizer.create_discretized_var( data.domain[attribute], points) @@ -211,7 +208,7 @@ def __call__(self, data: Table, attribute): if not np.isnan(mn): minf = int(1 + np.floor(mn / self.width)) maxf = int(1 + np.floor(mx / self.width)) - if maxf - minf - 1 >= 100: + if maxf - minf >= 101: raise TooManyIntervals points = [i * self.width for i in range(minf, maxf)] return Discretizer.create_discretized_var( @@ -610,7 +607,7 @@ def _simplified_labels(labels): def _unique_time_bins(unique): times = [utc_from_timestamp(x).timetuple() for x in unique] - fmt = f'%y %b %d' + fmt = '%y %b %d' fmt += " %H:%M" * (len({t[2:] for t in times}) > 1) fmt += ":%S" * bool(np.all(unique % 60 == 0)) labels = [time.strftime(fmt, x) for x in times] @@ -694,12 +691,11 @@ def _normalize(cls, scale[scale == 0] = 1 if out is None: return X / scale - else: - if out is not X: - assert out.shape == X.shape - out[:] = X - out /= scale - return out + if out is not X: + assert out.shape == X.shape + out[:] = X + out /= scale + return out @classmethod def _entropy_normalized(cls, D, axis=None): @@ -897,10 +893,8 @@ def transform_list(s, fixed=None): else: new_vars.append(var) return new_vars - if self.method is None: - method = EqualFreq(n=4) - else: - method = self.method + + method = EqualFreq(n=4) if self.method is None else self.method domain = data.domain new_attrs = transform_list(domain.attributes, fixed or self.fixed) if self.discretize_class: diff --git a/Orange/preprocess/fss.py 
b/Orange/preprocess/fss.py index fc68eab990e..23af5edd031 100644 --- a/Orange/preprocess/fss.py +++ b/Orange/preprocess/fss.py @@ -52,23 +52,20 @@ def __init__(self, method=None, k=None, threshold=None, decreasing=True): def __call__(self, data): n_attrs = len(data.domain.attributes) - if isinstance(self.k, float): - effective_k = np.round(self.k * n_attrs).astype(int) or 1 - else: - effective_k = self.k - + effective_k = ( + np.round(self.k * n_attrs).astype(int) or 1 + if isinstance(self.k, float) + else self.k + ) method = self.method # select default method according to the provided data if method is None: autoMethod = True - discr_ratio = (sum(a.is_discrete - for a in data.domain.attributes) - / len(data.domain.attributes)) if data.domain.has_discrete_class: - if discr_ratio >= 0.5: - method = GainRatio() - else: - method = ANOVA() + discr_ratio = (sum(a.is_discrete + for a in data.domain.attributes) + / len(data.domain.attributes)) + method = GainRatio() if discr_ratio >= 0.5 else ANOVA() else: method = UnivariateLinearRegression() diff --git a/Orange/preprocess/impute.py b/Orange/preprocess/impute.py index c67c4a97434..a977704a2b7 100644 --- a/Orange/preprocess/impute.py +++ b/Orange/preprocess/impute.py @@ -26,11 +26,10 @@ def __init__(self, variable, value=0): self.value = value def transform(self, c): - if sp.issparse(c): - c.data = np.where(np.isnan(c.data), self.value, c.data) - return c - else: + if not sp.issparse(c): return np.where(np.isnan(c), self.value, c) + c.data = np.where(np.isnan(c.data), self.value, c.data) + return c def __eq__(self, other): return super().__eq__(other) and self.value == other.value @@ -124,7 +123,7 @@ def __init__(self, var, default): self.default = default def __call__(self): - return 'coalesce(%s, %s)' % (self.var.to_sql(), str(self.default)) + return f'coalesce({self.var.to_sql()}, {str(self.default)})' class Default(BaseImputeMethod): @@ -223,7 +222,7 @@ class Model(BaseImputeMethod): format = BaseImputeMethod.format 
+ " ({self.learner.name})" @property def name(self): - return "{} ({})".format(self._name, getattr(self.learner, 'name', '')) + return f"{self._name} ({getattr(self.learner, 'name', '')})" def __init__(self, learner): self.learner = learner @@ -233,15 +232,13 @@ def __call__(self, data, variable): domain = domain_with_class_var(data.domain, variable) incompatibility_reason = self.learner.incompatibility_reason(domain) - if incompatibility_reason is None: - data = data.transform(domain) - model = self.learner(data) - assert model.domain.class_var == variable - return variable.copy( - compute_value=ReplaceUnknownsModel(variable, model)) - else: - raise ValueError("`{}` doesn't support domain type" - .format(self.learner.name)) + if incompatibility_reason is not None: + raise ValueError(f"`{self.learner.name}` doesn't support domain type") + data = data.transform(domain) + model = self.learner(data) + assert model.domain.class_var == variable + return variable.copy( + compute_value=ReplaceUnknownsModel(variable, model)) def copy(self): return Model(self.learner) @@ -285,17 +282,16 @@ def __call__(self, data, variable): if variable.is_discrete: fmt = "{var.name}" value = "N/A" - var = Orange.data.DiscreteVariable( + return Orange.data.DiscreteVariable( fmt.format(var=variable), - values=variable.values + (value, ), + values=variable.values + (value,), compute_value=Lookup( variable, np.arange(len(variable.values), dtype=int), - unknown=len(variable.values)), + unknown=len(variable.values), + ), sparse=variable.sparse, - ) - return var - + ) elif variable.is_continuous: fmt = "{var.name}_def" indicator_var = Orange.data.DiscreteVariable( @@ -347,10 +343,7 @@ def __init__(self, variable, distribution): self.sample_prob = np.ones_like(counts) / len(counts) def transform(self, c): - if not sp.issparse(c): - c = np.array(c, copy=True) - else: - c = c.toarray().ravel() + c = c.toarray().ravel() if sp.issparse(c) else np.array(c, copy=True) nanindices = 
np.flatnonzero(np.isnan(c)) if self.variable.is_discrete: @@ -383,12 +376,12 @@ def __call__(self, data, variable): # A distribution is invalid if a continuous variable's column does not # contain any known values or if a discrete variable's .values == [] isinvalid = dist.size == 0 - if isinvalid and variable.is_discrete: - assert len(variable.values) == 0 - raise ValueError("'{}' has no values".format(variable)) - elif isinvalid and variable.is_continuous: - raise ValueError("'{}' has an unknown distribution" - .format(variable)) + if isinvalid: + if variable.is_discrete: + assert len(variable.values) == 0 + raise ValueError(f"'{variable}' has no values") + elif variable.is_continuous: + raise ValueError(f"'{variable}' has an unknown distribution") if variable.is_discrete and np.sum(dist) == 0: dist += 1 / len(dist) diff --git a/Orange/preprocess/normalize.py b/Orange/preprocess/normalize.py index 8e9206f697f..978b598ed2c 100644 --- a/Orange/preprocess/normalize.py +++ b/Orange/preprocess/normalize.py @@ -52,11 +52,7 @@ def normalize_by_sd(self, stats, var: ContinuousVariable) -> ContinuousVariable: sd = 1 if sd == 0: sd = 1 - if self.center: - compute_val = Norm(var, avg, 1 / sd) - else: - compute_val = Norm(var, 0, 1 / sd) - + compute_val = Norm(var, avg, 1 / sd) if self.center else Norm(var, 0, 1 / sd) # When dealing with integers, and multiplying by something smaller than # 1, the number of decimals should be decreased, but this integer will # likely turn into a float, which should have some default number of @@ -75,9 +71,8 @@ def normalize_by_span(self, stats, var: ContinuousVariable) -> ContinuousVariabl compute_val = Norm(var, dmi, 1 / diff) else: compute_val = Norm(var, (dma + dmi) / 2, 2 / diff) - if not np.isnan(diff): - num_decimals = var.number_of_decimals + int(np.ceil(np.log10(diff))) - num_decimals = max(num_decimals, 0) # num decimals can't be negative - return var.copy(compute_value=compute_val, number_of_decimals=num_decimals) - else: + if 
np.isnan(diff): return var.copy(compute_value=compute_val) + num_decimals = var.number_of_decimals + int(np.ceil(np.log10(diff))) + num_decimals = max(num_decimals, 0) # num decimals can't be negative + return var.copy(compute_value=compute_val, number_of_decimals=num_decimals) diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 8704928f5c0..76de3e72936 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -474,7 +474,7 @@ class Scale(Preprocess): """ class _MethodEnum(Enum): def __call__(self, *args, **kwargs): - return getattr(Scale, '_' + self.name)(*args, **kwargs) + return getattr(Scale, f'_{self.name}')(*args, **kwargs) CenteringType = _MethodEnum("Scale", ("NoCentering", "Mean", "Median"), qualname="Scale.CenteringType") @@ -601,13 +601,12 @@ def __call__(self, data): for i in range(w)] else: sparseness = data.X.shape[0] - np.count_nonzero(data.X, axis=0) - else: # filter by nans - if sp.issparse(data.X): - data_csc = sp.csc_matrix(data.X) - sparseness = [np.sum(np.isnan(data.X[:, i].data)) - for i in range(data_csc.shape[1])] - else: - sparseness = np.sum(np.isnan(data.X), axis=0) + elif sp.issparse(data.X): + data_csc = sp.csc_matrix(data.X) + sparseness = [np.sum(np.isnan(data.X[:, i].data)) + for i in range(data_csc.shape[1])] + else: + sparseness = np.sum(np.isnan(data.X), axis=0) att = [a for a, s in zip(data.domain.attributes, sparseness) if s <= threshold] domain = Orange.data.Domain(att, data.domain.class_vars, diff --git a/Orange/preprocess/remove.py b/Orange/preprocess/remove.py index dddba413331..41b90f70fd6 100644 --- a/Orange/preprocess/remove.py +++ b/Orange/preprocess/remove.py @@ -214,16 +214,14 @@ def has_at_least_two_values(data, var): def remove_constant(var, data): - if var.is_continuous: - if not has_at_least_two_values(data, var): - return None - else: - return var - elif var.is_discrete: - if len(var.values) < 2: - return None - else: - return var + if ( + 
var.is_continuous + and not has_at_least_two_values(data, var) + or not var.is_continuous + and var.is_discrete + and len(var.values) < 2 + ): + return None else: return var diff --git a/Orange/preprocess/score.py b/Orange/preprocess/score.py index 457437faf1c..36493b3b99c 100644 --- a/Orange/preprocess/score.py +++ b/Orange/preprocess/score.py @@ -34,9 +34,11 @@ def friendly_name(self): """Return type name with camel-case separated into words. Derived classes can provide a better property or a class attribute. """ - return re.sub("([a-z])([A-Z])", - lambda mo: mo.group(1) + " " + mo.group(2).lower(), - type(self).__name__) + return re.sub( + "([a-z])([A-Z])", + lambda mo: f"{mo.group(1)} {mo.group(2).lower()}", + type(self).__name__, + ) @staticmethod def _friendly_vartype_name(vartype): @@ -46,20 +48,15 @@ def _friendly_vartype_name(vartype): return "numeric" # Fallbacks name = vartype.__name__ - if name.endswith("Variable"): - return name.lower()[:-8] - return name + return name.lower()[:-8] if name.endswith("Variable") else name def __call__(self, data, feature=None): if not data.domain.class_var: - raise ValueError( - "{} requires data with a target variable." - .format(self.friendly_name)) + raise ValueError(f"{self.friendly_name} requires data with a target variable.") if not isinstance(data.domain.class_var, self.class_type): raise ValueError( - "{} requires a {} target variable." - .format(self.friendly_name, - self._friendly_vartype_name(self.class_type))) + f"{self.friendly_name} requires a {self._friendly_vartype_name(self.class_type)} target variable." + ) if feature is not None: f = data.domain[feature] @@ -72,9 +69,8 @@ def __call__(self, data, feature=None): for var in data.domain.attributes: if not isinstance(var, self.feature_type): raise ValueError( - "{} cannot score {} variables." - .format(self.friendly_name, - self._friendly_vartype_name(type(var)))) + f"{self.friendly_name} cannot score {self._friendly_vartype_name(type(var))} variables." 
+ ) if feature is not None: return self.score_data(data, feature) @@ -99,9 +95,7 @@ class SklScorer(Scorer, metaclass=WrapperMeta): def score_data(self, data, feature): score = self.score(data.X, data.Y) - if feature is not None: - return score[0] - return score + return score[0] if feature is not None else score class Chi2(SklScorer): @@ -172,7 +166,7 @@ def join_derived_features(scores): for attr, score in zip(model_attributes, scores): # Go up the chain of preprocessors to obtain the original variable, but no further # than the data.domain, because the data is perhaphs already preprocessed. - while not (attr in data.domain) and attr.compute_value is not None: + while attr not in data.domain and attr.compute_value is not None: if hasattr(attr.compute_value, 'variable'): attr = getattr(attr.compute_value, 'variable') else: @@ -190,7 +184,7 @@ def join_derived_features(scores): scores = np.array([join_derived_features(row) for row in scores]) return scores[:, data.domain.attributes.index(feature)] \ - if feature else scores + if feature else scores class ClassificationScorer(Scorer): @@ -221,7 +215,7 @@ class ClassificationScorer(Scorer): def score_data(self, data, feature): instances_with_class = \ - np.sum(distribution.Discrete(data, data.domain.class_var)) + np.sum(distribution.Discrete(data, data.domain.class_var)) def score_from_contingency(f): cont = contingency.Discrete(data, f) @@ -229,9 +223,7 @@ def score_from_contingency(f): cont, 1. 
- np.sum(cont.unknowns)/instances_with_class) scores = [score_from_contingency(f) for f in data.domain.attributes] - if feature is not None: - return scores[0] - return scores + return scores[0] if feature is not None else scores def _entropy(dist): @@ -266,9 +258,10 @@ class FCBF(ClassificationScorer): """ def score_data(self, data, feature=None): attributes = data.domain.attributes - s = [] - for i, attr in enumerate(attributes): - s.append((_symmetrical_uncertainty(data, attr, data.domain.class_var), i)) + s = [ + (_symmetrical_uncertainty(data, attr, data.domain.class_var), i) + for i, attr in enumerate(attributes) + ] s.sort() worst = [] @@ -292,7 +285,7 @@ def score_data(self, data, feature=None): p += 1 best = s scores = [i[0] for i in sorted(chain(best, worst), key=lambda i: i[1])] - return np.array(scores) if not feature else scores[0] + return scores[0] if feature else np.array(scores) class InfoGain(ClassificationScorer): @@ -374,9 +367,7 @@ def score_data(self, data, feature): self.n_iterations, self.k_nearest, np.array([a.is_discrete for a in data.domain.attributes]), rstate)) - if feature: - return weights[0] - return weights + return weights[0] if feature else weights class RReliefF(Scorer): @@ -406,9 +397,7 @@ def score_data(self, data, feature): self.n_iterations, self.k_nearest, np.array([a.is_discrete for a in data.domain.attributes]), rstate)) - if feature: - return weights[0] - return weights + return weights[0] if feature else weights if __name__ == '__main__': diff --git a/Orange/preprocess/transformation.py b/Orange/preprocess/transformation.py index 20b55bd7c11..e7acc450488 100644 --- a/Orange/preprocess/transformation.py +++ b/Orange/preprocess/transformation.py @@ -55,10 +55,7 @@ def __call__(self, data): if inst: data = Table.from_list(data.domain, [data]) data = data.transform(self._target_domain) - if self.variable.is_primitive(): - col = data.X - else: - col = data.metas + col = data.X if self.variable.is_primitive() else data.metas 
if not sp.issparse(col) and col.ndim > 1: col = col.squeeze(axis=1) transformed = self.transform(col) @@ -112,10 +109,7 @@ def __hash__(self): @staticmethod def _nan_fixed(c, transformed): if np.isscalar(c): - if c != c: # pylint: disable=comparison-with-itself - transformed = np.nan - else: - transformed = float(transformed) + transformed = np.nan if c != c else float(transformed) else: transformed = transformed.astype(float) transformed[np.isnan(c)] = np.nan @@ -181,12 +175,11 @@ def __init__(self, variable, offset, factor): self.factor = factor def transform(self, c): - if sp.issparse(c): - if self.offset != 0: - raise ValueError('Normalization does not work for sparse data.') - return c * self.factor - else: + if not sp.issparse(c): return (c - self.offset) * self.factor + if self.offset != 0: + raise ValueError('Normalization does not work for sparse data.') + return c * self.factor def __eq__(self, other): return super().__eq__(other) \ diff --git a/Orange/projection/base.py b/Orange/projection/base.py index b07f0cb39e1..6eb8ee2395d 100644 --- a/Orange/projection/base.py +++ b/Orange/projection/base.py @@ -23,10 +23,13 @@ def __init__(self, attrs, weights, mean=None): def __call__(self): if self.mean is None: - return ' + '.join('{} * {}'.format(w, a.to_sql()) - for a, w in zip(self.attrs, self.weights)) - return ' + '.join('{} * ({} - {})'.format(w, a.to_sql(), m, w) - for a, m, w in zip(self.attrs, self.mean, self.weights)) + return ' + '.join( + f'{w} * {a.to_sql()}' for a, w in zip(self.attrs, self.weights) + ) + return ' + '.join( + f'{w} * ({a.to_sql()} - {m})' + for a, m, w in zip(self.attrs, self.mean, self.weights) + ) class Projector(ReprableWithPreprocessors): @@ -195,18 +198,14 @@ def params(self, value): def _get_sklparams(self, values): sklprojection = self.__wraps__ - if sklprojection is not None: - spec = list( - inspect.signature(sklprojection.__init__).parameters.keys() - ) - # first argument is 'self' - assert spec[0] == "self" - params = { 
- name: values[name] for name in spec[1:] if name in values - } - else: + if sklprojection is None: raise TypeError("Wrapper does not define '__wraps__'") - return params + spec = list( + inspect.signature(sklprojection.__init__).parameters.keys() + ) + # first argument is 'self' + assert spec[0] == "self" + return {name: values[name] for name in spec[1:] if name in values} def preprocess(self, data): data = super().preprocess(data) diff --git a/Orange/projection/cur.py b/Orange/projection/cur.py index c46b59d799a..495b67d9e9e 100644 --- a/Orange/projection/cur.py +++ b/Orange/projection/cur.py @@ -106,10 +106,7 @@ def fit(self, X, Y=None): return m def transform(self, X, axis): - if axis == 0: - return X[:, self.features_] - else: - return X[self.samples_, :] + return X[:, self.features_] if axis == 0 else X[self.samples_, :] def _select_columns(self, X, UsV): U, s, V = UsV diff --git a/Orange/projection/freeviz.py b/Orange/projection/freeviz.py index 8e9451fcfec..bd977231295 100644 --- a/Orange/projection/freeviz.py +++ b/Orange/projection/freeviz.py @@ -107,19 +107,12 @@ def forces_regression(cls, distances, y, p=1): def forces_classification(cls, distances, y, p=1): diffclass = scipy.spatial.distance.pdist(y.reshape(-1, 1), "hamming") != 0 # handle attractive force - if p == 1: - F = -distances - else: - F = -(distances ** p) - + F = -distances if p == 1 else -(distances ** p) # handle repulsive force mask = (diffclass & (distances > np.finfo(distances.dtype).eps * 100)) assert mask.shape == F.shape and mask.dtype == bool - if p == 1: - F[mask] = 1 / distances[mask] - else: - F[mask] = 1 / (distances[mask] ** p) + F[mask] = 1 / distances[mask] if p == 1 else 1 / (distances[mask] ** p) return F @classmethod @@ -130,18 +123,20 @@ def gradient(cls, X, embeddings, forces, embedding_dist=None, weights=None): if weights is not None: weights = np.asarray(weights) if weights.ndim != 1: - raise ValueError("weights.ndim != 1 ({})".format(weights.ndim)) + raise 
ValueError(f"weights.ndim != 1 ({weights.ndim})") N, P = X.shape _, dim = embeddings.shape - if not N == embeddings.shape[0]: - raise ValueError("X and embeddings must have the same length ({}!={})" - .format(X.shape[0], embeddings.shape[0])) + if N != embeddings.shape[0]: + raise ValueError( + f"X and embeddings must have the same length ({X.shape[0]}!={embeddings.shape[0]})" + ) if weights is not None and X.shape[0] != weights.shape[0]: - raise ValueError("X.shape[0] != weights.shape[0] ({}!={})" - .format(X.shape[0], weights.shape[0])) + raise ValueError( + f"X.shape[0] != weights.shape[0] ({X.shape[0]}!={weights.shape[0]})" + ) # all pairwise vector differences between embeddings embedding_diff = (embeddings[:, np.newaxis, :] - @@ -217,8 +212,7 @@ def freeviz_gradient(cls, X, y, embedding, p=1, weights=None, is_class_discrete= forces = cls.forces_classification(D, y, p=p) else: forces = cls.forces_regression(D, y, p=p) - G = cls.gradient(X, embedding, forces, embedding_dist=D, weights=weights) - return G + return cls.gradient(X, embedding, forces, embedding_dist=D, weights=weights) @classmethod def _rotate(cls, A): @@ -298,26 +292,20 @@ def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1, weights = np.asarray(weights) if isinstance(center, bool): - if center: - center = np.mean(X, axis=0) - else: - center = None + center = np.mean(X, axis=0) if center else None else: center = np.asarray(center, dtype=X.dtype) if center.shape != (P, ): - raise ValueError("center.shape != (X.shape[1], ) ({} != {})" - .format(center.shape, (X.shape[1], ))) + raise ValueError( + f"center.shape != (X.shape[1], ) ({center.shape} != {(X.shape[1], )})" + ) if isinstance(scale, bool): - if scale: - scale = np.std(X, axis=0) - else: - scale = None + scale = np.std(X, axis=0) if scale else None else: scale = np.asarray(scale, dtype=X.dtype) if scale.shape != (P, ): - raise ValueError("scale.shape != (X.shape[1],) ({} != {}))" - .format(scale.shape, (P, ))) + raise 
ValueError(f"scale.shape != (X.shape[1],) ({scale.shape} != {(P, )}))") if initial is not None: initial = np.asarray(initial) @@ -367,7 +355,7 @@ def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1, A = Anew embeddings = np.dot(X, A) - step_i = step_i + 1 + step_i += 1 if dim == 2: A = cls._rotate(A) @@ -387,8 +375,7 @@ def init_radial(p): else: axes_angle = np.linspace(0, 2 * np.pi, p, endpoint=False) - A = np.c_[np.cos(axes_angle), np.sin(axes_angle)] - return A + return np.c_[np.cos(axes_angle), np.sin(axes_angle)] @staticmethod def init_random(p, dim, rstate=None): diff --git a/Orange/projection/manifold.py b/Orange/projection/manifold.py index d8d64490a57..0d0536eca47 100644 --- a/Orange/projection/manifold.py +++ b/Orange/projection/manifold.py @@ -73,11 +73,7 @@ def torgerson(distances, n_components=2, eigen_solver="auto"): B = np.multiply(D_sq, -0.5, out=D_sq) if eigen_solver == 'auto': - if N > 200 and n_components < 10: # arbitrary - follow skl KernelPCA - eigen_solver = 'arpack' - else: - eigen_solver = 'lapack' - + eigen_solver = 'arpack' if N > 200 and n_components < 10 else 'lapack' if eigen_solver == "arpack": v0 = np.random.RandomState(0xD06).uniform(-1, 1, B.shape[0]) w, v = arpack_eigh(B, k=n_components, v0=v0) @@ -98,9 +94,9 @@ def torgerson(distances, n_components=2, eigen_solver="auto"): neg = L < -5 * np.finfo(L.dtype).eps if np.any(neg): warnings.warn( - ("{} of the {} eigenvalues were negative." - .format(np.sum(neg), L.size)), - UserWarning, stacklevel=2, + f"{np.sum(neg)} of the {L.size} eigenvalues were negative.", + UserWarning, + stacklevel=2, ) # ... and clamp them all to 0 L[L < 0] = 0 @@ -236,12 +232,11 @@ def transform(self, X: np.ndarray, learning_rate=1, **kwargs) -> openTSNE.Partia "X.toarray() to convert to a dense numpy array." 
) if isinstance(self.embedding_.affinities, openTSNE.affinity.Multiscale): - perplexity = kwargs.pop("perplexity", False) - if perplexity: + if perplexity := kwargs.pop("perplexity", False): if not isinstance(self.perplexity, Iterable): raise ValueError( - "Perplexity should be an instance of `Iterable`, `%s` " - "given." % type(self.perplexity).__name__) + f"Perplexity should be an instance of `Iterable`, `{type(self.perplexity).__name__}` given." + ) perplexity_params = {"perplexities": perplexity} else: perplexity_params = {} @@ -428,7 +423,7 @@ def compute_affinities(self, X): "Perplexity should be an instance of `Iterable`, `%s` " "given." % type(self.perplexity).__name__ ) - affinities = openTSNE.affinity.Multiscale( + return openTSNE.affinity.Multiscale( X, perplexities=self.perplexity, metric=self.metric, @@ -442,7 +437,7 @@ def compute_affinities(self, X): "Perplexity should be an instance of `float`, `%s` " "given." % type(self.perplexity).__name__ ) - affinities = openTSNE.affinity.PerplexityBasedNN( + return openTSNE.affinity.PerplexityBasedNN( X, perplexity=self.perplexity, metric=self.metric, @@ -451,8 +446,6 @@ def compute_affinities(self, X): n_jobs=self.n_jobs, ) - return affinities - def compute_initialization(self, X): # Compute the initial positions of individual points if isinstance(self.initialization, np.ndarray): diff --git a/Orange/projection/radviz.py b/Orange/projection/radviz.py index c49ec82d52d..7f77a9bf16e 100644 --- a/Orange/projection/radviz.py +++ b/Orange/projection/radviz.py @@ -19,11 +19,15 @@ class RadViz(LinearProjector): projection = RadVizModel def __call__(self, data): - if data is not None: - if len([attr for attr in data.domain.attributes - if attr.is_discrete and len(attr.values) > 2]): - raise ValueError("Can not handle categorical variables" - " with more than two values") + if data is not None and len( + [ + attr + for attr in data.domain.attributes + if attr.is_discrete and len(attr.values) > 2 + ] + ): + raise 
ValueError("Can not handle categorical variables" + " with more than two values") return super().__call__(data) def get_components(self, X, Y): diff --git a/Orange/regression/curvefit.py b/Orange/regression/curvefit.py index b3d7234a297..3567368336a 100644 --- a/Orange/regression/curvefit.py +++ b/Orange/regression/curvefit.py @@ -45,10 +45,11 @@ def coefficients(self) -> Table: def predict(self, X: np.ndarray) -> np.ndarray: predicted = self.__function(X, *self.__parameters) - if not isinstance(predicted, np.ndarray): - # handle constant function; i.e. len(self.domain.attributes) == 0 - return np.full(len(X), predicted, dtype=float) - return predicted.flatten() + return ( + predicted.flatten() + if isinstance(predicted, np.ndarray) + else np.full(len(X), predicted, dtype=float) + ) def __getstate__(self) -> Dict: if not self.__create_lambda_args: @@ -195,10 +196,11 @@ def fit_storage(self, data: Table) -> CurveFitModel: attributes = [] for attr in domain.attributes: if attr.name in self.__features_names: - if not attr.is_continuous: - raise ValueError("Numeric feature expected.") - attributes.append(attr) + if attr.is_continuous: + attributes.append(attr) + else: + raise ValueError("Numeric feature expected.") new_domain = Domain(attributes, domain.class_vars, domain.metas) transformed = data.transform(new_domain) params = curve_fit(self.__function, transformed.X, transformed.Y, @@ -389,15 +391,14 @@ def __init__(self, name: str, vars_mapper: Dict, functions: List): def visit_Name(self, node: ast.Name) -> Union[ast.Name, ast.Subscript]: if node.id not in self.__vars_mapper or node.id in self.__functions: return node - else: - n = self.__vars_mapper[node.id] - return ast.Subscript( - value=ast.Name(id=self.__name, ctx=ast.Load()), - slice=ast.ExtSlice( - dims=[ast.Slice(lower=None, upper=None, step=None), - ast.Index(value=ast.Num(n=n))]), - ctx=node.ctx - ) + n = self.__vars_mapper[node.id] + return ast.Subscript( + value=ast.Name(id=self.__name, ctx=ast.Load()), 
+ slice=ast.ExtSlice( + dims=[ast.Slice(lower=None, upper=None, step=None), + ast.Index(value=ast.Num(n=n))]), + ctx=node.ctx + ) if __name__ == "__main__": diff --git a/Orange/regression/linear.py b/Orange/regression/linear.py index 7342d3a3150..921c05e1bef 100644 --- a/Orange/regression/linear.py +++ b/Orange/regression/linear.py @@ -141,7 +141,7 @@ def coefficients(self): return self.skl_model.coef_ def __str__(self): - return 'LinearModel {}'.format(self.skl_model) + return f'LinearModel {self.skl_model}' class PolynomialModel(Model): @@ -155,7 +155,7 @@ def predict(self, X): return self.model.predict(X) def __str__(self): - return 'PolynomialModel {}'.format(self.model) + return f'PolynomialModel {self.model}' PolynomialLearner.__returns__ = PolynomialModel diff --git a/Orange/regression/mean.py b/Orange/regression/mean.py index 91b4de91f71..66c9b9b8e6a 100644 --- a/Orange/regression/mean.py +++ b/Orange/regression/mean.py @@ -53,10 +53,7 @@ def __init__(self, dist, domain=None): # domain is None. 
self.domain = domain self.dist = dist - if dist.any(): - self.mean = self.dist.mean() - else: - self.mean = 0.0 + self.mean = self.dist.mean() if dist.any() else 0.0 # noinspection PyPep8Naming def predict(self, X): @@ -72,6 +69,6 @@ def predict(self, X): return numpy.full(len(X), self.mean) def __str__(self): - return 'MeanModel({})'.format(self.mean) + return f'MeanModel({self.mean})' MeanLearner.__returns__ = MeanModel diff --git a/Orange/regression/svm.py b/Orange/regression/svm.py index c3ef4d6089b..c8d0bfdf51e 100644 --- a/Orange/regression/svm.py +++ b/Orange/regression/svm.py @@ -48,4 +48,4 @@ def __init__(self, nu=0.5, C=1.0, kernel='rbf', degree=3, gamma="auto", learners = [SVRLearner(), LinearSVRLearner(), NuSVRLearner()] res = Orange.evaluation.CrossValidation(data, learners) for l, ca in zip(learners, Orange.evaluation.RMSE(res)): - print("learner: {}\nRMSE: {}\n".format(l, ca)) + print(f"learner: {l}\nRMSE: {ca}\n") diff --git a/Orange/regression/tree.py b/Orange/regression/tree.py index 79846a73d04..27661609ccc 100644 --- a/Orange/regression/tree.py +++ b/Orange/regression/tree.py @@ -163,17 +163,16 @@ def fit_storage(self, data): for attr in data.domain.attributes): # No fallback in the script; widgets can prevent this error # by providing a fallback and issue a warning about doing so - raise ValueError("Exhaustive binarization does not handle " - "attributes with more than {} values". 
- format(self.MAX_BINARIZATION)) + raise ValueError( + f"Exhaustive binarization does not handle attributes with more than {self.MAX_BINARIZATION} values" + ) active_inst = np.nonzero(~np.isnan(data.Y))[0].astype(np.int32) root = self._build_tree(data, active_inst) if root is None: root = Node(None, 0, np.array([0., 0.])) root.subset = active_inst - model = TreeModel(data, root) - return model + return TreeModel(data, root) class SklTreeRegressor(SklModel, TreeModelInterface): diff --git a/Orange/statistics/contingency.py b/Orange/statistics/contingency.py index da9a1c81983..d0c700f8f33 100644 --- a/Orange/statistics/contingency.py +++ b/Orange/statistics/contingency.py @@ -17,10 +17,11 @@ def _get_variable(variable, dat, attr_name, expected_type=None, expected_name="" failed = True if failed or (expected_type is not None and not isinstance(variable, expected_type)): if not expected_type or isinstance(variable, data.Variable): - raise ValueError("expected %s variable not %s" % (expected_name, variable)) + raise ValueError(f"expected {expected_name} variable not {variable}") else: - raise ValueError("expected %s, not '%s'" % ( - expected_type.__name__, type(variable).__name__)) + raise ValueError( + f"expected {expected_type.__name__}, not '{type(variable).__name__}'" + ) return variable @@ -61,14 +62,14 @@ def __new__(cls, dat, col_variable=None, row_variable=None, def from_data(cls, data, col_variable, row_variable=None): if row_variable is None: row_variable = data.domain.class_var - if row_variable is None: - raise ValueError( - "row_variable needs to be specified (data has no class)") + if row_variable is None: + raise ValueError( + "row_variable needs to be specified (data has no class)") row_variable = _get_variable(row_variable, data, "row_variable") col_variable = _get_variable(col_variable, data, "col_variable") try: dist, col_unknowns, row_unknowns, unknowns = \ - data._compute_contingency([col_variable], row_variable)[0] + 
data._compute_contingency([col_variable], row_variable)[0] self = super().__new__(cls, dist.shape) self[...] = dist @@ -223,14 +224,14 @@ def __init__(self, dat, col_variable=None, row_variable=None, def from_data(self, data, col_variable, row_variable=None): if row_variable is None: row_variable = data.domain.class_var - if row_variable is None: - raise ValueError("row_variable needs to be specified (data has no class)") + if row_variable is None: + raise ValueError("row_variable needs to be specified (data has no class)") self.row_variable = _get_variable(row_variable, data, "row_variable") self.col_variable = _get_variable(col_variable, data, "col_variable") try: conts = data._compute_contingency([col_variable], row_variable) (self.values, self.counts), self.col_unknowns, self.row_unknowns, \ - self.unknowns = conts[0] + self.unknowns = conts[0] except NotImplementedError: raise NotImplementedError( "Fallback method for computation of contingencies is not implemented yet" @@ -243,7 +244,7 @@ def array_with_unknowns(self): with adding a row of row_unknowns together with values. 
""" # pylint: disable=unnecessary-comprehension - other_rows = [x for x in self] + other_rows = list(self) ind = self.row_unknowns > 0 unknown_rows = np.vstack((self.values[ind], self.row_unknowns[ind])) return other_rows + [unknown_rows] @@ -293,9 +294,8 @@ def normalize(self, axis=None): if t > 1e-6: x[:, 1] /= t self.unknowns[i] /= t - else: - if self.unknowns[i] > 1e-6: - self.unknowns[i] = 1 + elif self.unknowns[i] > 1e-6: + self.unknowns[i] = 1 def get_contingency(dat, col_variable, row_variable=None, col_unknowns=None, @@ -308,8 +308,7 @@ def get_contingency(dat, col_variable, row_variable=None, col_unknowns=None, return Continuous( dat, col_variable, row_variable, col_unknowns, row_unknowns) else: - raise TypeError( - "cannot compute distribution of '%s'" % type(variable).__name__) + raise TypeError(f"cannot compute distribution of '{type(variable).__name__}'") def get_contingencies(dat, skip_discrete=False, skip_continuous=False): @@ -329,10 +328,10 @@ def get_contingencies(dat, skip_discrete=False, skip_continuous=False): dist_unks = dat._compute_contingency(columns) if columns is None: columns = np.arange(len(vars)) - contigs = [] - for col, (cont, col_unk, row_unk, unks) in zip(columns, dist_unks): - contigs.append(get_contingency( - cont, vars[col], row_var, col_unk, row_unk, unks)) + contigs = [ + get_contingency(cont, vars[col], row_var, col_unk, row_unk, unks) + for col, (cont, col_unk, row_unk, unks) in zip(columns, dist_unks) + ] except NotImplementedError: if columns is None: columns = range(len(vars)) diff --git a/Orange/statistics/distribution.py b/Orange/statistics/distribution.py index 43c1680e6d8..e93e38a8be1 100644 --- a/Orange/statistics/distribution.py +++ b/Orange/statistics/distribution.py @@ -22,10 +22,11 @@ def _get_variable(dat, variable, expected_type=None, expected_name=""): failed = True if failed or (expected_type is not None and not isinstance(variable, expected_type)): if isinstance(variable, data.Variable): - raise 
ValueError("expected %s variable not %s" % (expected_name, variable)) + raise ValueError(f"expected {expected_name} variable not {variable}") else: - raise ValueError("expected %s, not '%s'" % ( - expected_type.__name__, type(variable).__name__)) + raise ValueError( + f"expected {expected_type.__name__}, not '{type(variable).__name__}'" + ) return variable @@ -276,7 +277,7 @@ def from_data(cls, variable, data): col = data[:, variable] dtype = col.dtype if data.has_weights(): - if not "float" in dtype.name and "float" in col.dtype.name: + if "float" not in dtype.name and "float" in col.dtype.name: dtype = col.dtype.name dist = np.empty((2, len(col)), dtype=dtype) dist[0, :] = col @@ -350,7 +351,7 @@ def get_distribution(dat, variable, unknowns=None): elif variable.is_continuous: return Continuous(dat, variable, unknowns) else: - raise TypeError("cannot compute distribution of '%s'" % type(variable).__name__) + raise TypeError(f"cannot compute distribution of '{type(variable).__name__}'") def get_distributions(dat, skipDiscrete=False, skipContinuous=False): @@ -368,9 +369,10 @@ def get_distributions(dat, skipDiscrete=False, skipContinuous=False): dist_unks = dat._compute_distributions(columns) if columns is None: columns = np.arange(len(vars)) - distributions = [] - for col, (dist, unks) in zip(columns, dist_unks): - distributions.append(get_distribution(dist, vars[col], unks)) + distributions = [ + get_distribution(dist, vars[col], unks) + for col, (dist, unks) in zip(columns, dist_unks) + ] except NotImplementedError: if columns is None: columns = np.arange(len(vars)) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index 0aaf1024ea7..1130eb2ddd6 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -47,7 +47,7 @@ def _count_nans_per_row_sparse(X, weights, dtype=None): X = type(X)((np.isnan(X.data), X.indices, X.indptr), X.shape) return np.asarray(X.sum(axis=1), dtype=dtype).ravel() else: # pragma: no cover - raise 
TypeError("unsupported type '{}'".format(type(X).__name__)) + raise TypeError(f"unsupported type '{type(X).__name__}'") def sparse_count_implicit_zeros(x): @@ -69,20 +69,19 @@ def sparse_implicit_zero_weights(x, weights): if not sp.issparse(x): raise TypeError('The matrix provided was not sparse.') - if weights.ndim == 1: - # Match weights and x axis so `indices` will be set appropriately - if x.shape[0] == weights.shape[0]: - x = x.tocsc() - elif x.shape[1] == weights.shape[0]: - x = x.tocsr() - n_items = np.prod(x.shape) - zero_indices = np.setdiff1d(np.arange(n_items), x.indices, assume_unique=True) - return weights[zero_indices] - else: + if weights.ndim != 1: # Can easily be implemented using a coo_matrix raise NotImplementedError( 'Computing zero weights on ndimensinal weight matrix is not implemented' ) + # Match weights and x axis so `indices` will be set appropriately + if x.shape[0] == weights.shape[0]: + x = x.tocsc() + elif x.shape[1] == weights.shape[0]: + x = x.tocsr() + n_items = np.prod(x.shape) + zero_indices = np.setdiff1d(np.arange(n_items), x.indices, assume_unique=True) + return weights[zero_indices] def bincount(x, weights=None, max_val=None, minlength=0): @@ -159,11 +158,10 @@ def bincount(x, weights=None, max_val=None, minlength=0): # Since `csr_matrix.values` only contain non-zero values or explicit # zeros, we must count implicit zeros separately and add them to the # explicit ones found before - if sp.issparse(x_original): - # If x contains only NaNs, then bc will be an empty array - if zero_weights and bc.size == 0: + if sp.issparse(x_original) and zero_weights: + if bc.size == 0: bc = [zero_weights] - elif zero_weights: + else: bc[0] += zero_weights return bc, nans @@ -479,34 +477,33 @@ def nan_mean_var(x, axis=None, weights=None): if axis is None: raise NotImplementedError("axis=None is not supported") - if not sp.issparse(x): - if weights is None: - means = bn.nanmean(x, axis=axis) - variances = bn.nanvar(x, axis=axis) - else: - if 
axis == 0: - weights = weights.reshape(-1, 1) - elif axis == 1: - weights = weights.reshape(1, -1) - else: - raise NotImplementedError - - nanw = ~np.isnan(x) * weights # do not divide by non-used weights - wsum = np.sum(nanw, axis=axis) - means = bn.nansum(x * weights, axis=axis) / wsum - - if axis == 0: - mr = means.reshape(1, -1) - elif axis == 1: - mr = means.reshape(-1, 1) - - variances = bn.nansum(((x - mr) ** 2) * weights, axis=axis) / wsum - else: + if sp.issparse(x): # mean_variance_axis is picky regarding the input type if weights is not None: weights = weights.astype(float) means, variances = mean_variance_axis(x, axis=axis, weights=weights) + elif weights is None: + means = bn.nanmean(x, axis=axis) + variances = bn.nanvar(x, axis=axis) + else: + if axis == 0: + weights = weights.reshape(-1, 1) + elif axis == 1: + weights = weights.reshape(1, -1) + else: + raise NotImplementedError + + nanw = ~np.isnan(x) * weights # do not divide by non-used weights + wsum = np.sum(nanw, axis=axis) + means = bn.nansum(x * weights, axis=axis) / wsum + + if axis == 0: + mr = means.reshape(1, -1) + elif axis == 1: + mr = means.reshape(-1, 1) + + variances = bn.nansum(((x - mr) ** 2) * weights, axis=axis) / wsum return means, variances @@ -533,13 +530,7 @@ def nanmedian_sparse(x): n_nan = sum(np.isnan(x.data)) n_nonzero = sum(x.data[nz] != 0) n_zeros = np.prod(x.shape) - n_nonzero - n_nan - if n_zeros > n_nonzero: - # Typical case if use of sparse matrices make sense - return 0 - else: - # Possibly contains NaNs and - # more nz values than zeros, so allocating memory should not be too problematic - return np.nanmedian(x.toarray()) + return 0 if n_zeros > n_nonzero else np.nanmedian(x.toarray()) return _apply_func(x, np.nanmedian, nanmedian_sparse, axis=axis) @@ -770,9 +761,9 @@ def FDR(p_values: Iterable, dependent=False, m=None, ordered=False) -> Iterable: m = len(p_values) if not ordered: ordered = (np.diff(p_values) >= 0).all() - if not ordered: - indices = 
np.argsort(p_values) - p_values = p_values[indices] + if not ordered: + indices = np.argsort(p_values) + p_values = p_values[indices] if dependent: # correct q for dependent tests m *= sum(1 / np.arange(1, m + 1)) @@ -781,4 +772,4 @@ def FDR(p_values: Iterable, dependent=False, m=None, ordered=False) -> Iterable: fdrs = np.array(np.minimum.accumulate(fdrs)[::-1]) if not ordered: fdrs[indices] = fdrs.copy() - return fdrs if not is_list else list(fdrs) + return list(fdrs) if is_list else fdrs diff --git a/Orange/tests/sql/base.py b/Orange/tests/sql/base.py index c0ff2fa1645..8a34c4c91ca 100644 --- a/Orange/tests/sql/base.py +++ b/Orange/tests/sql/base.py @@ -13,7 +13,7 @@ def parse_uri(uri): """Parse uri to db type and dictionary of connection parameters.""" if uri == "": - return "", dict() + return "", {} parsed_uri = parse.urlparse(uri) database = parsed_uri.path.strip('/') if "/" in database: @@ -67,10 +67,7 @@ def test_parse_empty(self): parameters = parse_uri("") self.assertEqual("", parameters[0]) - self.assertDictContainsSubset( - dict(), - parameters[1] - ) + self.assertDictContainsSubset({}, parameters[1]) def assertDictContainsSubset(self, subset, dictionary, msg=None): """Checks whether dictionary is a superset of subset. 
@@ -85,21 +82,21 @@ def assertDictContainsSubset(self, subset, dictionary, msg=None): if key not in dictionary: missing.append(key) elif value != dictionary[key]: - mismatched.append('%s, expected: %s, actual: %s' % - (safe_repr(key), safe_repr(value), - safe_repr(dictionary[key]))) + mismatched.append( + f'{safe_repr(key)}, expected: {safe_repr(value)}, actual: {safe_repr(dictionary[key])}' + ) if not (missing or mismatched): return - standardMsg = '' if missing: - standardMsg = 'Missing: %s' % ','.join(safe_repr(m) for m in - missing) + standardMsg = f"Missing: {','.join(safe_repr(m) for m in missing)}" + else: + standardMsg = '' if mismatched: if standardMsg: standardMsg += '; ' - standardMsg += 'Mismatched values: %s' % ','.join(mismatched) + standardMsg += f"Mismatched values: {','.join(mismatched)}" self.fail(self._formatMessage(msg, standardMsg)) @@ -177,40 +174,32 @@ def try_connection(self): def create_sql_table(self, data, sql_column_types=None, sql_column_names=None, table_name=None): - data = list(data) - if table_name is None: table_name = ''.join(random.choices(string.ascii_lowercase, k=16)) + data = list(data) if sql_column_types is None: column_size = self._get_column_types(data) sql_column_types = [ - 'float' if size == 0 else 'varchar({})'.format(size) + 'float' if size == 0 else f'varchar({size})' for size in column_size ] if sql_column_names is None: - sql_column_names = ["col{}".format(i) - for i in range(len(sql_column_types))] + sql_column_names = [f"col{i}" for i in range(len(sql_column_types))] else: - sql_column_names = map(lambda x: '"{}"'.format(x), sql_column_names) + sql_column_names = map(lambda x: f'"{x}"', sql_column_names) - drop_table_sql = "DROP TABLE IF EXISTS {}".format(table_name) + drop_table_sql = f"DROP TABLE IF EXISTS {table_name}" - create_table_sql = "CREATE TABLE {} ({})".format( - table_name, - ", ".join('{} {}'.format(n, t) - for n, t in zip(sql_column_names, sql_column_types))) + create_table_sql = f"""CREATE TABLE 
{table_name} ({", ".join(f'{n} {t}' for n, t in zip(sql_column_names, sql_column_types))})""" insert_values = ", ".join( - "({})".format( - ", ".join("NULL" if v is None else "'{}'".format(v) - for v, t in zip(row, sql_column_types)) - ) for row in data + f"""({", ".join("NULL" if v is None else f"'{v}'" for v, t in zip(row, sql_column_types))})""" + for row in data ) - insert_sql = "INSERT INTO {} VALUES {}".format(table_name, - insert_values) + insert_sql = f"INSERT INTO {table_name} VALUES {insert_values}" import psycopg2 with psycopg2.connect(**self.params) as conn: @@ -228,7 +217,7 @@ def drop_sql_table(self, table_name): import psycopg2 with psycopg2.connect(**self.params) as conn: with conn.cursor() as curs: - curs.execute("DROP TABLE {}".format(table_name)) + curs.execute(f"DROP TABLE {table_name}") def get_backend(self): from Orange.data.sql.backend import Psycopg2Backend @@ -250,40 +239,32 @@ def try_connection(self): def create_sql_table(self, data, sql_column_types=None, sql_column_names=None, table_name=None): - data = list(data) - if table_name is None: table_name = ''.join(random.choices(string.ascii_lowercase, k=16)) + data = list(data) if sql_column_types is None: column_size = self._get_column_types(data) sql_column_types = [ - 'float' if size == 0 else 'varchar({})'.format(size) + 'float' if size == 0 else f'varchar({size})' for size in column_size ] if sql_column_names is None: - sql_column_names = ["col{}".format(i) - for i in range(len(sql_column_types))] + sql_column_names = [f"col{i}" for i in range(len(sql_column_types))] else: - sql_column_names = map(lambda x: '"{}"'.format(x), sql_column_names) + sql_column_names = map(lambda x: f'"{x}"', sql_column_names) - drop_table_sql = "DROP TABLE IF EXISTS {}".format(table_name) + drop_table_sql = f"DROP TABLE IF EXISTS {table_name}" - create_table_sql = "CREATE TABLE {} ({})".format( - table_name, - ", ".join('{} {}'.format(n, t) - for n, t in zip(sql_column_names, sql_column_types))) + 
create_table_sql = f"""CREATE TABLE {table_name} ({", ".join(f'{n} {t}' for n, t in zip(sql_column_names, sql_column_types))})""" insert_values = ", ".join( - "({})".format( - ", ".join("NULL" if v is None else "'{}'".format(v) - for v, t in zip(row, sql_column_types)) - ) for row in data + f"""({", ".join("NULL" if v is None else f"'{v}'" for v, t in zip(row, sql_column_types))})""" + for row in data ) - insert_sql = "INSERT INTO {} VALUES {}".format(table_name, - insert_values) + insert_sql = f"INSERT INTO {table_name} VALUES {insert_values}" import pymssql with pymssql.connect(**self.params) as conn: @@ -300,7 +281,7 @@ def drop_sql_table(self, table_name): import pymssql with pymssql.connect(**self.params) as conn: with conn.cursor() as cursor: - cursor.execute("DROP TABLE {}".format(table_name)) + cursor.execute(f"DROP TABLE {table_name}") conn.commit() def get_backend(self): @@ -318,12 +299,11 @@ def dbs(): """Parse env variable and initialize connection to given dbs.""" params = connection_params() - db_conn = {} - for c in params: - if c and c in test_connections: - db_conn[c] = test_connections[c](params[c]) - - return db_conn + return { + c: test_connections[c](params[c]) + for c in params + if c and c in test_connections + } class DataBaseTest: @@ -334,35 +314,34 @@ def _check_db(cls, db): if ">" in db: i = db.find(">") if db[:i] in cls.db_conn and \ - cls.db_conn[db[:i]].version <= int(db[i + 1:]): + cls.db_conn[db[:i]].version <= int(db[i + 1:]): raise unittest.SkipTest( - "This test is only run database version higher then {}" - .format(db[i + 1:])) + f"This test is only run database version higher then {db[i + 1:]}" + ) else: db = db[:i] elif "<" in db: i = db.find("<") if db[:i] in cls.db_conn and \ - cls.db_conn[db[:i]].version >= int(db[i + 1:]): + cls.db_conn[db[:i]].version >= int(db[i + 1:]): raise unittest.SkipTest( - "This test is only run on database version lower then {}" - .format(db[i + 1:])) + f"This test is only run on database version 
lower then {db[i + 1:]}" + ) else: db = db[:i] if db in cls.db_conn: if not cls.db_conn[db].is_module: raise unittest.SkipTest( - "{} module is required for this database".format( - cls.db_conn[db].module)) + f"{cls.db_conn[db].module} module is required for this database" + ) elif not cls.db_conn[db].is_active: raise unittest.SkipTest("Database is not running") + elif db in test_connections.keys(): + raise unittest.SkipTest(f"No connection provided for {db}") else: - if db in test_connections.keys(): - raise unittest.SkipTest("No connection provided for {}".format(db)) - else: - raise Exception("Unsupported database") + raise Exception("Unsupported database") return db @@ -411,13 +390,12 @@ def decorator(function): frame_locals = frame[0].f_locals for db in dbs: - name = 'test_db_' + db + '_' + function.__name__[5:] + name = f'test_db_{db}_{function.__name__[5:]}' frame_locals[name] = cls._setup_test_with(function, db) frame_locals[name].__name__ = name frame_locals[name].place_as = function if function.__doc__ is not None: - frame_locals[name].__doc__ = 'On ' + db + ' run: ' + \ - function.__doc__ + frame_locals[name].__doc__ = f'On {db} run: {function.__doc__}' function.__test__ = False diff --git a/Orange/tests/sql/test_filter.py b/Orange/tests/sql/test_filter.py index a10ad59a9d5..2626a0b346b 100644 --- a/Orange/tests/sql/test_filter.py +++ b/Orange/tests/sql/test_filter.py @@ -141,7 +141,7 @@ def test_on_continuous_attribute_with_unknown_value(self): @dbt.run_on(["postgres"]) def test_on_continuous_attribute_negated(self): filtered_data = filter.SameValue(0, 1, negate=True)(self.table) - correct_data = [row for row in self.data if not row[0] == 1] + correct_data = [row for row in self.data if row[0] != 1] self.assertEqual(len(filtered_data), len(correct_data)) self.assertSequenceEqual(filtered_data, correct_data) @@ -173,7 +173,7 @@ def test_on_discrete_attribute_with_unknowns(self): @dbt.run_on(["postgres", "mssql"]) def 
test_on_discrete_attribute_negated(self): filtered_data = filter.SameValue(3, 'a', negate=True)(self.table) - correct_data = [row for row in self.data if not row[3] == 'a'] + correct_data = [row for row in self.data if row[3] != 'a'] self.assertEqual(len(filtered_data), len(correct_data)) self.assertSequenceEqual(filtered_data, correct_data) @@ -182,7 +182,7 @@ def test_on_discrete_attribute_negated(self): def test_on_discrete_attribute_value_passed_as_int(self): values = self.table.domain[3].values filtered_data = filter.SameValue(3, 0, negate=True)(self.table) - correct_data = [row for row in self.data if not row[3] == values[0]] + correct_data = [row for row in self.data if row[3] != values[0]] self.assertEqual(len(filtered_data), len(correct_data)) self.assertSequenceEqual(filtered_data, correct_data) @@ -191,7 +191,7 @@ def test_on_discrete_attribute_value_passed_as_int(self): def test_on_discrete_attribute_value_passed_as_float(self): values = self.table.domain[3].values filtered_data = filter.SameValue(3, 0., negate=True)(self.table) - correct_data = [row for row in self.data if not row[3] == values[0]] + correct_data = [row for row in self.data if row[3] != values[0]] self.assertEqual(len(filtered_data), len(correct_data)) self.assertSequenceEqual(filtered_data, correct_data) diff --git a/Orange/tests/sql/test_sql_table.py b/Orange/tests/sql/test_sql_table.py index 748f7f13beb..448e7a68ae3 100644 --- a/Orange/tests/sql/test_sql_table.py +++ b/Orange/tests/sql/test_sql_table.py @@ -243,7 +243,7 @@ def test_joins(self): def _mock_attribute(self, attr_name, formula=None): if formula is None: - formula = '"%s"' % attr_name + formula = f'"{attr_name}"' class Attr: name = attr_name @@ -258,7 +258,10 @@ def to_sql(): def test_universal_table(self): _, table_name = self.construct_universal_table() - SqlTable(self.conn, """ + SqlTable( + self.conn, + ( + """ SELECT v1.col2 as v1, v2.col2 as v2, @@ -272,7 +275,10 @@ def test_universal_table(self): INNER JOIN 
%(table_name)s v5 ON v5.col0 = v1.col0 AND v5.col1 = 5 WHERE v1.col1 = 1 ORDER BY v1.col0 - """ % dict(table_name='"%s"' % table_name)) + """ + % dict(table_name=f'"{table_name}"') + ), + ) self.drop_sql_table(table_name) @@ -659,16 +665,11 @@ def test_recovers_connection_after_sql_error(self): np.arange(25).reshape((-1, 1))) sql_table = SqlTable(conn, table_name) - try: - broken_query = "SELECT 1/%s FROM %s" % ( - sql_table.domain.attributes[0].to_sql(), sql_table.table_name) + with contextlib.suppress(BackendError): + broken_query = f"SELECT 1/{sql_table.domain.attributes[0].to_sql()} FROM {sql_table.table_name}" with sql_table.backend.execute_sql_query(broken_query) as cur: cur.fetchall() - except BackendError: - pass - - working_query = "SELECT %s FROM %s" % ( - sql_table.domain.attributes[0].to_sql(), sql_table.table_name) + working_query = f"SELECT {sql_table.domain.attributes[0].to_sql()} FROM {sql_table.table_name}" with sql_table.backend.execute_sql_query(working_query) as cur: cur.fetchall() self.drop_sql_table(table_name) @@ -758,7 +759,7 @@ def test_list_tables_with_schema(self): try: tables = self.backend.list_tables("orange_tests") - self.assertTrue(any([t.name == "efgh" for t in tables])) + self.assertTrue(any(t.name == "efgh" for t in tables)) SqlTable(self.conn, tables[0], inspect_values=True) finally: with self.backend.execute_sql_query("DROP SCHEMA IF EXISTS orange_tests CASCADE"): diff --git a/Orange/tests/test_base.py b/Orange/tests/test_base.py index 6217fb7529d..538a55fdc87 100644 --- a/Orange/tests/test_base.py +++ b/Orange/tests/test_base.py @@ -74,7 +74,7 @@ def test_preprocessors_can_be_passed_in_as_non_iterable(self): def test_preprocessors_can_be_passed_in_as_generator(self): """Since we support iterables, we should support generators as well""" pp = (Discretize(),) - learner = DummyLearnerPP(p for p in pp) + learner = DummyLearnerPP(iter(pp)) self.assertEqual( tuple(learner.active_preprocessors), pp, 'Preprocessors should be able to be 
passed in as single object ' diff --git a/Orange/tests/test_basket_reader.py b/Orange/tests/test_basket_reader.py index 31fedae1b9c..18f33d19eff 100644 --- a/Orange/tests/test_basket_reader.py +++ b/Orange/tests/test_basket_reader.py @@ -52,7 +52,7 @@ def test_read_variable_only_syntax(self, fname): def test_handles_spaces_between_variables(self, fname): table = read_basket(fname) self.assertEqual(len(table.domain.variables), 3) - self.assertEqual(set(x for x in table[0]), {1, 2, 3}) + self.assertEqual(set(table[0]), {1, 2, 3}) @with_file("""a=1, b=2\na=1, b=4""") def test_variables_can_be_listed_in_any_order(self, fname): diff --git a/Orange/tests/test_classification.py b/Orange/tests/test_classification.py index 3cac2a70256..eacfd868c23 100644 --- a/Orange/tests/test_classification.py +++ b/Orange/tests/test_classification.py @@ -88,9 +88,7 @@ def test_predict_single_instance(self): table = Table("titanic") learn = NaiveBayesLearner() clf = learn(table) - pred = [] - for row in table: - pred.append(clf(row)) + pred = [clf(row) for row in table] def test_prediction_dimensions(self): class MockModel(Model): @@ -149,9 +147,13 @@ def test_value_from_probs(self): y = np.random.randint(1, 6, (nrows, 2)) y[:, 0] = y[:, 0] // 3 # majority = 1 y[:, 1] = (y[:, 1] + 4) // 3 # majority = 2 - domain = Domain([ContinuousVariable('i' + str(i)) for i in range(ncols)], - [DiscreteVariable('c' + str(i), values="0123") - for i in range(y.shape[1])]) + domain = Domain( + [ContinuousVariable(f'i{str(i)}') for i in range(ncols)], + [ + DiscreteVariable(f'c{str(i)}', values="0123") + for i in range(y.shape[1]) + ], + ) t = Table(domain, x, y) learn = DummyMulticlassLearner() clf = learn(t) @@ -169,11 +171,15 @@ def test_probs_from_value(self): # single class variable y = np.random.randint(0, 2, (nrows, 1)) - d = Domain([DiscreteVariable('v' + str(i), - values=[str(v) - for v in np.unique(x[:, i])]) - for i in range(ncols)], - DiscreteVariable('c', values="12")) + d = Domain( + [ + 
DiscreteVariable( + f'v{str(i)}', values=[str(v) for v in np.unique(x[:, i])] + ) + for i in range(ncols) + ], + DiscreteVariable('c', values="12"), + ) t = Table(d, x, y) learn = DummyLearner() clf = learn(t) @@ -188,9 +194,13 @@ def test_probs_from_value(self): y = np.random.randint(1, 6, (nrows, 2)) y[:, 0] = y[:, 0] // 3 # majority = 1 y[:, 1] = (y[:, 1] + 4) // 3 - 1 # majority = 1 - domain = Domain([ContinuousVariable('i' + str(i)) for i in range(ncols)], - [DiscreteVariable('c' + str(i), values="0123") - for i in range(y.shape[1])]) + domain = Domain( + [ContinuousVariable(f'i{str(i)}') for i in range(ncols)], + [ + DiscreteVariable(f'c{str(i)}', values="0123") + for i in range(y.shape[1]) + ], + ) t = Table(domain, x, y) learn = DummyMulticlassLearner() clf = learn(t) @@ -220,23 +230,23 @@ def test_result_shape(self): with self.subTest(learner.__name__): # model trained on only one value (but three in the domain) - model = learner()(iris[0:100]) + model = learner()(iris[:100]) - res = model(iris[0:50]) + res = model(iris[:50]) self.assertTupleEqual((50,), res.shape) # probabilities must still be for three classes - res = model(iris[0:50], model.Probs) + res = model(iris[:50], model.Probs) self.assertTupleEqual((50, 3), res.shape) # model trained on all classes and predicting with one class try: - model = learner()(iris[0:100]) + model = learner()(iris[:100]) except TypeError: # calibration, threshold learners are skipped # they have some specifics regarding data continue - res = model(iris[0:50], model.Probs) + res = model(iris[:50], model.Probs) self.assertTupleEqual((50, 3), res.shape) def test_result_shape_numpy(self): @@ -260,10 +270,10 @@ def test_result_shape_numpy(self): model = learner(*args)(data) transformed_iris = model.data_to_model_domain(data) - res = model(transformed_iris.X[0:5]) + res = model(transformed_iris.X[:5]) self.assertTupleEqual((5,), res.shape) - res = model(transformed_iris.X[0:1], model.Probs) + res = 
model(transformed_iris.X[:1], model.Probs) self.assertTupleEqual( (1, len(data.domain.class_var.values)), res.shape ) @@ -282,7 +292,7 @@ def test_predict_proba(self): probs = model.predict_proba(data) shape = (len(data), len(data.domain.class_var.values)) self.assertEqual(probs.shape, shape) - self.assertTrue(np.all(np.sum(probs, axis=1) - 1 < 0.0001)) + self.assertTrue(np.all(np.sum(probs, axis=1) < 1.0001)) class ExpandProbabilitiesTest(unittest.TestCase): @@ -431,9 +441,10 @@ def test_all_models_work_after_unpickling(self): Table.from_table(model.domain, ds).X, Table.from_table(model2.domain, ds).X) np.testing.assert_almost_equal( - model(ds), model2(ds), - err_msg='%s does not return same values when unpickled %s' - % (learner.__class__.__name__, ds.name)) + model(ds), + model2(ds), + err_msg=f'{learner.__class__.__name__} does not return same values when unpickled {ds.name}', + ) def test_all_models_work_after_unpickling_pca(self): datasets = [Table('iris'), Table('titanic')] @@ -456,9 +467,10 @@ def test_all_models_work_after_unpickling_pca(self): Table.from_table(model.domain, ds).X, Table.from_table(model2.domain, ds).X) np.testing.assert_almost_equal( - model(ds), model2(ds), - err_msg='%s does not return same values when unpickled %s' - % (learner.__class__.__name__, ds.name)) + model(ds), + model2(ds), + err_msg=f'{learner.__class__.__name__} does not return same values when unpickled {ds.name}', + ) def test_adequacy_all_learners(self): for learner in all_learners(): diff --git a/Orange/tests/test_clustering_hierarchical.py b/Orange/tests/test_clustering_hierarchical.py index 9435a128c0d..7d3d0bac3d4 100644 --- a/Orange/tests/test_clustering_hierarchical.py +++ b/Orange/tests/test_clustering_hierarchical.py @@ -122,7 +122,8 @@ def pairs(iterable): yield from zip(i1, i2) def score(root): - return sum([self.matrix[i, j] for i, j in pairs(indices(root))]) + return sum(self.matrix[i, j] for i, j in pairs(indices(root))) + score_unordered = 
score(self.cluster) score_ordered = score(ordered) self.assertGreater(score_unordered, score_ordered) diff --git a/Orange/tests/test_contingency.py b/Orange/tests/test_contingency.py index 7bef15aefbd..3240eb8ae83 100644 --- a/Orange/tests/test_contingency.py +++ b/Orange/tests/test_contingency.py @@ -377,7 +377,7 @@ def test_compute_contingency_row_attribute_sparse(self): def test_compute_contingency_invalid(self): rstate = np.random.RandomState(0xFFFF) X = data.ContinuousVariable("X") - C = data.DiscreteVariable("C", values=["C{}".format(i + 1) for i in range(1024)]) + C = data.DiscreteVariable("C", values=[f"C{i + 1}" for i in range(1024)]) domain = data.Domain([X], [C]) d = data.Table.from_numpy( domain, diff --git a/Orange/tests/test_distances.py b/Orange/tests/test_distances.py index c8820bc5102..3746e0f7f45 100644 --- a/Orange/tests/test_distances.py +++ b/Orange/tests/test_distances.py @@ -952,7 +952,7 @@ def test_attributes(self): metric = MahalanobisDistance(self.x) self.assertEqual(metric(self.x[0], self.x[1]).shape, (1, 1)) self.assertEqual(metric(self.x).shape, (self.n, self.n)) - self.assertEqual(metric(self.x[0:3], self.x[5:7]).shape, (3, 2)) + self.assertEqual(metric(self.x[:3], self.x[5:7]).shape, (3, 2)) self.assertEqual(metric(self.x1, self.x2).shape, (1, 1)) metric(self.x, impute=True) metric(self.x[:-1, :]) diff --git a/Orange/tests/test_distribution.py b/Orange/tests/test_distribution.py index e271276f3c1..6a6530f86ed 100644 --- a/Orange/tests/test_distribution.py +++ b/Orange/tests/test_distribution.py @@ -381,7 +381,7 @@ def test_random(self): disc = distribution.Continuous(d, "petal length") ans = set() - for i in range(1000): + for _ in range(1000): v = disc.sample() self.assertIn(v, self.freqs) ans.add(v) diff --git a/Orange/tests/test_domain.py b/Orange/tests/test_domain.py index 368a50e6f2d..b133c2dbdae 100644 --- a/Orange/tests/test_domain.py +++ b/Orange/tests/test_domain.py @@ -140,8 +140,10 @@ def test_from_numpy_names(self): d = 
Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)), np.zeros((1, 100))) self.assertTrue(d.anonymous) - self.assertEqual([var.name for var in d.attributes], - ["Feature {}".format(i) for i in range(1, 4)]) + self.assertEqual( + [var.name for var in d.attributes], + [f"Feature {i}" for i in range(1, 4)], + ) self.assertEqual(d.class_var.name, "Target") self.assertEqual([var.name for var in d.metas], ["Meta {:03}".format(i) for i in range(1, 101)]) @@ -170,7 +172,7 @@ def test_from_numpy_values(self): self.assertTrue(d.anonymous) self.assertIsInstance(d.class_var, vartype) if isinstance(vartype, DiscreteVariable): - self.assertEqual(d.class_var.values, ["v{}".format(i) for i in range(1, 3)]) + self.assertEqual(d.class_var.values, [f"v{i}" for i in range(1, 3)]) def test_wrong_vartypes(self): attributes = (age, gender, income) diff --git a/Orange/tests/test_evaluation_scoring.py b/Orange/tests/test_evaluation_scoring.py index 64d1ed18a20..fc4650338af 100644 --- a/Orange/tests/test_evaluation_scoring.py +++ b/Orange/tests/test_evaluation_scoring.py @@ -78,11 +78,7 @@ def test_precision_multiclass(self): self.assertEqual(res[0], 1.) self.assertAlmostEqual(res[1], 0.78333, 5) - for target, prob in ((0, 2 / 3), - (1, 1 / 4), - (2, 1 / 1), - (3, 1 / 1), - (4, 1 / 1)): + for target, prob in ((0, 2 / 3), (1, 1 / 4), (2, 1), (3, 1), (4, 1)): res = self.score(results, target=target, average=None) self.assertEqual(res[0], 1.) self.assertEqual(res[1], prob) @@ -133,11 +129,7 @@ def test_recall_multiclass(self): self.assertEqual(res[0], 1.) self.assertAlmostEqual(res[1], 0.6) - for target, prob in ((0, 2 / 2), - (1, 1 / 2), - (2, 1 / 3), - (3, 1 / 1), - (4, 1 / 2)): + for target, prob in ((0, 1), (1, 1 / 2), (2, 1 / 3), (3, 1), (4, 1 / 2)): res = self.score(results, target=target) self.assertEqual(res[0], 1.) 
self.assertEqual(res[1], prob) diff --git a/Orange/tests/test_impute.py b/Orange/tests/test_impute.py index 74e328780b0..8ac8fef736c 100644 --- a/Orange/tests/test_impute.py +++ b/Orange/tests/test_impute.py @@ -311,7 +311,7 @@ def test_replacement(self): ) table = data.Table.from_numpy(domain, np.array(X)) - for i in range(0, 3): + for i in range(3): v = impute.Random()(table, domain[i]) self.assertTrue(np.all(np.isfinite(v.compute_value(table)))) diff --git a/Orange/tests/test_instance.py b/Orange/tests/test_instance.py index c8ac3c1eb82..ce74e45743a 100644 --- a/Orange/tests/test_instance.py +++ b/Orange/tests/test_instance.py @@ -41,8 +41,7 @@ def create_domain(self, attributes=(), classes=(), metas=()): meta_vars = [DiscreteVariable(name=m, values=map(str, range(5))) if isinstance(m, str) else m for m in metas] - domain = Domain(attr_vars, class_vars, meta_vars) - return domain + return Domain(attr_vars, class_vars, meta_vars) def test_init_x_no_data(self): domain = self.mock_domain() @@ -265,8 +264,8 @@ def test_str(self): attr.number_of_decimals = 0 self.assertEqual( str(inst), - "[{}]".format(", ".join("{}".format(x) - for x in range(len(self.attributes))))) + "[{}]".format(", ".join(f"{x}" for x in range(len(self.attributes)))), + ) def test_repr(self): domain = self.create_domain(self.attributes) diff --git a/Orange/tests/test_orangetree.py b/Orange/tests/test_orangetree.py index a1d4a4e5a03..c4f22648df3 100644 --- a/Orange/tests/test_orangetree.py +++ b/Orange/tests/test_orangetree.py @@ -49,7 +49,7 @@ def test_min_samples_leaf(self): # tests for lim in (1, 2, 30): args = dict(min_samples_split=2, min_samples_leaf=lim) - args.update(self.no_pruning_args) + args |= self.no_pruning_args clf = self.TreeLearner(binarize=False, **args)(self.data_mixed) self.assertTrue(all(len(node.subset) >= lim for node in self.all_nodes(clf.root) @@ -69,8 +69,9 @@ def test_refuse_binarize_too_many_values(self): lim = clf.MAX_BINARIZATION domain = Domain( - 
[DiscreteVariable("x", ("v{}".format(i) for i in range(lim + 1)))], - self.class_var) + [DiscreteVariable("x", (f"v{i}" for i in range(lim + 1)))], + self.class_var, + ) data = Table(domain, np.zeros((100, 2))) clf.binarize = False @@ -79,8 +80,8 @@ def test_refuse_binarize_too_many_values(self): self.assertRaises(ValueError, clf, data) domain = Domain( - [DiscreteVariable("x", ("v{}".format(i) for i in range(lim)))], - self.class_var) + [DiscreteVariable("x", (f"v{i}" for i in range(lim)))], self.class_var + ) data = Table(domain, np.zeros((100, 2))) clf.binarize = True clf(data) diff --git a/Orange/tests/test_score_feature.py b/Orange/tests/test_score_feature.py index 8b90a05cd99..dd1dbf7faf1 100644 --- a/Orange/tests/test_score_feature.py +++ b/Orange/tests/test_score_feature.py @@ -131,7 +131,7 @@ def test_rrelieff(self): scorer = RReliefF(random_state=42) weights = scorer(xor, None) best = {xor.domain[attr].name for attr in weights.argsort()[-2:]} - self.assertSetEqual(set(a.name for a in xor.domain.attributes[:2]), best) + self.assertSetEqual({a.name for a in xor.domain.attributes[:2]}, best) weights = scorer(self.housing, None) best = {self.housing.domain[attr].name for attr in weights.argsort()[-6:]} for feature in ('LSTAT', 'RM'): diff --git a/Orange/tests/test_simple_tree.py b/Orange/tests/test_simple_tree.py index 475f504fc65..55222dadecf 100644 --- a/Orange/tests/test_simple_tree.py +++ b/Orange/tests/test_simple_tree.py @@ -30,10 +30,11 @@ def setUp(self): y_cls[np.random.random(self.N) < 0.1] = np.nan y_reg[np.random.random(self.N) < 0.1] = np.nan - di = [Orange.data.domain.DiscreteVariable( - 'd{}'.format(i), ["0", "1"]) for i in range(self.Mi)] - df = [Orange.data.domain.ContinuousVariable( - 'c{}'.format(i)) for i in range(self.Mf)] + di = [ + Orange.data.domain.DiscreteVariable(f'd{i}', ["0", "1"]) + for i in range(self.Mi) + ] + df = [Orange.data.domain.ContinuousVariable(f'c{i}') for i in range(self.Mf)] dcls = 
Orange.data.domain.DiscreteVariable('yc', ["0", "1", "2"]) dreg = Orange.data.domain.ContinuousVariable('yr') domain_cls = Orange.data.domain.Domain(di + df, dcls) diff --git a/Orange/tests/test_tab_reader.py b/Orange/tests/test_tab_reader.py index 85c1a8aa846..e44cc316621 100644 --- a/Orange/tests/test_tab_reader.py +++ b/Orange/tests/test_tab_reader.py @@ -236,7 +236,7 @@ def test_metadata(self): self.data.attributes["b"] = "bb" fname = path.join(tempdir, "out.tab") TabReader.write_table_metadata(fname, self.data) - self.assertTrue(path.isfile(fname + ".metadata")) + self.assertTrue(path.isfile(f"{fname}.metadata")) finally: shutil.rmtree(tempdir) @@ -246,7 +246,7 @@ def test_no_metadata(self): self.data.attributes = OrderedDict() fname = path.join(tempdir, "out.tab") TabReader.write_table_metadata(fname, self.data) - self.assertFalse(path.isfile(fname + ".metadata")) + self.assertFalse(path.isfile(f"{fname}.metadata")) finally: shutil.rmtree(tempdir) @@ -256,10 +256,10 @@ def test_had_metadata_now_there_is_none(self): self.data.attributes["a"] = "aa" fname = path.join(tempdir, "out.tab") TabReader.write_table_metadata(fname, self.data) - self.assertTrue(path.isfile(fname + ".metadata")) + self.assertTrue(path.isfile(f"{fname}.metadata")) del self.data.attributes["a"] TabReader.write_table_metadata(fname, self.data) - self.assertFalse(path.isfile(fname + ".metadata")) + self.assertFalse(path.isfile(f"{fname}.metadata")) finally: shutil.rmtree(tempdir) @@ -277,7 +277,7 @@ def test_number_of_decimals(self): def test_many_discrete(): b = io.StringIO() b.write("Poser\nd\n\n") - b.writelines("K" + str(i) + "\n" for i in range(30000)) + b.writelines(f"K{str(i)}" + "\n" for i in range(30000)) start = time.time() _ = TabReader(b).read() elapsed = time.time() - start diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py index 27c8e9986f1..befc422785d 100644 --- a/Orange/tests/test_table.py +++ b/Orange/tests/test_table.py @@ -417,13 +417,13 @@ def 
test_has_missing(self): def test_shuffle(self): d = data.Table("zoo") crc = d.checksum() - names = set(str(x["name"]) for x in d) + names = {str(x["name"]) for x in d} ids = d.ids with d.unlocked_reference(): d.shuffle() self.assertNotEqual(crc, d.checksum()) - self.assertSetEqual(names, set(str(x["name"]) for x in d)) + self.assertSetEqual(names, {str(x["name"]) for x in d}) self.assertTrue(np.any(ids - d.ids != 0)) crc2 = d.checksum() @@ -443,31 +443,22 @@ def test_shuffle(self): @staticmethod def not_less_ex(ex1, ex2): - for v1, v2 in zip(ex1, ex2): - if v1 != v2: - return v1 < v2 - return True + return next((v1 < v2 for v1, v2 in zip(ex1, ex2) if v1 != v2), True) @staticmethod def sorted(d): - for i in range(1, len(d)): - if not TableTestCase.not_less_ex(d[i - 1], d[i]): - return False - return True + return all(TableTestCase.not_less_ex(d[i - 1], d[i]) for i in range(1, len(d))) @staticmethod def not_less_ex_ord(ex1, ex2, ord): - for a in ord: - if ex1[a] != ex2[a]: - return ex1[a] < ex2[a] - return True + return next((ex1[a] < ex2[a] for a in ord if ex1[a] != ex2[a]), True) @staticmethod def sorted_ord(d, ord): - for i in range(1, len(d)): - if not TableTestCase.not_less_ex_ord(d[i - 1], d[i], ord): - return False - return True + return all( + TableTestCase.not_less_ex_ord(d[i - 1], d[i], ord) + for i in range(1, len(d)) + ) def test_copy(self): t = data.Table.from_numpy( @@ -817,8 +808,11 @@ def test_filter_string_works_for_numeric_columns(self): for args, expected in filters: f = fs(var, *args) filtered_data = filter.Values([f])(data) - self.assertEqual(len(filtered_data), expected["rows"], - "{} returned wrong number of rows".format(args)) + self.assertEqual( + len(filtered_data), + expected["rows"], + f"{args} returned wrong number of rows", + ) def test_filter_value_continuous(self): d = data.Table("iris") @@ -1138,8 +1132,11 @@ def test_valueFilter_stringList(self): for args, expected in filters: f = fs(var, *args) filtered_data = 
filter.Values([f])(data) - self.assertEqual(len(filtered_data), expected["rows"], - "{} returned wrong number of rows".format(args)) + self.assertEqual( + len(filtered_data), + expected["rows"], + f"{args} returned wrong number of rows", + ) def test_table_dtypes(self): table = data.Table("iris") @@ -1213,16 +1210,16 @@ def test_str_sparse(self): iris.X = sp.csr_matrix(iris.X) # instance s0 = "[sepal length=5.1, sepal width=3.5, " \ - "petal length=1.4, petal width=0.2 | Iris-setosa]" + "petal length=1.4, petal width=0.2 | Iris-setosa]" self.assertEqual(s0, str(iris[0])) # table table_str = str(iris) lines = table_str.split('\n') self.assertEqual(150, len(lines)) - self.assertEqual("[" + s0 + ",", lines[0]) + self.assertEqual(f"[{s0},", lines[0]) slast = "[sepal length=5.9, sepal width=3.0, " \ - "petal length=5.1, petal width=1.8 | Iris-virginica]" - self.assertEqual(" " + slast + "]", lines[-1]) + "petal length=5.1, petal width=1.8 | Iris-virginica]" + self.assertEqual(f" {slast}]", lines[-1]) def column_sizes(table): @@ -1277,8 +1274,7 @@ def create_domain(self, attributes=(), classes=(), metas=()): meta_vars = [data.DiscreteVariable(name=m, values=map(str, range(5))) if isinstance(m, str) else m for m in metas] - domain = data.Domain(attr_vars, class_vars, meta_vars) - return domain + return data.Domain(attr_vars, class_vars, meta_vars) class CreateEmptyTable(TableTests): @@ -1334,6 +1330,7 @@ def test_calling_new_with_keyword_argument_filename_calls_read_data( class CreateTableWithUrl(TableTests): def test_url_no_scheme(self): + class SkipRest(Exception): pass @@ -1346,7 +1343,7 @@ class SkipRest(Exception): except SkipRest: pass - mock_urlopen.assert_called_once_with('http://' + url) + mock_urlopen.assert_called_once_with(f'http://{url}') class _MockUrlOpen(MagicMock): headers = {'content-disposition': 'attachment; filename="Something-FormResponses.tsv"; ' @@ -1500,7 +1497,7 @@ def test_creates_a_table_from_domain_and_list_and_metas(self): def 
test_creates_a_table_from_list_of_instances(self): table = data.Table('iris') - new_table = data.Table.from_list(table.domain, [d for d in table]) + new_table = data.Table.from_list(table.domain, list(table)) self.assertIs(table.domain, new_table.domain) np.testing.assert_almost_equal(table.X, new_table.X) np.testing.assert_almost_equal(table.Y, new_table.Y) @@ -1510,7 +1507,7 @@ def test_creates_a_table_from_list_of_instances(self): def test_creates_a_table_from_list_of_instances_with_metas(self): table = data.Table('zoo') - new_table = data.Table.from_list(table.domain, [d for d in table]) + new_table = data.Table.from_list(table.domain, list(table)) self.assertIs(table.domain, new_table.domain) np.testing.assert_almost_equal(table.X, new_table.X) np.testing.assert_almost_equal(table.Y, new_table.Y) @@ -1543,7 +1540,7 @@ def test_creates_a_table_with_given_X_Y_and_metas(self): np.testing.assert_almost_equal(table.metas, self.meta_data) def test_creates_a_discrete_class_if_Y_has_few_distinct_values(self): - Y = np.array([float(np.random.randint(0, 2)) for i in self.data]) + Y = np.array([float(np.random.randint(0, 2)) for _ in self.data]) table = data.Table.from_numpy(None, self.data, Y, self.meta_data) np.testing.assert_almost_equal(table.Y, Y) @@ -1801,11 +1798,16 @@ def test_can_use_metas_as_new_columns(self): _, _, m = column_sizes(self.table) order = np.random.permutation(range(-m, 0)) new_metas = [self.domain.metas[::-1][i] for i in order] - new_domain = self.create_domain(new_metas[0:2], [new_metas[2]], new_metas[3:5]) + new_domain = self.create_domain(new_metas[:2], [new_metas[2]], new_metas[3:5]) new_table = data.Table.from_table(new_domain, self.table) self.assert_table_with_filter_matches( - new_table, self.table, xcols=order[0:2], ycols=order[2], mcols=order[3:5]) + new_table, + self.table, + xcols=order[:2], + ycols=order[2], + mcols=order[3:5], + ) def test_can_use_combination_of_all_as_new_columns(self): a, c, m = column_sizes(self.table) @@ -3369,8 
+3371,8 @@ def test_from_table_add_lots_of_sparse_columns(self): # add 2*n_attrs+1 sparse feature, should became sparse domain = self.iris.domain.copy() domain.attributes += tuple( - ContinuousVariable('S' + str(i), compute_value=SparseCV(), sparse=True) - for i in range(2*n_attrs + 1) + ContinuousVariable(f'S{str(i)}', compute_value=SparseCV(), sparse=True) + for i in range(2 * n_attrs + 1) ) d = self.iris.transform(domain) self.assertTrue(sp.issparse(d.X)) @@ -3397,7 +3399,7 @@ def test_from_table_sparse_metas_with_strings(self): # replace metas with text and 100 sparse features, should be dense domain = self.iris.domain.copy() domain._metas = (StringVariable('text'),) + tuple( - ContinuousVariable('S' + str(i), compute_value=SparseCV(), sparse=True) + ContinuousVariable(f'S{str(i)}', compute_value=SparseCV(), sparse=True) for i in range(100) ) d = self.iris.transform(domain) diff --git a/Orange/tests/test_txt_reader.py b/Orange/tests/test_txt_reader.py index 4d115e1493a..ad2e7dbceda 100644 --- a/Orange/tests/test_txt_reader.py +++ b/Orange/tests/test_txt_reader.py @@ -63,11 +63,11 @@ def read_easy(self, s, name): f1, f2, f3 = table.domain.variables self.assertIsInstance(f1, DiscreteVariable) - self.assertEqual(f1.name, name + "1") + self.assertEqual(f1.name, f"{name}1") self.assertIsInstance(f2, ContinuousVariable) - self.assertEqual(f2.name, name + "2") + self.assertEqual(f2.name, f"{name}2") self.assertIsInstance(f3, ContinuousVariable) - self.assertEqual(f3.name, name + "3") + self.assertEqual(f3.name, f"{name}3") finally: os.remove(filename) diff --git a/Orange/tests/test_xlsx_reader.py b/Orange/tests/test_xlsx_reader.py index 75eedcf5487..b5b66360f88 100644 --- a/Orange/tests/test_xlsx_reader.py +++ b/Orange/tests/test_xlsx_reader.py @@ -17,11 +17,11 @@ def get_dataset(name): def get_xlsx_reader(name: str) -> io.ExcelReader: - return io.ExcelReader(get_dataset(name + ".xlsx")) + return io.ExcelReader(get_dataset(f"{name}.xlsx")) def get_xls_reader(name: 
str) -> io.XlsReader: - return io.XlsReader(get_dataset(name + ".xls")) + return io.XlsReader(get_dataset(f"{name}.xls")) def read_file(reader: Callable, name: str) -> Table: @@ -87,7 +87,7 @@ def test_read(self, reader: Callable[[str], io.FileFormat]): self.assertEqual(len(domain.attributes), 4) for i, var in enumerate(domain.attributes): self.assertIsInstance(var, ContinuousVariable) - self.assertEqual(var.name, "Feature {}".format(i + 1)) + self.assertEqual(var.name, f"Feature {i + 1}") np.testing.assert_almost_equal(table.X, np.array([[0.1, 0.5, 0.1, 21], [0.2, 0.1, 2.5, 123], diff --git a/Orange/tree.py b/Orange/tree.py index 8a76ce43aa4..7d96dc98994 100644 --- a/Orange/tree.py +++ b/Orange/tree.py @@ -93,12 +93,14 @@ def _set_child_descriptions(self, child, child_idx, conditions): child.condition = conditions[attr] & in_brnch else: child.condition = in_brnch - vals = [attr.values[j] for j in sorted(child.condition)] - if not vals: - child.description = "(unreachable)" + if vals := [attr.values[j] for j in sorted(child.condition)]: + child.description = ( + vals[0] + if len(vals) == 1 + else f'{", ".join(vals[:-1])} or {vals[-1]}' + ) else: - child.description = vals[0] if len(vals) == 1 else \ - "{} or {}".format(", ".join(vals[:-1]), vals[-1]) + child.description = "(unreachable)" class NumericNode(Node): @@ -125,8 +127,7 @@ def _set_child_descriptions(self, child, child_idx, conditions): elif child_idx == 1 and (lower is None or threshold > lower): lower = threshold child.condition = (lower, upper) - child.description = \ - "{} {}".format("≤>"[child_idx], attr.str_val(threshold)) + child.description = f'{"≤>"[child_idx]} {attr.str_val(threshold)}' class TreeModel(TreeModelInterface): @@ -205,13 +206,12 @@ def predict(self, X): predictions = self.get_values(X) if self.domain.class_var.is_continuous: return predictions[:, 0] - else: - sums = np.sum(predictions, axis=1) - # This can't happen because nodes with 0 instances are prohibited - # zeros = (sums == 0) 
- # predictions[zeros] = 1 - # sums[zeros] = predictions.shape[1] - return predictions / sums[:, np.newaxis] + sums = np.sum(predictions, axis=1) + # This can't happen because nodes with 0 instances are prohibited + # zeros = (sums == 0) + # predictions[zeros] = 1 + # sums[zeros] = predictions.shape[1] + return predictions / sums[:, np.newaxis] def node_count(self): def _count(node): @@ -236,8 +236,7 @@ def get_instances(self, nodes): return self.instances[indices] def get_indices(self, nodes): - subsets = [node.subset for node in nodes] - if subsets: + if subsets := [node.subset for node in nodes]: return np.unique(np.hstack(subsets)) @staticmethod @@ -259,14 +258,13 @@ def rule(cls, node): if isinstance(parent, NumericNode): lower, upper = node.condition if upper is None: - rules.append("{} > {}".format(name, attr.repr_val(lower))) + rules.append(f"{name} > {attr.repr_val(lower)}") elif lower is None: - rules.append("{} ≤ {}".format(name, attr.repr_val(upper))) + rules.append(f"{name} ≤ {attr.repr_val(upper)}") else: - rules.append("{} < {} ≤ {}".format( - attr.repr_val(lower), name, attr.repr_val(upper))) + rules.append(f"{attr.repr_val(lower)} < {name} ≤ {attr.repr_val(upper)}") else: - rules.append("{}: {}".format(name, node.description)) + rules.append(f"{name}: {node.description}") used_attrs.add(node.parent.attr_idx) return rules diff --git a/Orange/util.py b/Orange/util.py index 23d43a23277..5292fbea39c 100644 --- a/Orange/util.py +++ b/Orange/util.py @@ -164,7 +164,7 @@ def deprecated(obj): Instead, use C.new() ... 
'old behavior' """ - alternative = ('; Instead, use ' + obj) if isinstance(obj, str) else '' + alternative = f'; Instead, use {obj}' if isinstance(obj, str) else '' def decorator(func): @wraps(func) @@ -184,9 +184,7 @@ def literal_eval(literal): import ast # ast.literal_eval does not parse empty set ¯\_(ツ)_/¯ - if literal == "set()": - return set() - return ast.literal_eval(literal) + return set() if literal == "set()" else ast.literal_eval(literal) op_map = { @@ -274,13 +272,13 @@ def __new__(mcs, name, bases, attrs): cls.registry[name] = cls return cls - def __iter__(cls): - return iter(cls.registry) + def __iter__(self): + return iter(self.registry) - def __str__(cls): - if cls in cls.registry.values(): - return cls.__name__ - return '{}({{{}}})'.format(cls.__name__, ', '.join(cls.registry)) + def __str__(self): + if self in self.registry.values(): + return self.__name__ + return '{}({{{}}})'.format(self.__name__, ', '.join(self.registry)) def namegen(prefix='_', *args, spec_count=count, **kwargs): @@ -337,8 +335,7 @@ def inherit_docstrings(cls): for method in cls.__dict__.values(): if inspect.isfunction(method) and method.__doc__ is None: for parent in cls.__mro__[1:]: - __doc__ = getattr(parent, method.__name__, None).__doc__ - if __doc__: + if __doc__ := getattr(parent, method.__name__, None).__doc__: method.__doc__ = __doc__ break return cls @@ -382,7 +379,7 @@ def Reprable_repr_pretty(name, itemsiter, printer, cycle): printer.text("{0}(...)".format("name")) else: def printitem(field, value): - printer.text(field + "=") + printer.text(f"{field}=") printer.pretty(value) def printsep(): @@ -461,12 +458,11 @@ def _reprable_fields(self): def _reprable_omit_param(self, name, default, value): if default is value: return True - if type(default) is type(value): - try: - return default == value - except (ValueError, TypeError): - return False - else: + if type(default) is not type(value): + return False + try: + return default == value + except (ValueError, 
TypeError): return False def _reprable_items(self): @@ -560,10 +556,7 @@ def funcv(*args, out=None, dtype=dtype, casting="unsafe", **kwargs): out = np.empty(shape, dtype) res = func_(*args, out, dtype=dtype, casting=casting, **kwargs) - if res.shape == () and not have_out: - return res.item() - else: - return res + return res.item() if res.shape == () and not have_out else res return funcv diff --git a/Orange/widgets/data/owcolor.py b/Orange/widgets/data/owcolor.py index e0018c3d6c6..d67f28b71a9 100644 --- a/Orange/widgets/data/owcolor.py +++ b/Orange/widgets/data/owcolor.py @@ -101,10 +101,7 @@ def reset(self): @property def colors(self): - if self.new_colors is None: - return self.var.colors - else: - return self.new_colors + return self.var.colors if self.new_colors is None else self.new_colors def set_color(self, i, color): if self.new_colors is None: @@ -317,9 +314,7 @@ def data(self, index, role=Qt.DisplayRole): return QColor(*color) if role == Qt.ToolTipRole: return color_to_hex(color) - if role == ColorRole: - return color - return None + return color if role == ColorRole else None def setData(self, index, value, role): row, col = index.row(), index.column() @@ -367,9 +362,7 @@ def _column1(): return palette if role == StripRole: return palette.color_strip(128, 16) - if role == Qt.SizeHintRole: - return QSize(150, 16) - return None + return QSize(150, 16) if role == Qt.SizeHintRole else None def _column2(): if role == Qt.SizeHintRole: diff --git a/Orange/widgets/data/owconcatenate.py b/Orange/widgets/data/owconcatenate.py index 8c395892319..352f2f938d4 100644 --- a/Orange/widgets/data/owconcatenate.py +++ b/Orange/widgets/data/owconcatenate.py @@ -188,10 +188,7 @@ def incompatible_types(self): types_.add(type(self.primary_data)) for table in self.more_data: types_.add(type(table)) - if len(types_) > 1: - return True - - return False + return len(types_) > 1 @gui.deferred def commit(self): @@ -201,10 +198,7 @@ def commit(self): tables = [self.primary_data] + 
list(self.more_data) domain = self.primary_data.domain elif self.more_data: - if self.ignore_compute_value: - tables = self._dumb_tables() - else: - tables = self.more_data + tables = self._dumb_tables() if self.ignore_compute_value else self.more_data domains = [table.domain for table in tables] domain = self.merge_domains(domains) @@ -212,8 +206,7 @@ def commit(self): assert domain is not None names = [getattr(t, 'name', '') for t in tables] if len(names) != len(set(names)): - names = ['{} ({})'.format(name, i) - for i, name in enumerate(names)] + names = [f'{name} ({i})' for i, name in enumerate(names)] source_var = Orange.data.DiscreteVariable( get_unique_names(domain, self.source_attr_name), values=names @@ -223,8 +216,7 @@ def commit(self): domain, **{places[self.source_column_role]: (source_var,)}) - tables = [table.transform(domain) for table in tables] - if tables: + if tables := [table.transform(domain) for table in tables]: data = type(tables[0]).concatenate(tables) if source_var: source_ids = np.array(list(flatten( @@ -285,9 +277,9 @@ def send_report(self): else: items["Domain"] = self.tr(self.domain_opts[self.merge_type]).lower() if self.append_source_column: - items["Source data ID"] = "{} (as {})".format( - self.source_attr_name, - self.id_roles[self.source_column_role].lower()) + items[ + "Source data ID" + ] = f"{self.source_attr_name} (as {self.id_roles[self.source_column_role].lower()})" self.report_items(items) def merge_domains(self, domains): @@ -333,11 +325,9 @@ def _unique_vars(seq: List[Orange.data.Variable]): continue if desc.template.is_discrete: sattr_values = set(desc.values) - # don't use sets: keep the order - missing_values = tuple( + if missing_values := tuple( val for val in el.values if val not in sattr_values - ) - if missing_values: + ): attrs[el] = attrs[el]._replace( original=False, values=desc.values + missing_values) diff --git a/Orange/widgets/data/owcontinuize.py b/Orange/widgets/data/owcontinuize.py index 
e0b021bdec4..2b2be5f6066 100644 --- a/Orange/widgets/data/owcontinuize.py +++ b/Orange/widgets/data/owcontinuize.py @@ -125,12 +125,13 @@ def enable_normalization(self): button.setEnabled(True) def constructContinuizer(self): - conzer = DomainContinuizer( - multinomial_treatment=self.multinomial_treats[self.multinomial_treatment][1], + return DomainContinuizer( + multinomial_treatment=self.multinomial_treats[ + self.multinomial_treatment + ][1], continuous_treatment=self.continuous_treatment, - class_treatment=self.class_treats[self.class_treatment][1] + class_treatment=self.class_treats[self.class_treatment][1], ) - return conzer @gui.deferred def commit(self): @@ -190,8 +191,7 @@ def make_indicator_var(source, value_ind, weight=None): else: indicator = WeightedIndicator(source, value=value_ind, weight=weight) return Orange.data.ContinuousVariable( - "{}={}".format(source.name, source.values[value_ind]), - compute_value=indicator + f"{source.name}={source.values[value_ind]}", compute_value=indicator ) @@ -380,12 +380,12 @@ def __call__(self, data): any(var.is_discrete and len(var.values) > 2 for var in domain)): raise ValueError("Domain has multinomial attributes") - newdomain = continuize_domain( + return continuize_domain( data, self.multinomial_treatment, self.continuous_treatment, - self.class_treatment) - return newdomain + self.class_treatment, + ) if __name__ == "__main__": # pragma: no cover diff --git a/Orange/widgets/data/owcorrelations.py b/Orange/widgets/data/owcorrelations.py index 1b4d5f301dc..1739316c17f 100644 --- a/Orange/widgets/data/owcorrelations.py +++ b/Orange/widgets/data/owcorrelations.py @@ -72,9 +72,13 @@ def get_clusters_of_attributes(self): data = Normalize()(self.data).X.T kmeans = KMeans(n_clusters=self.n_clusters, random_state=0).fit(data) labels_attrs = sorted([(l, i) for i, l in enumerate(kmeans.labels_)]) - return [Cluster(instances=list(pair[1] for pair in group), - centroid=kmeans.cluster_centers_[l]) - for l, group in 
groupby(labels_attrs, key=lambda x: x[0])] + return [ + Cluster( + instances=[pair[1] for pair in group], + centroid=kmeans.cluster_centers_[l], + ) + for l, group in groupby(labels_attrs, key=lambda x: x[0]) + ] def get_states(self, initial_state): """ @@ -148,7 +152,7 @@ def compute_score(self, state): data = self.master.cont_data.X corr = pearsonr if corr_type == CorrelationType.PEARSON else spearmanr r, p_value = corr(data[:, attr1], data[:, attr2]) - return -abs(r) if not np.isnan(r) else NAN, r, p_value + return NAN if np.isnan(r) else -abs(r), r, p_value def row_for_state(self, score, state): attrs = sorted((self.attrs[x] for x in state), key=attrgetter("name")) diff --git a/Orange/widgets/data/owcreateclass.py b/Orange/widgets/data/owcreateclass.py index 936ee69222c..86862d3a307 100644 --- a/Orange/widgets/data/owcreateclass.py +++ b/Orange/widgets/data/owcreateclass.py @@ -343,9 +343,7 @@ def _add_line(): self.remove_buttons.append(button) self.rules_box.addWidget(button, n_lines, 0) self.counts.append([]) - for coli, kwargs in enumerate( - (dict(), - dict(styleSheet="color: gray"))): + for coli, kwargs in enumerate(({}, dict(styleSheet="color: gray"))): label = QLabel(alignment=Qt.AlignCenter, **kwargs) self.counts[-1].append(label) self.rules_box.addWidget(label, n_lines, 3 + coli) @@ -401,8 +399,10 @@ def class_labels(self): if re.match("^C\\d+", label)), default=0) class_count = count(largest_c + 1) - return [label_edit.text() or "C{}".format(next(class_count)) - for label_edit, _ in self.line_edits] + return [ + label_edit.text() or f"C{next(class_count)}" + for label_edit, _ in self.line_edits + ] def update_counts(self): """Recompute and update the counts of matches.""" diff --git a/Orange/widgets/data/owcreateinstance.py b/Orange/widgets/data/owcreateinstance.py index 182d8552c83..5e66fd76023 100644 --- a/Orange/widgets/data/owcreateinstance.py +++ b/Orange/widgets/data/owcreateinstance.py @@ -110,6 +110,8 @@ def __init__(self, parent: QWidget, 
variable: ContinuousVariable, sp_edit = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed) sp_edit.setHorizontalStretch(1) + + class DoubleSpinBox(QDoubleSpinBox): def sizeHint(self) -> QSize: size: QSize = super().sizeHint() @@ -122,9 +124,8 @@ def validate(self, text: str, pos: int) -> Tuple[int, str, int]: return state, text, pos def textFromValue(self, value): - if not np.isfinite(value): - return "?" - return super().textFromValue(value) + return super().textFromValue(value) if np.isfinite(value) else "?" + self._spin = DoubleSpinBox( parent, diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 7cb790f5d88..bd2646e71e0 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -119,10 +119,7 @@ class Options: RowSpec = RowSpec ColumnType = ColumnType - def __init__(self, encoding='utf-8', dialect=csv.excel(), - columntypes: Iterable[Tuple[range, 'ColumnType']] = (), - rowspec=((range(0, 1), RowSpec.Header),), - decimal_separator=".", group_separator="") -> None: + def __init__(self, encoding='utf-8', dialect=csv.excel(), columntypes: Iterable[Tuple[range, 'ColumnType']] = (), rowspec=((range(1), RowSpec.Header), ), decimal_separator=".", group_separator="") -> None: self.encoding = encoding self.dialect = dialect self.columntypes = list(columntypes) # type: List[Tuple[range, ColumnType]] @@ -374,10 +371,7 @@ def dialog_button_box_set_enabled(buttonbox, enabled): if not enabled: if b.property(stashname) is None: b.setProperty(stashname, b.isEnabledTo(buttonbox)) - b.setEnabled( - role == QDialogButtonBox.RejectRole or - role == QDialogButtonBox.HelpRole - ) + b.setEnabled(role in [QDialogButtonBox.RejectRole, QDialogButtonBox.HelpRole]) else: stashed_state = b.property(stashname) if isinstance(stashed_state, bool): @@ -602,10 +596,7 @@ def default_options_for_mime_type( pass else: break - if header: - rowspec = [(range(0, 1), RowSpec.Header)] - else: - rowspec = [] + rowspec = [(range(1), 
RowSpec.Header)] if header else [] return Options(dialect=dialect, encoding=encoding, rowspec=rowspec) @@ -957,10 +948,7 @@ def current_item(self): return None item = self.recent_combo.model().item(idx) # type: QStandardItem - if isinstance(item, ImportItem): - return item - else: - return None + return item if isinstance(item, ImportItem) else None def _activate_import_dialog(self): """Activate the Import Options dialog for the current item.""" @@ -1014,7 +1002,7 @@ def set_selected_file(self, filename, options=None): def _local_settings(cls): # type: () -> QSettings """Return a QSettings instance with local persistent settings.""" - filename = "{}.ini".format(qname(cls)) + filename = f"{qname(cls)}.ini" fname = os.path.join(settings.widget_settings_dir(), filename) return QSettings(fname, QSettings.IniFormat) @@ -1142,9 +1130,7 @@ def __set_running_state(self): self.load_button.setText("Restart") path = self.current_item().path() self.Error.clear() - self.summary_text.setText( - "
Loading: {}
".format(prettyfypath(path)) - ) + self.summary_text.setText(f"
Loading: {prettyfypath(path)}
") def __clear_running_state(self, ): self.progressBarFinished() @@ -1766,10 +1752,7 @@ def index_where(iterable, pred): If no element matches return `None`. """ - for i, el in enumerate(iterable): - if pred(el): - return i - return None + return next((i for i, el in enumerate(iterable) if pred(el)), None) def pandas_to_table(df): @@ -1823,9 +1806,8 @@ def pandas_to_table(df): var = Orange.data.ContinuousVariable.make(str(header)) else: warnings.warn( - "Column '{}' with dtype: {} skipped." - .format(header, series.dtype), - UserWarning + f"Column '{header}' with dtype: {series.dtype} skipped.", + UserWarning, ) continue columns.append((var, orangecol)) @@ -1839,11 +1821,7 @@ def pandas_to_table(df): else: X = np.empty((df.shape[0], 0), dtype=np.float64) metas = [v for v, _ in cols_m] - if cols_m: - M = np.column_stack([a for _, a in cols_m]) - else: - M = None - + M = np.column_stack([a for _, a in cols_m]) if cols_m else None domain = Orange.data.Domain(variables, metas=metas) return Orange.data.Table.from_numpy(domain, X, None, M) diff --git a/Orange/widgets/data/owdatainfo.py b/Orange/widgets/data/owdatainfo.py index 21e38391217..7b4ecd4d02d 100644 --- a/Orange/widgets/data/owdatainfo.py +++ b/Orange/widgets/data/owdatainfo.py @@ -110,8 +110,7 @@ def _p_location(data): @staticmethod def _p_size(data, exact=False): - exact = exact or SqlTable is None or not isinstance(data, SqlTable) - if exact: + if exact := exact or SqlTable is None or not isinstance(data, SqlTable): n = len(data) desc = f"{n} {pl(n, 'row')}" else: @@ -120,10 +119,15 @@ def _p_size(data, exact=False): ncols = len(data.domain.variables) + len(data.domain.metas) desc += f", {ncols} {pl(ncols, 'column')}" - sparseness = [s for s, m in (("features", data.X_density), - ("meta attributes", data.metas_density), - ("targets", data.Y_density)) if m() > 1] - if sparseness: + if sparseness := [ + s + for s, m in ( + ("features", data.X_density), + ("meta attributes", data.metas_density), + ("targets", 
data.Y_density), + ) + if m() > 1 + ]: desc += "; sparse {', '.join(sparseness)}" return desc @@ -135,9 +139,8 @@ def _p_targets(self, data): if class_var := data.domain.class_var: if class_var.is_continuous: return "numeric target variable" - else: - nclasses = len(class_var.values) - return "categorical outcome with " \ + nclasses = len(class_var.values) + return "categorical outcome with " \ f"{nclasses} {pl(nclasses, 'class|classes')}" if class_vars := data.domain.class_vars: disc_class = self._count(class_vars, DiscreteVariable) @@ -146,7 +149,7 @@ def _p_targets(self, data): return f"{disc_class} categorical {pl(disc_class, 'target')}" elif not disc_class: return f"{cont_class} numeric {pl(cont_class, 'target')}" - return "multi-target data,
" + self._pack_var_counts(class_vars) + return f"multi-target data,
{self._pack_var_counts(class_vars)}" @classmethod def _p_metas(cls, data): diff --git a/Orange/widgets/data/owdatasampler.py b/Orange/widgets/data/owdatasampler.py index 6f14556cebe..b8ac8dde16c 100644 --- a/Orange/widgets/data/owdatasampler.py +++ b/Orange/widgets/data/owdatasampler.py @@ -128,8 +128,10 @@ def set_sampling_type_i(): self.selected_fold_spin = gui.spin( ibox, self, "selectedFold", 1, self.number_of_folds, addToLayout=False, callback=self.fold_changed) - form.addRow("Unused subset:" if not self.compatibility_mode - else "Selected subset:", self.selected_fold_spin) + form.addRow( + "Selected subset:" if self.compatibility_mode else "Unused subset:", + self.selected_fold_spin, + ) gui.appendRadioButton(sampling, "Bootstrap") @@ -226,8 +228,8 @@ def commit(self): else: if self.indices is None or not self.use_seed: self.updateindices() - if self.indices is None: - return + if self.indices is None: + return if self.sampling_type in ( self.FixedProportion, self.FixedSize, self.Bootstrap): remaining, sample = self.indices @@ -400,10 +402,7 @@ def __call__(self, table): shuffled = np.arange(len(table)) rgen.shuffle(shuffled) empty = np.array([], dtype=int) - if self.n == 0: - return shuffled, empty - else: - return empty, shuffled + return (shuffled, empty) if self.n == 0 else (empty, shuffled) elif self.stratified and table.domain.has_discrete_class: test_size = max(len(table.domain.class_var.values), self.n) splitter = skl.StratifiedShuffleSplit( diff --git a/Orange/widgets/data/owdatasets.py b/Orange/widgets/data/owdatasets.py index 9ab562d3acc..0dc667c2a66 100644 --- a/Orange/widgets/data/owdatasets.py +++ b/Orange/widgets/data/owdatasets.py @@ -68,9 +68,7 @@ def sizeHint(self, option, index): opt.features |= QStyleOptionViewItem.HasDecoration widget = option.widget style = widget.style() if widget is not None else QApplication.style() - sh = style.sizeFromContents( - QStyle.CT_ItemViewItem, opt, QSize(), widget) - return sh + return 
style.sizeFromContents(QStyle.CT_ItemViewItem, opt, QSize(), widget) class SizeDelegate(UniformHeightDelegate): @@ -428,8 +426,7 @@ def selected_dataset(self): def filter(self): filter_string = self.filterLineEdit.text().strip() - proxyModel = self.view.model() - if proxyModel: + if proxyModel := self.view.model(): proxyModel.setFilterFixedString(filter_string) def __on_selection(self): @@ -611,17 +608,16 @@ def description_html(datainfo): """ Summarize a data info as a html fragment. """ - html = [] - year = " ({})".format(str(datainfo.year)) if datainfo.year else "" - source = ", from {}".format(datainfo.source) if datainfo.source else "" - - html.append("{}{}{}".format(escape(datainfo.title), year, source)) - html.append("

{}

".format(datainfo.description)) - seealso = make_html_list(datainfo.seealso) - if seealso: + year = f" ({str(datainfo.year)})" if datainfo.year else "" + source = f", from {datainfo.source}" if datainfo.source else "" + + html = [ + f"{escape(datainfo.title)}{year}{source}", + f"

{datainfo.description}

", + ] + if seealso := make_html_list(datainfo.seealso): html.append("See Also\n" + seealso + "") - refs = make_html_list(datainfo.references) - if refs: + if refs := make_html_list(datainfo.references): html.append("References\n" + refs + "") return "\n".join(html) diff --git a/Orange/widgets/data/owdiscretize.py b/Orange/widgets/data/owdiscretize.py index 82043483a0f..fe1870b4be8 100644 --- a/Orange/widgets/data/owdiscretize.py +++ b/Orange/widgets/data/owdiscretize.py @@ -102,9 +102,11 @@ def _fixed_time_width_discretization( def _mdl_discretization( data: Table, var: Union[ContinuousVariable, str, int]) -> Union[DiscreteVariable, str]: - if not data.domain.has_discrete_class: - return "no discrete class" - return disc.EntropyMDL()(data, var) + return ( + disc.EntropyMDL()(data, var) + if data.domain.has_discrete_class + else "no discrete class" + ) def _custom_discretization( @@ -130,9 +132,11 @@ def _custom_discretization( cuts = [] if any(x >= y for x, y in zip(cuts, cuts[1:])): cuts = [] - if not cuts: - return "invalid cuts" - return disc.Discretizer.create_discretized_var(var, cuts) + return ( + disc.Discretizer.create_discretized_var(var, cuts) + if cuts + else "invalid cuts" + ) class Methods(IntEnum): @@ -336,10 +340,9 @@ def data(self, index, role=Qt.DisplayRole): return None if len(data.values) <= 3: return f'

{tip}' \ - f'{",  ".join(values)}

' + f'{",  ".join(values)}

' else: - return tip + "
" \ - + "".join(f"- {value}
" for value in values) + return (f"{tip}
" + "".join(f"- {value}
" for value in values)) value = super().data(index, role) if role == Qt.DisplayRole: hint, points, values = index.data(Qt.UserRole) @@ -370,7 +373,7 @@ def columnCount(parent): def data(self, _, role=Qt.DisplayRole): if role == Qt.DisplayRole: - return "Default setting: " + format_desc(self.hint) + return f"Default setting: {format_desc(self.hint)}" elif role == Qt.DecorationRole: return DefaultDiscModel.icon elif role == Qt.ToolTipRole: @@ -397,8 +400,8 @@ def validate(string: str, pos: int) -> Tuple[QValidator.State, str, int]: return QValidator.Invalid, string, i prev = None if pos == len(string) >= 2 \ - and string[-1] == " " and string[-2].isdigit(): - string = string[:-1] + ", " + and string[-1] == " " and string[-2].isdigit(): + string = f"{string[:-1]}, " pos += 1 for valuestr in re_custom_sep.split(string.strip()): try: @@ -415,7 +418,7 @@ def show_tip( widget: QWidget, pos: QPoint, text: str, timeout=-1, textFormat=Qt.AutoText, wordWrap=None): """Show a tooltip; used for invalid custom thresholds""" - propname = __name__ + "::show_tip_qlabel" + propname = f"{__name__}::show_tip_qlabel" if timeout < 0: timeout = widget.toolTipDuration() if timeout < 0: @@ -585,7 +588,7 @@ def update(): colors = { QValidator.Intermediate: (Qt.yellow, Qt.black), QValidator.Invalid: (Qt.red, Qt.black), - }.get(state, None) + }.get(state) if colors is None: palette = QPalette() else: @@ -600,6 +603,7 @@ def update(): textFormat=Qt.RichText) else: validator.show_tip(edit, p, "") + return edit, edit.textChanged children = [] @@ -801,8 +805,7 @@ def _update_discretizations(self): DiscDesc(var_hint, points, values), Qt.UserRole) - def _discretize_var(self, var: ContinuousVariable, hint: VarHint) \ - -> Tuple[str, Optional[Variable]]: + def _discretize_var(self, var: ContinuousVariable, hint: VarHint) -> Tuple[str, Optional[Variable]]: """ Discretize using method and data in the hint. 
@@ -814,9 +817,8 @@ def _discretize_var(self, var: ContinuousVariable, hint: VarHint) \ if isinstance(var, TimeVariable): if hint.method_id in (Methods.FixedWidth, Methods.Custom): return ": ", var - else: - if hint.method_id == Methods.FixedWidthTime: - return ": ", var + elif hint.method_id == Methods.FixedWidthTime: + return ": ", var function = Options[hint.method_id].function dvar = function(self.data, var, *hint.args) @@ -850,8 +852,8 @@ def _copy_to_manual(self): texts = set() for key in varkeys: dvar = self.discretized_vars.get(key) - fmt = self.data.domain[key[0]].repr_val if isinstance(dvar, DiscreteVariable): + fmt = self.data.domain[key[0]].repr_val text = ", ".join(map(fmt, dvar.compute_value.points)) texts.add(text) self.var_hints[key] = VarHint(Methods.Custom, (text, )) @@ -921,10 +923,7 @@ def _update_interface(self): self._uncheck_all_buttons() return - if mset == [None]: - method_id, args = Methods.Default, () - else: - method_id, args = mset.pop() + method_id, args = (Methods.Default, ()) if mset == [None] else mset.pop() self._check_button(method_id, True) self._set_values(method_id, args) finally: diff --git a/Orange/widgets/data/oweditdomain.py b/Orange/widgets/data/oweditdomain.py index 8443687b59c..028d747a4ce 100644 --- a/Orange/widgets/data/oweditdomain.py +++ b/Orange/widgets/data/oweditdomain.py @@ -69,9 +69,7 @@ def unique(sequence: Iterable[H]) -> Iterable[H]: class _DataType: def __eq__(self, other): """Equal if `other` has the same type and all elements compare equal.""" - if type(self) is not type(other): - return False - return super().__eq__(other) + return False if type(self) is not type(other) else super().__eq__(other) def __ne__(self, other): return not self == other @@ -856,16 +854,17 @@ def get_dialog_settings(self) -> Dict[str, Any]: "n_values_spin": self.n_values_spin.value(), "name_line_edit": self.new_name_line_edit.text() } - checked = [ - i for i, s in enumerate( - [self.frequent_abs_radio, - self.frequent_rel_radio, - 
self.n_values_radio] - ) if s.isChecked()] - # when checked empty radio button for selected values is selected - # it is not stored in setting since its selection depends on users - # selection of values in list - if checked: + if checked := [ + i + for i, s in enumerate( + [ + self.frequent_abs_radio, + self.frequent_rel_radio, + self.n_values_radio, + ] + ) + if s.isChecked() + ]: settings_dict["selected_radio"] = checked[0] return settings_dict @@ -1308,7 +1307,7 @@ def set_data_categorical(self, var, values, transform=()): SourcePosRole: ci_index[ci], SourceNameRole: ci } - elif ci is not None and cj is not None: + elif ci is not None: # rename or reorder item = { Qt.EditRole: cj, @@ -1355,10 +1354,7 @@ def __categories_mapping(self): midx = model.index(i, 0) category = midx.data(Qt.EditRole) source_pos = midx.data(SourcePosRole) # type: Optional[int] - if source_pos is not None: - source_name = source[source_pos] - else: - source_name = None + source_name = source[source_pos] if source_pos is not None else None state = midx.data(EditStateRole) if state == ItemEditState.Dropped: res.append((source_name, None)) @@ -1447,8 +1443,7 @@ def _remove_category(self): # new level -> remove it model.removeRow(index.row()) else: - assert False, "invalid state '{}' for {}" \ - .format(state, index.row()) + assert False, f"invalid state '{state}' for {index.row()}" def _add_category(self): """ @@ -1520,8 +1515,7 @@ def _rename_selected_categories(self): selmodel = view.selectionModel() index = view.currentIndex() if not selmodel.isSelected(index): - indices = selmodel.selectedRows(0) - if indices: + if indices := selmodel.selectedRows(0): index = indices[0] # delegate to the CategoriesEditDelegate view.edit(index) @@ -1643,15 +1637,13 @@ def helpEvent(self, event: QHelpEvent, view: QAbstractItemView, option: QStyleOptionViewItem, index: QModelIndex) -> bool: multiplicity = index.data(MultiplicityRole) name = VariableListModel.effective_name(index) - if 
isinstance(multiplicity, int) and multiplicity > 1 \ - and name is not None: - QToolTip.showText( - event.globalPos(), f"Name `{name}` is duplicated", - view.viewport() - ) - return True - else: # pragma: no cover + if not isinstance(multiplicity, int) or multiplicity <= 1 or name is None: return super().helpEvent(event, view, option, index) + QToolTip.showText( + event.globalPos(), f"Name `{name}` is duplicated", + view.viewport() + ) + return True # Item model for edited variables (Variable). Define a display role to be the @@ -1756,7 +1748,7 @@ def decorate(editor: VariableEditor) -> VariableEditor: self.layout().addWidget(sedit) self.layout().addWidget(tedit) - def set_data(self, data, transform=()): # pylint: disable=arguments-differ + def set_data(self, data, transform=()): # pylint: disable=arguments-differ # type: (Optional[DataVector], Sequence[Transform]) -> None """ Set the editor data. @@ -1783,10 +1775,7 @@ def set_data(self, data, transform=()): # pylint: disable=arguments-differ if type_transform is not None and data is not None: data = type_transform(data) - if data is not None: - var = data.vtype - else: - var = None + var = data.vtype if data is not None else None index = self._editors.get(type(var), -1) self.layout().setCurrentIndex(index) if index != -1: @@ -1837,29 +1826,25 @@ def __reinterpret_activated(self, index): cb.setFocus() target = cb.itemData(index, Qt.UserRole) assert issubclass(target, VariableTypes) - if not isinstance(var, target): - if target == Real: - transform = AsContinuous() - elif target == Categorical: - transform = AsCategorical() - elif target == Time: - transform = AsTime() - elif target == String: - transform = AsString() - else: + if isinstance(var, target): transform = None var = self.var + elif target == Real: + transform = AsContinuous() + elif target == Categorical: + transform = AsCategorical() + elif target == Time: + transform = AsTime() + elif target == String: + transform = AsString() self.__transform = 
transform data = None if transform is not None and self.__data is not None: data = transform(self.__data) var = data.vtype - if var in self.__history: - tr = self.__history[var] - else: - tr = [] + tr = self.__history[var] if var in self.__history else [] # type specific transform specific = Specific.get(type(var), ()) # merge tr and _tr @@ -2062,8 +2047,7 @@ def _restore(self, ): for i in range(model.rowCount()): midx = model.index(i, 0) coldesc = model.data(midx, Qt.EditRole) # type: DataVector - tr = self._restore_transform(coldesc.vtype) - if tr: + if tr := self._restore_transform(coldesc.vtype): model.setData(midx, tr, TransformRole) # Restore the current variable selection @@ -2247,11 +2231,9 @@ def send_report(self): model.data(midx, TransformRole)) for i in range(model.rowCount()) for midx in [model.index(i)]) - parts = [] - for vector, trs in state: - if trs: - parts.append(report_transform(vector.vtype, trs)) - if parts: + if parts := [ + report_transform(vector.vtype, trs) for vector, trs in state if trs + ]: html = ("
    " + "".join(map("
  • {}
  • ".format, parts)) + "
") @@ -2264,46 +2246,46 @@ def send_report(self): @classmethod def migrate_context(cls, context, version): # pylint: disable=bad-continuation - if version is None or version <= 1: - hints_ = context.values.get("domain_change_hints", ({}, -2))[0] - store = [] - ns = "Orange.data.variable" - mapping = { - "DiscreteVariable": - lambda name, args, attrs: - ("Categorical", (name, tuple(args[0][1]), ())), - "TimeVariable": - lambda name, _, attrs: - ("Time", (name, ())), - "ContinuousVariable": - lambda name, _, attrs: - ("Real", (name, (3, "f"), ())), - "StringVariable": - lambda name, _, attrs: - ("String", (name, ())), - } - for (module, class_name, *rest), target in hints_.items(): - if module != ns: - continue - f = mapping.get(class_name) - if f is None: - continue - trs = [] - key_mapped = f(*rest) - item_mapped = f(*target[2:]) - src = reconstruct(*key_mapped) # type: Variable - dst = reconstruct(*item_mapped) # type: Variable - if src.name != dst.name: - trs.append(Rename(dst.name)) - if src.annotations != dst.annotations: - trs.append(Annotate(dst.annotations)) - if isinstance(src, Categorical): - if src.categories != dst.categories: - assert len(src.categories) == len(dst.categories) - trs.append(CategoriesMapping( - list(zip(src.categories, dst.categories)))) - store.append((deconstruct(src), [deconstruct(tr) for tr in trs])) - context.values["_domain_change_store"] = (dict(store), -2) + if version is not None and version > 1: + return + hints_ = context.values.get("domain_change_hints", ({}, -2))[0] + store = [] + ns = "Orange.data.variable" + mapping = { + "DiscreteVariable": + lambda name, args, attrs: + ("Categorical", (name, tuple(args[0][1]), ())), + "TimeVariable": + lambda name, _, attrs: + ("Time", (name, ())), + "ContinuousVariable": + lambda name, _, attrs: + ("Real", (name, (3, "f"), ())), + "StringVariable": + lambda name, _, attrs: + ("String", (name, ())), + } + for (module, class_name, *rest), target in hints_.items(): + if module != ns: + 
continue + f = mapping.get(class_name) + if f is None: + continue + trs = [] + key_mapped = f(*rest) + item_mapped = f(*target[2:]) + src = reconstruct(*key_mapped) # type: Variable + dst = reconstruct(*item_mapped) # type: Variable + if src.name != dst.name: + trs.append(Rename(dst.name)) + if src.annotations != dst.annotations: + trs.append(Annotate(dst.annotations)) + if isinstance(src, Categorical) and src.categories != dst.categories: + assert len(src.categories) == len(dst.categories) + trs.append(CategoriesMapping( + list(zip(src.categories, dst.categories)))) + store.append((deconstruct(src), [deconstruct(tr) for tr in trs])) + context.values["_domain_change_store"] = (dict(store), -2) def enumerate_columns( @@ -2512,10 +2494,7 @@ def apply_transform(var, table, trs): reinterpret, trs = trs[0], trs[1:] coldata = table_column_data(table, var) var = apply_reinterpret(var, reinterpret, coldata) - if trs: - return apply_transform_var(var, trs) - else: - return var + return apply_transform_var(var, trs) if trs else var def requires_unlink(var: Orange.data.Variable, trs: List[Transform]) -> bool: @@ -2795,10 +2774,7 @@ def orange_isna(variable: Orange.data.Variable, data: ndarray) -> ndarray: """ Return a bool mask masking N/A elements in `data` for the `variable`. 
""" - if variable.is_primitive(): - return np.isnan(data) - else: - return data == variable.Unknown + return np.isnan(data) if variable.is_primitive() else data == variable.Unknown class ToStringTransform(Transformation): @@ -2818,9 +2794,7 @@ def transform(self, c): class ToContinuousTransform(Transformation): def transform(self, c): - if self.variable.is_time: - return c - elif self.variable.is_continuous: + if self.variable.is_time or self.variable.is_continuous: return c elif self.variable.is_discrete: lookup = Lookup( diff --git a/Orange/widgets/data/owfeatureconstructor.py b/Orange/widgets/data/owfeatureconstructor.py index 143861424c9..cd52672cf25 100644 --- a/Orange/widgets/data/owfeatureconstructor.py +++ b/Orange/widgets/data/owfeatureconstructor.py @@ -109,8 +109,7 @@ def selected_row(view): raise ValueError("invalid 'selectionMode'") sel_model = view.selectionModel() - indexes = sel_model.selectedRows() - if indexes: + if indexes := sel_model.selectedRows(): assert len(indexes) == 1 return indexes[0].row() else: @@ -240,12 +239,12 @@ def on_funcs_changed(self): func = self.funcs_model[index] if func in ["atan2", "fmod", "ldexp", "log", "pow", "copysign", "hypot"]: - self.insert_into_expression(func + "(,)") + self.insert_into_expression(f"{func}(,)") self.expressionedit.cursorBackward(False, 2) elif func in ["e", "pi"]: self.insert_into_expression(func) else: - self.insert_into_expression(func + "()") + self.insert_into_expression(f"{func}()") self.expressionedit.cursorBackward(False) self.functionscb.setCurrentIndex(0) @@ -357,16 +356,15 @@ def variable_icon(dtype): class FeatureItemDelegate(QStyledItemDelegate): @staticmethod def displayText(value, _): - return value.name + " := " + value.expression + return f"{value.name} := {value.expression}" class DescriptorModel(itemmodels.PyListModel): def data(self, index, role=Qt.DisplayRole): - if role == Qt.DecorationRole: - value = self[index.row()] - return variable_icon(type(value)) - else: + if role != 
Qt.DecorationRole: return super().data(index, role) + value = self[index.row()] + return variable_icon(type(value)) def freevars(exp: ast.AST, env: List[str]): @@ -503,9 +501,7 @@ def is_valid_item(self, setting, item, attrs, metas): for var in metas: available[sanitized_name(var)] = None - if freevars(exp_ast, list(available)): - return False - return True + return not freevars(exp_ast, list(available)) class OWFeatureConstructor(OWWidget, ConcurrentWidgetMixin): @@ -817,9 +813,9 @@ def apply(self): def on_done(self, result: "Result") -> None: data, attrs = result.data, result.attributes - disc_attrs_not_ok = self.check_attrs_values( - [var for var in attrs if var.is_discrete], data) - if disc_attrs_not_ok: + if disc_attrs_not_ok := self.check_attrs_values( + [var for var in attrs if var.is_discrete], data + ): self.Error.more_values_needed(disc_attrs_not_ok) return @@ -1031,7 +1027,7 @@ def construct_variables(descriptions, data, use_values=False): def sanitized_name(name): sanitized = re.sub(r"\W", "_", name) if sanitized[0].isdigit(): - sanitized = "_" + sanitized + sanitized = f"_{sanitized}" return sanitized @@ -1081,9 +1077,7 @@ def bind_variable(descriptor, env, data, use_values): def cast_datetime(e): if isinstance(e, _cast_datetime_num_types): return e - if e == "" or e is None: - return np.nan - return _parse_datetime(e) + return np.nan if e == "" or e is None else _parse_datetime(e) _cast_datetime = frompyfunc(cast_datetime, 1, 1, dtype=float) @@ -1188,43 +1182,50 @@ def make_lambda(expression, args, env=None): "zip" ] -__GLOBALS = {name: getattr(builtins, name) for name in __ALLOWED - if hasattr(builtins, name)} - -__GLOBALS.update({name: getattr(math, name) for name in dir(math) - if not name.startswith("_")}) - -__GLOBALS.update({ - "normalvariate": random.normalvariate, - "gauss": random.gauss, - "expovariate": random.expovariate, - "gammavariate": random.gammavariate, - "betavariate": random.betavariate, - "lognormvariate": random.lognormvariate, - 
"paretovariate": random.paretovariate, - "vonmisesvariate": random.vonmisesvariate, - "weibullvariate": random.weibullvariate, - "triangular": random.triangular, - "uniform": random.uniform, - "nanmean": lambda *args: np.nanmean(args), - "nanmin": lambda *args: np.nanmin(args), - "nanmax": lambda *args: np.nanmax(args), - "nansum": lambda *args: np.nansum(args), - "nanstd": lambda *args: np.nanstd(args), - "nanmedian": lambda *args: np.nanmedian(args), - "nancumsum": lambda *args: np.nancumsum(args), - "nancumprod": lambda *args: np.nancumprod(args), - "nanargmax": lambda *args: np.nanargmax(args), - "nanargmin": lambda *args: np.nanargmin(args), - "nanvar": lambda *args: np.nanvar(args), - "mean": lambda *args: np.mean(args), - "std": lambda *args: np.std(args), - "median": lambda *args: np.median(args), - "cumsum": lambda *args: np.cumsum(args), - "cumprod": lambda *args: np.cumprod(args), - "argmax": lambda *args: np.argmax(args), - "argmin": lambda *args: np.argmin(args), - "var": lambda *args: np.var(args)}) +__GLOBALS = ( + { + name: getattr(builtins, name) + for name in __ALLOWED + if hasattr(builtins, name) + } + | { + name: getattr(math, name) + for name in dir(math) + if not name.startswith("_") + } + | { + "normalvariate": random.normalvariate, + "gauss": random.gauss, + "expovariate": random.expovariate, + "gammavariate": random.gammavariate, + "betavariate": random.betavariate, + "lognormvariate": random.lognormvariate, + "paretovariate": random.paretovariate, + "vonmisesvariate": random.vonmisesvariate, + "weibullvariate": random.weibullvariate, + "triangular": random.triangular, + "uniform": random.uniform, + "nanmean": lambda *args: np.nanmean(args), + "nanmin": lambda *args: np.nanmin(args), + "nanmax": lambda *args: np.nanmax(args), + "nansum": lambda *args: np.nansum(args), + "nanstd": lambda *args: np.nanstd(args), + "nanmedian": lambda *args: np.nanmedian(args), + "nancumsum": lambda *args: np.nancumsum(args), + "nancumprod": lambda *args: 
np.nancumprod(args), + "nanargmax": lambda *args: np.nanargmax(args), + "nanargmin": lambda *args: np.nanargmin(args), + "nanvar": lambda *args: np.nanvar(args), + "mean": lambda *args: np.mean(args), + "std": lambda *args: np.std(args), + "median": lambda *args: np.median(args), + "cumsum": lambda *args: np.cumsum(args), + "cumprod": lambda *args: np.cumprod(args), + "argmax": lambda *args: np.argmax(args), + "argmin": lambda *args: np.argmin(args), + "var": lambda *args: np.var(args), + } +) class FeatureFunc: @@ -1274,10 +1275,7 @@ def __call_table(self, table): else: raise - if not cols: - args = [()] * len(table) - else: - args = zip(*cols) + args = zip(*cols) if cols else [()] * len(table) f = self.func if self.mask_exceptions: y = list(starmap(ftry(f, Exception, np.nan), args)) diff --git a/Orange/widgets/data/owfeaturestatistics.py b/Orange/widgets/data/owfeaturestatistics.py index 2a133b4206c..e226c9604a0 100644 --- a/Orange/widgets/data/owfeaturestatistics.py +++ b/Orange/widgets/data/owfeaturestatistics.py @@ -211,9 +211,7 @@ def clear(self): @property def variables(self): matrices = [self.__attributes[0], self.__class_vars[0], self.__metas[0]] - if not any(m.size for m in matrices): - return [] - return np.hstack(matrices) + return np.hstack(matrices) if any(m.size for m in matrices) else [] @staticmethod def _attr_indices(attrs): @@ -506,9 +504,8 @@ def _argsortData(self, data, order): def headerData(self, section, orientation, role): # type: (int, Qt.Orientation, Qt.ItemDataRole) -> Any - if orientation == Qt.Horizontal: - if role == Qt.DisplayRole: - return self.Columns.from_index(section).name + if orientation == Qt.Horizontal and role == Qt.DisplayRole: + return self.Columns.from_index(section).name return None @@ -885,10 +882,7 @@ def send_report(self): @classmethod def migrate_context(cls, context, version): if not version or version < 2: - selected_rows = context.values.pop("selected_rows", None) - if not selected_rows: - selected_vars = [] - 
else: + if selected_rows := context.values.pop("selected_rows", None): # This assumes that dict was saved by Python >= 3.6 so dict is # ordered; if not, context hasn't had worked anyway. all_vars = [ @@ -900,6 +894,8 @@ def migrate_context(cls, context, version): # was the only hidden var when settings_version < 2, so: if tpe != 3] selected_vars = [all_vars[i] for i in selected_rows] + else: + selected_vars = [] context.values["selected_vars"] = selected_vars, -3 diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py index 35fc2691bf5..629655a2b8e 100644 --- a/Orange/widgets/data/owfile.py +++ b/Orange/widgets/data/owfile.py @@ -65,9 +65,7 @@ def __init__(self, mapping): def data(self, index, role=Qt.DisplayRole): data = super().data(index, role) - if role == Qt.DisplayRole: - return self.mapping.get(data, data) - return data + return self.mapping.get(data, data) if role == Qt.DisplayRole else data def add_name(self, url, name): self.mapping[url] = name @@ -177,9 +175,8 @@ def group_readers_per_addon_key(w): def package(w): package = w.qualified_name().split(".")[:-1] package = package[:2] - if ".".join(package) == "Orange.data": - return ["0"] # force "Orange" to come first - return package + return ["0"] if ".".join(package) == "Orange.data" else package + return package(w), w.DESCRIPTION self.available_readers = sorted(set(readers), @@ -304,7 +301,7 @@ def package(w): if self.source == self.LOCAL_FILE: last_path = self.last_path() if last_path and os.path.exists(last_path) and \ - os.path.getsize(last_path) > self.SIZE_LIMIT: + os.path.getsize(last_path) > self.SIZE_LIMIT: self.Warning.file_too_big() return @@ -334,14 +331,13 @@ def select_reader(self, n): path = self.recent_paths[0] if n == 0: # default path.file_format = None - self.load_data() elif n <= len(self.available_readers): reader = self.available_readers[n - 1] path.file_format = reader.qualified_name() - self.load_data() else: # the rest include just qualified names 
path.file_format = self.reader_combo.itemText(n) - self.load_data() + + self.load_data() def _url_set(self): index = self.url_combo.currentIndex() @@ -349,7 +345,7 @@ def _url_set(self): url = url.strip() if not urlparse(url).scheme: - url = 'http://' + url + url = f'http://{url}' self.url_combo.setItemText(index, url) if index != 0: @@ -392,8 +388,7 @@ def load_data(self): self.clear_messages() self.set_file_list() - error = self._try_load() - if error: + if error := self._try_load(): error() self.data = None self.sheet_box.hide() @@ -513,10 +508,7 @@ def _initialize_reader_combo(self): @staticmethod def _describe(table): def missing_prop(prop): - if prop: - return f"({prop * 100:.1f}% missing values)" - else: - return "(no missing values)" + return f"({prop * 100:.1f}% missing values)" if prop else "(no missing values)" domain = table.domain text = "" @@ -542,12 +534,12 @@ def missing_prop(prop): elif domain.has_discrete_class: nvals = len(domain.class_var.values) text += "
Classification; categorical class " \ - f"with {nvals} {pl(nvals, 'value')} {missing_in_class}" + f"with {nvals} {pl(nvals, 'value')} {missing_in_class}" elif table.domain.class_vars: ntargets = len(table.domain.class_vars) text += "
Multi-target; " \ - f"{ntargets} target {pl(ntargets, 'variable')} " \ - f"{missing_in_class}" + f"{ntargets} target {pl(ntargets, 'variable')} " \ + f"{missing_in_class}" else: text += "
Data has no target variable." nmetas = len(domain.metas) @@ -557,7 +549,7 @@ def missing_prop(prop): if 'Timestamp' in table.domain: # Google Forms uses this header to timestamp responses text += f"

First entry: {table[0, 'Timestamp']}
" \ - f"Last entry: {table[-1, 'Timestamp']}

" + f"Last entry: {table[-1, 'Timestamp']}

" return text def storeSpecificSettings(self): @@ -623,8 +615,9 @@ def get_ext_name(filename): home = os.path.expanduser("~") if self.loaded_file.startswith(home): # os.path.join does not like ~ - name = "~" + os.path.sep + \ - self.loaded_file[len(home):].lstrip("/").lstrip("\\") + name = f"~{os.path.sep}" + self.loaded_file[len(home) :].lstrip( + "/" + ).lstrip("\\") else: name = self.loaded_file if self.sheet_combo.isVisible(): @@ -640,8 +633,7 @@ def get_ext_name(filename): @staticmethod def dragEnterEvent(event): """Accept drops of valid file urls""" - urls = event.mimeData().urls() - if urls: + if urls := event.mimeData().urls(): try: FileFormat.get_reader(urls[0].toLocalFile()) event.acceptProposedAction() @@ -650,8 +642,7 @@ def dragEnterEvent(event): def dropEvent(self, event): """Handle file drops""" - urls = event.mimeData().urls() - if urls: + if urls := event.mimeData().urls(): self.add_path(urls[0].toLocalFile()) # add first file self.source = self.LOCAL_FILE self.load_data() @@ -669,29 +660,27 @@ class OWFileDropHandler(SingleUrlDropHandler): WIDGET = OWFile def canDropUrl(self, url: QUrl) -> bool: - if url.isLocalFile(): - try: - FileFormat.get_reader(url.toLocalFile()) - return True - except Exception: # noqa # pylint:disable=broad-except - return False - else: + if not url.isLocalFile(): return url.scheme().lower() in ("http", "https", "ftp") + try: + FileFormat.get_reader(url.toLocalFile()) + return True + except Exception: # noqa # pylint:disable=broad-except + return False def parametersFromUrl(self, url: QUrl) -> Dict[str, Any]: - if url.isLocalFile(): - path = url.toLocalFile() - r = RecentPath(os.path.abspath(path), None, None, - os.path.basename(path)) - return { - "recent_paths": stored_recent_paths_prepend(self.WIDGET, r), - "source": OWFile.LOCAL_FILE, - } - else: + if not url.isLocalFile(): return { "recent_urls": [url.toString()], "source": OWFile.URL, } + path = url.toLocalFile() + r = RecentPath(os.path.abspath(path), None, None, + 
os.path.basename(path)) + return { + "recent_paths": stored_recent_paths_prepend(self.WIDGET, r), + "source": OWFile.LOCAL_FILE, + } if __name__ == "__main__": # pragma: no cover diff --git a/Orange/widgets/data/owgroupby.py b/Orange/widgets/data/owgroupby.py index 420ca9ded07..43daa439543 100644 --- a/Orange/widgets/data/owgroupby.py +++ b/Orange/widgets/data/owgroupby.py @@ -54,7 +54,7 @@ def concatenate(x): Concatenate values of series if value is not missing (nan or empty string for StringVariable) """ - return " ".join(str(v) for v in x if not pd.isnull(v) and len(str(v)) > 0) + return " ".join(str(v) for v in x if not pd.isnull(v) and str(v) != "") def std(s): @@ -228,14 +228,13 @@ def data(self, index, role=Qt.DisplayRole) -> Any: if role in (Qt.DisplayRole, Qt.EditRole): if col == TabColumn.attribute: return str(val) - else: # col == TabColumn.aggregations - # plot first two aggregations comma separated and write n more - # for others - aggs = sorted( - self.parent.aggregations.get(val, []), key=AGGREGATIONS_ORD.index - ) - n_more = "" if len(aggs) <= 3 else f" and {len(aggs) - 3} more" - return ", ".join(aggs[:3]) + n_more + # plot first two aggregations comma separated and write n more + # for others + aggs = sorted( + self.parent.aggregations.get(val, []), key=AGGREGATIONS_ORD.index + ) + n_more = "" if len(aggs) <= 3 else f" and {len(aggs) - 3} more" + return ", ".join(aggs[:3]) + n_more elif role == Qt.DecorationRole and col == TabColumn.attribute: return gui.attributeIconDict[val] return None @@ -292,7 +291,7 @@ def nextCheckState(self) -> None: else: agg = self.text() selected_attrs = self.parent.get_selected_attributes() - types = set(type(attr) for attr in selected_attrs) + types = {type(attr) for attr in selected_attrs} can_be_applied_all = types <= AGGREGATIONS[agg].types # true if aggregation applied to all attributes that can be @@ -485,8 +484,11 @@ def set_data(self, data: Table) -> None: self.openContext(self.data) # restore aggregations - 
self.aggregations.update({k: v for k, v in default_aggregations.items() - if k not in self.aggregations}) + self.aggregations |= { + k: v + for k, v in default_aggregations.items() + if k not in self.aggregations + } # update selections in widgets and re-plot self.agg_table_model.set_domain(data.domain if data else None) @@ -547,9 +549,8 @@ def migrate_context(cls, context, _): This function removes Sum from the context for TimeVariables (104) """ for var_, v in context.values["aggregations"][0].items(): - if len(var_) == 2: - if var_[1] == 104: - v.discard("Sum") + if len(var_) == 2 and var_[1] == 104: + v.discard("Sum") if __name__ == "__main__": diff --git a/Orange/widgets/data/owimpute.py b/Orange/widgets/data/owimpute.py index 0b77fe60951..f1bd0029206 100644 --- a/Orange/widgets/data/owimpute.py +++ b/Orange/widgets/data/owimpute.py @@ -122,7 +122,7 @@ class Method(enum.IntEnum): def var_key(var): # type: (Orange.data.Variable) -> Tuple[str, str] - qname = "{}.{}".format(type(var).__module__, type(var).__name__) + qname = f"{type(var).__module__}.{type(var).__name__}" return qname, var.name @@ -554,7 +554,7 @@ def send_report(self): for i, var in enumerate(self.varmodel): method = self.get_method_for_column(i) if not isinstance(method, AsDefault): - specific.append("{} ({})".format(var.name, str(method))) + specific.append(f"{var.name} ({str(method)})") default = self.create_imputer(Method.AsAboveSoBelow) if specific: diff --git a/Orange/widgets/data/owmelt.py b/Orange/widgets/data/owmelt.py index f75f1401ec8..f9453dcdb0e 100644 --- a/Orange/widgets/data/owmelt.py +++ b/Orange/widgets/data/owmelt.py @@ -38,9 +38,7 @@ def match(self, context, potential_ids): names = {var.name for var in potential_ids} if names == context.potential_ids: return self.PERFECT_MATCH - if context.values["idvar"] in names: - return self.MATCH - return self.NO_MATCH + return self.MATCH if context.values["idvar"] in names else self.NO_MATCH def encode_setting(self, context, setting, 
value): if setting.name == "idvar": diff --git a/Orange/widgets/data/owmergedata.py b/Orange/widgets/data/owmergedata.py index bb2da65bf5a..dc46b4c98f1 100644 --- a/Orange/widgets/data/owmergedata.py +++ b/Orange/widgets/data/owmergedata.py @@ -217,8 +217,7 @@ def settings_to_widget(self, widget, *_args): context = widget.current_context if context is None: return - pairs = context.values.get("attr_pairs") - if pairs: + if pairs := context.values.get("attr_pairs"): # attr_pairs is schema only setting which means it is not always # present. When not present leave widgets default. widget.attr_pairs = [ @@ -650,9 +649,7 @@ def migrate_settings(settings, version=None): def mig_value(x): if x == "Position (index)": return INDEX - if x == "Source position (index)": - return INSTANCEID - return x + return INSTANCEID if x == "Source position (index)" else x if not version: operations = ("augment", "merge", "combine") @@ -667,7 +664,7 @@ def mig_value(x): if not version or version < 2 and "attr_pairs" in settings: data_exists, extra_exists, attr_pairs = settings.pop("attr_pairs") - if not (data_exists and extra_exists): + if not data_exists or not extra_exists: settings["context_settings"] = [] return diff --git a/Orange/widgets/data/owneighbors.py b/Orange/widgets/data/owneighbors.py index 62d98a95c98..40446a2684b 100644 --- a/Orange/widgets/data/owneighbors.py +++ b/Orange/widgets/data/owneighbors.py @@ -128,10 +128,7 @@ def compute_distances(self): def commit(self): indices = self._compute_indices() - if indices is None: - neighbors = None - else: - neighbors = self._data_with_similarity(indices) + neighbors = None if indices is None else self._data_with_similarity(indices) self.Outputs.data.send(neighbors) def _compute_indices(self): diff --git a/Orange/widgets/data/owpaintdata.py b/Orange/widgets/data/owpaintdata.py index 12560ae790d..becaf604ddd 100644 --- a/Orange/widgets/data/owpaintdata.py +++ b/Orange/widgets/data/owpaintdata.py @@ -472,25 +472,24 @@ def 
selectionRect(self): return self._item.rect() def mousePressEvent(self, event): - if event.button() == Qt.LeftButton: - pos = self.mapToPlot(event.pos()) - if self._item.isVisible(): - if self.selectionRect().contains(pos): - # Allow the event to propagate to the item. - event.setAccepted(False) - self._item.setCursor(Qt.ClosedHandCursor) - return False + if event.button() != Qt.LeftButton: + return super().mousePressEvent(event) + pos = self.mapToPlot(event.pos()) + if self._item.isVisible() and self.selectionRect().contains(pos): + # Allow the event to propagate to the item. + event.setAccepted(False) + self._item.setCursor(Qt.ClosedHandCursor) + return False - self._mouse_dragging = True + self._mouse_dragging = True - self._start_pos = pos - self._item.setVisible(True) - self._plot.addItem(self._item) + self._start_pos = pos + self._item.setVisible(True) + self._plot.addItem(self._item) - self.setSelectionRect(QRectF(pos, pos)) - event.accept() - return True - return super().mousePressEvent(event) + self.setSelectionRect(QRectF(pos, pos)) + event.accept() + return True def mouseMoveEvent(self, event): if event.buttons() & Qt.LeftButton: @@ -1027,10 +1026,11 @@ def _check_and_set_data(data): y = data[:, y].Y self.input_has_attr2 = len(data.domain.attributes) >= 2 - if not self.input_has_attr2: - self.input_data = np.column_stack((X, np.zeros(len(data)), y)) - else: - self.input_data = np.column_stack((X, y)) + self.input_data = ( + np.column_stack((X, y)) + if self.input_has_attr2 + else np.column_stack((X, np.zeros(len(data)), y)) + ) self.reset_to_input() self.commit.now() @@ -1120,8 +1120,7 @@ def _class_value_changed(self, index, _): # self.undo_stack.push(command) def selected_class_label(self): - rows = self.classValuesView.selectedIndexes() - if rows: + if rows := self.classValuesView.selectedIndexes(): return rows[0].row() return None diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py index 51e971ce73d..0f48af580b7 100644 --- 
a/Orange/widgets/gui.py +++ b/Orange/widgets/gui.py @@ -187,10 +187,7 @@ def sizeHint(self): def listView(widget, master, value=None, model=None, box=None, callback=None, sizeHint=None, *, viewType=ListViewWithSizeHint, **misc): - if box: - bg = vBox(widget, box, addToLayout=False) - else: - bg = widget + bg = vBox(widget, box, addToLayout=False) if box else widget view = viewType(preferred_size=sizeHint) if isinstance(view.model(), QSortFilterProxyModel): view.model().setSourceModel(model) @@ -244,10 +241,7 @@ def listBox(widget, master, value=None, labels=None, box=None, callback=None, :type sizeHint: QSize :rtype: OrangeListBox """ - if box: - bg = hBox(widget, box, addToLayout=False) - else: - bg = widget + bg = hBox(widget, box, addToLayout=False) if box else widget lb = OrangeListBox(master, enableDragDrop, dragDropCallback, dataValidityCallback, sizeHint, bg) lb.setSelectionMode(selectionMode) @@ -327,10 +321,7 @@ def __init__(self, master, enableDragDrop=False, dragDropCallback=None, super().__init__(*args) self.drop_callback = dragDropCallback self.valid_data_callback = dataValidityCallback - if not sizeHint: - self.size_hint = QtCore.QSize(150, 100) - else: - self.size_hint = sizeHint + self.size_hint = sizeHint or QtCore.QSize(150, 100) if enableDragDrop: self.setDragEnabled(True) self.setAcceptDrops(True) @@ -399,10 +390,7 @@ def __init__(self, content, listBox=None): # commonly used as a setting which gets synced into a GLOBAL # SettingsHandler and which keeps the OWWidget instance alive via a # reference in listBox (see gui.listBox) - if listBox is not None: - self.listBox = weakref.ref(listBox) - else: - self.listBox = lambda: None + self.listBox = weakref.ref(listBox) if listBox is not None else (lambda: None) def __reduce__(self): # cannot pickle self.listBox, but can't discard it @@ -413,10 +401,7 @@ def __reduce__(self): # TODO ControllgedList.item2name is probably never used def item2name(self, item): item = self.listBox().labels[item] - if 
isinstance(item, tuple): - return item[1] - else: - return item + return item[1] if isinstance(item, tuple) else item def __setitem__(self, index, item): def unselect(i): @@ -502,8 +487,7 @@ def __call__(self, *_): selection = self.view.selectionModel().selection() if isinstance(self.view.model(), QSortFilterProxyModel): selection = self.view.model().mapSelectionToSource(selection) - values = [i.row() for i in selection.indexes()] - if values: + if values := [i.row() for i in selection.indexes()]: # FIXME: irrespective of PyListModel check, this might/should always # callback with values! if isinstance(self.model, PyListModel): @@ -572,18 +556,19 @@ def action(self, values): class CallFrontListBox(ControlledCallFront): def action(self, value): - if value is not None: - if isinstance(value, int): - for i in range(self.control.count()): - self.control.item(i).setSelected(i == value) - else: - if not isinstance(value, ControlledList): - setattr(self.control.ogMaster, self.control.ogValue, - ControlledList(value, self.control)) - for i in range(self.control.count()): - shouldBe = i in value - if shouldBe != self.control.item(i).isSelected(): - self.control.item(i).setSelected(shouldBe) + if value is None: + return + if isinstance(value, int): + for i in range(self.control.count()): + self.control.item(i).setSelected(i == value) + else: + if not isinstance(value, ControlledList): + setattr(self.control.ogMaster, self.control.ogValue, + ControlledList(value, self.control)) + for i in range(self.control.count()): + shouldBe = i in value + if shouldBe != self.control.item(i).isSelected(): + self.control.item(i).setSelected(shouldBe) class CallFrontListBoxLabels(ControlledCallFront): diff --git a/Orange/widgets/settings.py b/Orange/widgets/settings.py index 3bbe31eb2c0..d4ee741028e 100644 --- a/Orange/widgets/settings.py +++ b/Orange/widgets/settings.py @@ -76,8 +76,8 @@ def __init__(self, *, match_values=0, first_match=True, **kwargs): for name in kwargs: warnings.warn( - 
"{} is not a valid parameter for DomainContextHandler" - .format(name), OrangeDeprecationWarning + f"{name} is not a valid parameter for DomainContextHandler", + OrangeDeprecationWarning, ) def encode_domain(self, domain): @@ -104,11 +104,11 @@ def encode_variables(attributes, encode_values): """Encode variables to a list mapping name to variable type or a list of values.""" - if not encode_values: - return {v.name: vartype(v) for v in attributes} - - return {v.name: v.values if v.is_discrete else vartype(v) - for v in attributes} + return ( + {v.name: v.values if v.is_discrete else vartype(v) for v in attributes} + if encode_values + else {v.name: vartype(v) for v in attributes} + ) def new_context(self, domain, attributes, metas): """Create a new context.""" @@ -181,9 +181,7 @@ def get_var(name): for name_type in data] if dtype == -4: return {get_var(name): val for (name, _), val in data.items()} - if dtype >= 100: - return get_var(data) - return value[0] + return get_var(data) if dtype >= 100 else value[0] else: return value @@ -267,9 +265,11 @@ def is_valid_item(self, setting, item, attrs, metas): Subclasses can override this method to checks data in alternative representations. 
""" - if not isinstance(item, tuple): - return True - return self._var_exists(setting, item, attrs, metas) + return ( + self._var_exists(setting, item, attrs, metas) + if isinstance(item, tuple) + else True + ) @staticmethod def is_encoded_var(value): @@ -284,11 +284,7 @@ class ClassValuesContextHandler(ContextHandler): def open_context(self, widget, classes): if isinstance(classes, Variable): - if classes.is_discrete: - classes = classes.values - else: - classes = None - + classes = classes.values if classes.is_discrete else None super().open_context(widget, classes) def new_context(self, classes): @@ -300,13 +296,12 @@ def match(self, context, classes): if isinstance(classes, Variable) and classes.is_continuous: return (self.PERFECT_MATCH if context.classes is None else self.NO_MATCH) + # variable.values used to be a list, and so were context.classes + # cast to tuple for compatibility with past contexts + if context.classes is not None and tuple(context.classes) == classes: + return self.PERFECT_MATCH else: - # variable.values used to be a list, and so were context.classes - # cast to tuple for compatibility with past contexts - if context.classes is not None and tuple(context.classes) == classes: - return self.PERFECT_MATCH - else: - return self.NO_MATCH + return self.NO_MATCH class PerfectDomainContextHandler(DomainContextHandler): @@ -358,22 +353,20 @@ def encode_setting(self, context, setting, value): """Same as is domain context handler, but handles separately stored class_vars.""" - if isinstance(setting, ContextSetting) and isinstance(value, str): - - def _candidate_variables(): - if not setting.exclude_attributes: - yield from itertools.chain(context.attributes, - context.class_vars) - if not setting.exclude_metas: - yield from context.metas - - for aname, atype in _candidate_variables(): - if aname == value: - return value, atype - - return value, -1 - else: + if not isinstance(setting, ContextSetting) or not isinstance(value, str): return 
super().encode_setting(context, setting, value) + def _candidate_variables(): + if not setting.exclude_attributes: + yield from itertools.chain(context.attributes, + context.class_vars) + if not setting.exclude_metas: + yield from context.metas + + for aname, atype in _candidate_variables(): + if aname == value: + return value, atype + + return value, -1 def migrate_str_to_variable(settings, names=None, none_placeholder=None): diff --git a/quietunittest.py b/quietunittest.py index be79688b4e1..17068753420 100644 --- a/quietunittest.py +++ b/quietunittest.py @@ -46,7 +46,7 @@ def write(self, s): def write_msg(self, s): if self.line_before_msg: _stdout.write("\n") - _stdout.write(self.preambule + " ... " + s) + _stdout.write(f"{self.preambule} ... {s}") self.line_before_msg = False self.preambule = "" @@ -68,8 +68,7 @@ def stopTest(self, test): @staticmethod def getDescription(test): - doc_first_line = test.shortDescription() - if doc_first_line: + if doc_first_line := test.shortDescription(): return '\n'.join((str(test), doc_first_line)) else: return str(test) @@ -91,9 +90,9 @@ def printErrors(self): def printErrorList(self, flavour, errors): for test, err in errors: print(self.separator1) - print("%s: %s" % (flavour,self.getDescription(test))) + print(f"{flavour}: {self.getDescription(test)}") print(self.separator2) - print("%s" % err) + print(f"{err}") _stdout = sys.stdout diff --git a/setup.py b/setup.py index f4660cd32b8..9d2d3cc6edd 100755 --- a/setup.py +++ b/setup.py @@ -84,12 +84,19 @@ requirements = ['requirements-core.txt', 'requirements-gui.txt'] -INSTALL_REQUIRES = sorted(set( - line.partition('#')[0].strip() - for file in (os.path.join(os.path.dirname(__file__), file) - for file in requirements) - for line in open(file) -) - {''}) +INSTALL_REQUIRES = sorted( + ( + { + line.partition('#')[0].strip() + for file in ( + os.path.join(os.path.dirname(__file__), file) + for file in requirements + ) + for line in open(file) + } + - {''} + ) +) EXTRAS_REQUIRE = 
{} @@ -170,7 +177,7 @@ def write_version_py(filename='Orange/version.py'): GIT_REVISION = "Unknown" if not ISRELEASED: - FULLVERSION += '.dev0+' + GIT_REVISION[:7] + FULLVERSION += f'.dev0+{GIT_REVISION[:7]}' a = open(filename, 'w') try: @@ -187,30 +194,40 @@ def write_version_py(filename='Orange/version.py'): # Extra non .py, .{so,pyd} files that are installed within the package dir # hierarchy PACKAGE_DATA = { - "Orange": ["datasets/*.{}".format(ext) - for ext in ["tab", "csv", "basket", "info", "dst", "metadata"]], + "Orange": [ + f"datasets/*.{ext}" + for ext in ["tab", "csv", "basket", "info", "dst", "metadata"] + ], "Orange.canvas": ["icons/*.png", "icons/*.svg"], "Orange.canvas.workflows": ["*.ows"], - "Orange.widgets": ["icons/*.png", - "icons/*.svg"], + "Orange.widgets": ["icons/*.png", "icons/*.svg"], "Orange.widgets.report": ["icons/*.svg", "*.html"], - "Orange.widgets.tests": ["datasets/*.tab", - "workflows/*.ows"], - "Orange.widgets.data": ["icons/*.svg", - "icons/paintdata/*.png", - "icons/paintdata/*.svg"], - "Orange.widgets.data.tests": ["origin1/*.tab", - "origin2/*.tab", - "*.txt", "*.tab"], + "Orange.widgets.tests": ["datasets/*.tab", "workflows/*.ows"], + "Orange.widgets.data": [ + "icons/*.svg", + "icons/paintdata/*.png", + "icons/paintdata/*.svg", + ], + "Orange.widgets.data.tests": [ + "origin1/*.tab", + "origin2/*.tab", + "*.txt", + "*.tab", + ], "Orange.widgets.evaluate": ["icons/*.svg"], "Orange.widgets.model": ["icons/*.svg"], "Orange.widgets.visualize": ["icons/*.svg"], "Orange.widgets.unsupervised": ["icons/*.svg"], "Orange.widgets.utils": ["_webview/*.js"], - "Orange.tests": ["xlsx_files/*.xlsx", "datasets/*.tab", - "xlsx_files/*.xls", - "datasets/*.basket", "datasets/*.csv", - "datasets/*.pkl", "datasets/*.pkl.gz"] + "Orange.tests": [ + "xlsx_files/*.xlsx", + "datasets/*.tab", + "xlsx_files/*.xls", + "datasets/*.basket", + "datasets/*.csv", + "datasets/*.pkl", + "datasets/*.pkl.gz", + ], } @@ -281,10 +298,7 @@ def 
find_htmlhelp_files(subdir): ) for file in files: relpath = os.path.relpath(file, start=subdir) - relsubdir = os.path.dirname(relpath) - # path.join("a", "") results in "a/"; distutils install_data does not - # accept paths that end with "/" on windows. - if relsubdir: + if relsubdir := os.path.dirname(relpath): targetdir = os.path.join(DATAROOTDIR, relsubdir) else: targetdir = DATAROOTDIR @@ -325,19 +339,19 @@ def add_with_option(option, help="", default=None, ): def decorator(cmdclass): # type: (Type[Command]) -> Type[Command] cmdclass.user_options = getattr(cmdclass, "user_options", []) + [ - ("with-" + option, None, help), - ("without-" + option, None, ""), + (f"with-{option}", None, help), + (f"without-{option}", None, ""), ] cmdclass.boolean_options = getattr(cmdclass, "boolean_options", []) + [ - ("with-" + option,), + (f"with-{option}",) ] cmdclass.negative_opt = dict( - getattr(cmdclass, "negative_opt", {}), **{ - "without-" + option: "with-" + option - } + getattr(cmdclass, "negative_opt", {}), + **{f"without-{option}": f"with-{option}"}, ) - setattr(cmdclass, "with_" + option, default) + setattr(cmdclass, f"with_{option}", default) return cmdclass + return decorator @@ -474,21 +488,12 @@ def setup_package(): 'sdist': sdist, 'build': build, 'build_htmlhelp': build_htmlhelp, - # Use install_data from distutils, not numpy.distutils. - # numpy.distutils insist all data files are installed in site-packages - 'install_data': install_data.install_data + 'install_data': install_data.install_data, + "build_ext": build_ext + if have_numpy and have_cython + else build_ext_error, } - if have_numpy and have_cython: - extra_args = {} - cmdclass["build_ext"] = build_ext - else: - # substitute a build_ext command with one that raises an error when - # building. 
In order to fully support `pip install` we need to - # survive a `./setup egg_info` without numpy so pip can properly - # query our install dependencies - extra_args = {} - cmdclass["build_ext"] = build_ext_error - + extra_args = {} setup( name=NAME, version=FULLVERSION,