Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e4b5ef2

Browse files
committed
Improve docs & lint
Signed-off-by: Claudio Spiess <[email protected]>
1 parent d150341 commit e4b5ef2

File tree

4 files changed

+39
-26
lines changed

4 files changed

+39
-26
lines changed

docs/README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ PDL provides the following features:
2323

2424
The PDL interpreter takes a PDL program as input and generates data by executing its instructions (calling out to models, code, etc...).
2525

26-
See below for a quick reference, followed by [installation notes](#interpreter_installation) and an [overview](#overview) of the language. A more detailed description of the language features can be found in this [tutorial](https://ibm.github.io/prompt-declaration-language/tutorial).
26+
See below for a quick reference, followed by [installation notes](#interpreter-installation) and an [overview](#overview) of the language. A more detailed description of the language features can be found in this [tutorial](https://ibm.github.io/prompt-declaration-language/tutorial).
2727

2828

2929
## Quick Reference
@@ -50,13 +50,13 @@ pip install 'prompt-declaration-language[examples]'
5050

5151
The Live Explorer can be installed as follows (MacOS):
5252
```
53-
brew install pdl
53+
brew install pdl
5454
```
5555

5656
For other platforms, see installation notes.
5757

5858
You can run PDL with LLM models locally using [Ollama](https://ollama.com), or with a cloud service.
59-
See [here](https://ibm.github.io/prompt-declaration-language/tutorial/#using-ollama-models) for
59+
See [here](https://ibm.github.io/prompt-declaration-language/tutorial/#using-ollama-models) for
6060
instructions on how to install an Ollama model locally.
6161

6262
Most examples in this repository use IBM Granite models on [Ollama](https://ollama.com) and some are on [Replicate](https://replicate.com/). In order to run these examples, you need to create a free account
@@ -172,7 +172,7 @@ text:
172172
temperature: 0
173173
```
174174

175-
Notice the syntactic differences. Model ids on watsonx start with `watsonx`.
175+
Notice the syntactic differences. Model ids on watsonx start with `watsonx`.
176176

177177
Watsonx also provides a text completion endpoint as shown in the following example. A text completion endpoint does not take chat
178178
templates into account:
@@ -299,10 +299,10 @@ When we execute this program with the PDL interpreter, we obtain the following t
299299
@SuppressWarnings("unchecked")
300300
public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {
301301
Map<String, String> offsetMap;
302-
if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
303-
offsetMap = new HashMap<>();
302+
if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
303+
offsetMap = new HashMap<>();
304304
} else {
305-
offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
305+
offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
306306
}
307307
return offsetMap;
308308
}
@@ -364,10 +364,10 @@ When we execute this new program, we obtain the following:
364364
@SuppressWarnings("unchecked")
365365
public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException {
366366
Map<String, String> offsetMap;
367-
if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
368-
offsetMap = new HashMap<>();
367+
if (lastSourceOffset == null || lastSourceOffset.isEmpty()) {
368+
offsetMap = new HashMap<>();
369369
} else {
370-
offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
370+
offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class);
371371
}
372372
return offsetMap;
373373
}

docs/autopdl.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,4 @@ This will report details about the optimization process, such as the number of c
173173
0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0/1,200 [ 0:00:01 < -:--:-- , ? it/s ]
174174
```
175175

176-
Once the process is complete, a file `optimized_gsm8k.pdl` is written in same directory as the source PDL file. This file contains the optimal configuration and is directly executable by the standard PDL interpreter. A log of the optimization process is written to `experiments/` by default.
176+
Once the process is complete, a file `optimized_gsm8k.pdl` is written in the same directory as the source PDL file. This file contains the optimal configuration and is directly executable by the standard PDL interpreter. A log of the optimization process is written to `experiments/` by default.

examples/optimizer/optimize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Any
66

77
import yaml
8-
from datasets import load_from_disk
8+
from datasets.load import load_from_disk
99
from fever_evaluator import FEVEREvaluator
1010
from gsm8k_evaluator import Gsm8kEvaluator
1111
from gsmhard_evaluator import GsmHardEvaluator

src/pdl/optimize/pdl_optimizer.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,13 @@ def sample_candidates(
160160
demo_name = self.config.demonstrations_variable_name
161161
candidates = []
162162

163+
num_demonstrations_set = {
164+
int(x) for x in self.config.variables.get("num_demonstrations", set())
165+
}
166+
163167
if (
164-
"prompt_pattern" in self.config.variables
165-
and "cot" in self.config.variables.get("prompt_pattern", [])
166-
and 0 in self.config.variables.get("num_demonstrations", [])
168+
"cot" in self.config.variables.get("prompt_pattern", [])
169+
and 0 in num_demonstrations_set
167170
):
168171
cot_candidate = {
169172
k: self.sample_random_index(v) for k, v in self.config.variables.items()
@@ -179,18 +182,18 @@ def sample_candidates(
179182

180183
candidates.append(cot_candidate)
181184

182-
zero_shots_seen = ["cot"]
185+
zero_shots_seen = {"cot"}
183186
while len(candidates) < num_candidates:
184187
variable_instance = {
185188
k: self.sample_random_index(v) for k, v in self.config.variables.items()
186189
}
187190
if (
188191
variable_instance.get("num_demonstrations") == 0
189-
and variable_instance.get("prompt_pattern") == "cot"
192+
and variable_instance.get("prompt_pattern") is not None
190193
):
191194
if variable_instance["prompt_pattern"] in zero_shots_seen:
192195
continue
193-
zero_shots_seen.append(variable_instance["prompt_pattern"])
196+
zero_shots_seen.add(variable_instance["prompt_pattern"])
194197

195198
num_demonstrations = int(
196199
variable_instance.get("num_demonstrations", self.num_demonstrations),
@@ -215,16 +218,26 @@ def sample_candidates(
215218
candidates.append(candidate)
216219

217220
if (
218-
"num_demonstrations"
219-
in self.config.variables # check if is variable in config
220-
and len(self.config.variables["num_demonstrations"])
221-
> 1 # check more than 1 option
222-
and 0 in [int(x) for x in self.config.variables["num_demonstrations"]]
223-
# check zeroshot is an option
221+
len(num_demonstrations_set) > 1 # check more than 1 option
222+
and 0 in num_demonstrations_set # check zeroshot is an option
224223
):
225-
zero_shotters = [x for x in candidates if x["num_demonstrations"] == 0]
224+
zero_shotters = [
225+
x.get("uuid") for x in candidates if x.get("num_demonstrations") == 0
226+
]
227+
variables_zs = self.config.variables.copy()
228+
variables_zs.pop("num_demonstrations", None)
229+
230+
max_zs = len(list(itertools.product(*variables_zs.values())))
231+
232+
if len(zero_shotters) > max_zs:
233+
logger.warning(
234+
"More zero-shot candidates (%d) than expected (%d; "
235+
"product of all variables). "
236+
"Identical duplicated candidates may waste compute.",
237+
len(zero_shotters),
238+
max_zs,
239+
)
226240

227-
assert len(zero_shotters) <= 3
228241
assert len(candidates) == num_candidates
229242
return candidates
230243

0 commit comments

Comments
 (0)