-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathPythonAutomation.cs
More file actions
439 lines (399 loc) · 18.6 KB
/
PythonAutomation.cs
File metadata and controls
439 lines (399 loc) · 18.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using JupyterKernelManager;
using StatTag.Core.Exceptions;
using StatTag.Core.Interfaces;
using StatTag.Core.Models;
using StatTag.Core.Parser;
using StatTag.Core.Utility;
namespace Jupyter
{
public class PythonAutomation : JupyterAutomation
{
/// <summary>
/// The parser used to interpret the code file. The constructor assigns a PythonParser to it.
/// </summary>
protected override sealed ICodeFileParser Parser { get; set; }
/// <summary>
/// The deepest level of '[' nesting that ParseTableResult accepts before it throws.
/// </summary>
public const int MAX_ARRAY_DEPTH = 2;
// The characters below drive the character-by-character state machine in ParseTableResult.
private const char ArrayStart = '[';
private const char ArrayEnd = ']';
private const char Escape = '\\';
private const char Delimiter = ',';
private const char SingleQuote = '\'';
private const char DoubleQuote = '"';
// Used by ParseTableResult to decide whether a string looks like a bracketed collection.
// Singleline lets '.' span newlines. Because Multiline makes '^' and '$' match at line boundaries,
// this accepts any text in which some line opens (after optional whitespace) with '[' and some
// line closes with ']' - it does not require the '[' to be the very first character of the text.
// NOTE(review): confirm whether accepting leading lines before the '[' is intended.
private static readonly Regex ValidArrayString = new Regex("^\\s*\\[.*\\]\\s*$", RegexOptions.Multiline | RegexOptions.Singleline);
// The commands issued by InstallationInformation. The key is the Python code to run and the value
// is the label printed in front of that command's result.
// NOTE(review): "sys.executable" has no import of its own, so it presumably depends on the
// "import sys" entry having already run in the same kernel, i.e. on the dictionary enumerating in
// insertion order - confirm that this ordering assumption is acceptable.
private static readonly Dictionary<string, string> PythonProfileCommands = new Dictionary<string, string>()
{
{ "import sys; print(sys.version)", "Version" },
{ "sys.executable", "Path" }
};
/// <summary>
/// The configuration handed to the constructor. SelectKernel reads its PythonKernels list.
/// </summary>
protected Configuration Config { get; set; }
/// <summary>
/// Create a Python automation object. The base class is started with the default Python kernel name,
/// and the kernel is then re-selected from the supplied configuration.
/// </summary>
/// <param name="config">The configuration that is stored in Config and consulted by SelectKernel</param>
public PythonAutomation(Configuration config)
    : base(Configuration.DefaultPythonKernel)
{
    Parser = new PythonParser();
    Config = config;

    // Kernel selection reads Config.PythonKernels, so it has to run after Config is assigned.
    SelectKernel();
}
/// <summary>
/// Find the first installed kernel that matches our configured whitelist of Python kernels, and store
/// its name (as reported by the kernel spec manager) in KernelName. The whitelist is walked in its own
/// order, so earlier entries take priority. If there is no configuration, no whitelist, or no match,
/// KernelName is set to the default Python kernel.
/// </summary>
private void SelectKernel()
{
    // A missing configuration or whitelist simply means we have nothing to match against, so we skip
    // the search instead of failing with a null reference.
    if (Config != null && Config.PythonKernels != null)
    {
        var specs = new KernelSpecManager().GetAllSpecs().Keys;
        foreach (var kernel in Config.PythonKernels)
        {
            // Kernel names are identifiers, not linguistic text, so they are compared ordinally. A
            // culture-sensitive comparison can give different answers depending on the user's locale
            // (for example, the Turkish casing rules for 'i' and 'I').
            var match = specs.FirstOrDefault(x => string.Equals(x, kernel, StringComparison.OrdinalIgnoreCase));
            if (match != null)
            {
                KernelName = match;
                return;
            }
        }
    }

    // It shouldn't have changed, but we will set the kernel explicitly to the default just in case.
    KernelName = Configuration.DefaultPythonKernel;
}
/// <summary>
/// Provides information about the Python installation on the user's machine. This uses an instance of the
/// automation object to issue commands in order to gather system information.
/// </summary>
/// <param name="config">The configuration object needed to initialize the automation object</param>
/// <returns>
/// A CheckResult. Result is true when the engine initialized and the commands were issued, and false
/// otherwise. Details holds the formatted text (system information or the failure description) that
/// can be displayed to the user.
/// </returns>
public static CheckResult InstallationInformation(Configuration config)
{
    var builder = new StringBuilder();
    var infoResult = new CheckResult();
    try
    {
        var engine = new PythonAutomation(config);
        try
        {
            if (engine.Initialize(null, new LogManager()))
            {
                foreach (var command in PythonProfileCommands)
                {
                    var result = engine.RunCommand(command.Key, new Tag() { Type = Constants.TagType.Value });
                    if (result != null && result.ValueResult != null)
                    {
                        builder.AppendFormat("{0} : {1}\r\n", command.Value, result.ValueResult.Trim());
                    }
                }
                infoResult.Result = true;
            }
            else
            {
                builder.Append("No Python environment with IPython support could be found");
                infoResult.Result = false;
            }
        }
        finally
        {
            // This engine only exists to answer the questions above, so release it when we are done.
            // The base class definition is not visible from this file; the check keeps this a no-op
            // if the engine turns out not to be disposable.
            var disposable = engine as IDisposable;
            if (disposable != null)
            {
                disposable.Dispose();
            }
        }
    }
    catch (Exception exc)
    {
        // Any failure (including one raised while creating or releasing the engine) is reported to the
        // user as a communication problem rather than being allowed to escape.
        builder.Append(
            "Unable to communicate with Python. Python or its kernel may not be installed or there might be other configuration issues.\r\n");
        builder.AppendFormat("{0}\r\n", exc.Message);
        infoResult.Result = false;
    }
    infoResult.Details = builder.ToString().Trim();
    return infoResult;
}
/// <summary>
/// Run a set of commands, first reshaping them so that the code is submitted as a single string
/// instead of line by line, and then handing them to the base implementation.
/// </summary>
/// <param name="commands">The lines of code to run</param>
/// <param name="tag">The tag being processed, or null when running a plain block of code</param>
/// <returns>The results returned by the base implementation</returns>
public override CommandResult[] RunCommands(string[] commands, Tag tag = null)
{
    if (tag != null)
    {
        // When processing a tag, the tag comments need to stay at the beginning and end of the command
        // array, with all of the actual code combined into a single string in the middle. Jupyter won't
        // do incremental code execution, so the full block has to be sent at once.
        var collapsed = CollapseTagCommandsArray(commands);
        return base.RunCommands(collapsed, tag);
    }

    // With no tag we are just running a big block of code, so everything is joined into one command.
    // That way we don't have to worry about multi-line commands, function definitions, etc.
    var combined = string.Join("\r\n", commands);
    return base.RunCommands(new[] { combined }, tag);
}
/// <summary>
/// For a figure tag, handle the results from processing a command to determine if an image result
/// can be derived.
/// </summary>
/// <param name="tag">The tag we are processing</param>
/// <param name="command">The Python command that was run</param>
/// <param name="result">A collection of Jupyter Message objects representing the results</param>
/// <returns>
/// The result from the base implementation if it produced one; otherwise, for an image export command,
/// a CommandResult whose FigureResult is the expanded path of the saved file. Null when the tag is not
/// a figure tag, the command is the tag start, or no image could be identified.
/// </returns>
public override CommandResult HandleImageResult(Tag tag, string command, List<Message> result)
{
    // Only figure tags are considered, and the start tag itself wouldn't have any results.
    if (tag.Type != Constants.TagType.Figure || Parser.IsTagStart(command))
    {
        return null;
    }

    // The base implementation gets the first chance to produce an image from the response.
    var embeddedImage = base.HandleImageResult(tag, command, result);
    if (embeddedImage != null)
    {
        return embeddedImage;
    }

    // Failing that, the only other source of an image is a file saved by an export command.
    if (!Parser.IsImageExport(command))
    {
        return null;
    }

    // The save location may be a file name, relative path, or absolute path. If the parser can't
    // find one, we fall back to the quoted literal "tmp" so there is still something to expand.
    var saveLocation = Parser.GetImageSaveLocation(command);
    if (string.IsNullOrWhiteSpace(saveLocation))
    {
        saveLocation = "\"tmp\"";
    }

    return new CommandResult() { FigureResult = GetExpandedFilePath(saveLocation) };
}
/// <summary>
/// Return an expanded, full file path - accounting for variables, functions, relative paths, etc.
/// The save location is evaluated by asking Python to print it. If the parser reports the printed
/// value as a relative path, it is combined with Python's current working directory, and the combined
/// path is used only when a file actually exists there.
/// </summary>
/// <param name="saveLocation">A Python expression that will be translated into a file path.</param>
/// <returns>
/// The full file path when it could be resolved to an existing file, otherwise the value Python printed
/// for the expression. Null if running the print command produced no result.
/// </returns>
protected string GetExpandedFilePath(string saveLocation)
{
    var fileLocation = RunCommand(string.Format("print({0})", saveLocation), new Tag() { Type = Constants.TagType.Value });
    if (fileLocation == null)
    {
        return null;
    }

    // 'as' (rather than a hard cast) so that a parser which is not a BaseParser skips the relative path
    // handling instead of throwing an InvalidCastException.
    var baseParser = Parser as BaseParser;
    if (baseParser == null || !baseParser.IsRelativePath(fileLocation.ValueResult))
    {
        return fileLocation.ValueResult;
    }

    // Attempt to find the current working directory. If we are not able to find it, or the value we end up
    // creating doesn't exist, we will just proceed with whatever image location we had previously.
    var workingDirResult = RunCommand("import os; print(os.getcwd())", new Tag() { Type = Constants.TagType.Value });
    if (workingDirResult == null || string.IsNullOrWhiteSpace(workingDirResult.ValueResult))
    {
        // Path.Combine rejects a null segment, so a result with no usable value is treated the same
        // as no result at all.
        return fileLocation.ValueResult;
    }

    var correctedPath = Path.GetFullPath(Path.Combine(workingDirResult.ValueResult, fileLocation.ValueResult));
    return File.Exists(correctedPath) ? correctedPath : fileLocation.ValueResult;
}
/// <summary>
/// For a tag, handle the results from processing a command to determine if a table result can be derived.
/// The first message is checked for an HTML value; if it has one the table is parsed from the HTML,
/// otherwise it is parsed from the message's text value.
/// </summary>
/// <param name="tag">The tag we are processing (if applicable)</param>
/// <param name="command">The Python command that was run</param>
/// <param name="result">A collection of Jupyter Message objects representing the results</param>
/// <returns>
/// A CommandResult whose TableResult contains the table data, if this is a table tag and the command is
/// one that produces a table. Null otherwise, including when there is no tag or no result collection.
/// </returns>
public override CommandResult HandleTableResult(Tag tag, string command, List<Message> result)
{
    // The tag is optional, so a missing tag (or tag type, or result list) means there is nothing for us
    // to handle - that is not an error.
    if (tag == null || tag.Type == null || result == null)
    {
        return null;
    }

    if (!tag.Type.Equals(Constants.TagType.Table) || Parser.IsTagStart(command) || !Parser.IsTableResult(command))
    {
        return null;
    }

    var message = result.FirstOrDefault();
    var htmlValue = GetHtmlValueResult(message);
    var table = string.IsNullOrWhiteSpace(htmlValue)
        ? ParseTableResult(GetTextValueResult(message))
        : ParseHtmlTableResult(htmlValue);
    return new CommandResult() { TableResult = table };
}
/// <summary>
/// Given a string containing HTML, extract a balanced table from the first table element found.
/// Rows may have differing numbers of cells; the widest row determines the column count and shorter
/// rows are padded with empty (null) cells.
/// NOTE: This does not account for rowspan/colspan!
/// </summary>
/// <param name="valueString">The HTML to search for a table</param>
/// <returns>
/// A populated Table, or an empty Table if the string is blank, contains no table, or the table has
/// no rows or no cells.
/// </returns>
public Table ParseHtmlTableResult(string valueString)
{
    if (string.IsNullOrWhiteSpace(valueString))
    {
        return new Table();
    }

    var htmlFragment = new HtmlDocument();
    htmlFragment.LoadHtml(valueString);
    var table = htmlFragment.DocumentNode.SelectSingleNode(".//table");
    if (table == null)
    {
        // If there is no table node present, we will return an empty table instead of an error.
        return new Table();
    }

    // There's a lot of variation in tables - they can have THEAD and TBODY or not. They can use TH instead of TD in different places.
    // We need to account for all of those scenarios.
    var rows = table.SelectNodes(".//tr");
    if (rows == null || rows.Count == 0)
    {
        return new Table();
    }

    // Keep in mind that rows and columns can be uneven. We're going to start with the assumption that the rows are right, and determine
    // which column count (the max) best represents the total for this data.
    int numRows = rows.Count;
    int maxCols = 0;
    var data = new List<List<string>>(numRows);
    foreach (var row in rows)
    {
        var rowData = new List<string>();

        // SelectNodes gives back null (not an empty collection) when nothing matches, which is what
        // happens for a row with no cells. Such a row is kept as an empty row so the row count is
        // preserved.
        var cells = row.SelectNodes("./td | ./th");
        if (cells != null)
        {
            foreach (var cell in cells)
            {
                rowData.Add(FormatStringFromHtml(cell.GetDirectInnerText()));
            }
        }

        maxCols = Math.Max(maxCols, rowData.Count);
        data.Add(rowData);
    }

    if (maxCols == 0)
    {
        // Rows without a single cell between them carry no data, so treat it like a missing table.
        return new Table();
    }

    return new Table(numRows, maxCols, TableUtil.MergeTableVectorsToArray(null, null, FlattenDataToArray(numRows, maxCols, data), numRows, maxCols));
}
/// <summary>
/// Take a string result from the Python kernel and convert it into a Table structure that StatTag
/// can use to populate a table in Word. The string is expected to be the printed form of a one or
/// two dimensional collection, such as [1, 2, 3] or [['a', 'b'], ['c', 'd']].
/// </summary>
/// <remarks>
/// Text inside single or double quotes is taken literally, so commas, brackets and whitespace within a
/// quoted string are part of the value. A backslash causes the next character to be taken literally
/// (the backslash itself is dropped; escape sequences such as \n are not translated). Whitespace
/// outside of quotes, including line breaks between rows, is ignored.
/// </remarks>
/// <param name="valueString">The string result from the Python kernel</param>
/// <returns>A populated Table structure, or an empty Table if the string is not a bracketed collection</returns>
/// <exception cref="StatTagUserException">Thrown if collections are nested deeper than MAX_ARRAY_DEPTH</exception>
public Table ParseTableResult(string valueString)
{
    // If the string has no data, we want to safely return an empty table. Similarly, if the table string does not
    // look like a bracketed collection, we don't know how to process it.
    if (string.IsNullOrWhiteSpace(valueString) || !ValidArrayString.IsMatch(valueString))
    {
        return new Table(0, 0, null);
    }

    // Go through each character and process the state change for each symbol, collecting data
    // along the way. And yes, we are ignoring the nuances of Python collections and just referring
    // to them as "arrays" within the code.
    int arrayDepth = 0;
    int maxCols = 0;
    char? activeQuoteChar = null;
    bool inOpenArray = false;
    bool isEscaped = false;
    var currentValue = new StringBuilder();
    // The row that values are currently being added to, or null if no value has been seen yet for the
    // array we are in. Rows are only created once there is something to put in them, so empty arrays
    // do not produce rows.
    List<string> currentRow = null;
    var data = new List<List<string>>();

    foreach (var letter in valueString)
    {
        if (isEscaped)
        {
            isEscaped = false;
            // Fall through so we pick up the escaped character
        }
        else if (letter == Escape)
        {
            isEscaped = true;
            continue;
        }
        else if (activeQuoteChar.HasValue)
        {
            // Inside a quoted string nothing is structural. The only character with meaning is the
            // matching quote that closes the string; the other kind of quote is just text.
            if (letter == activeQuoteChar.Value)
            {
                activeQuoteChar = null;
                continue;
            }
            // Otherwise, fall through and pick up the character as part of the string literal.
        }
        else if (letter == SingleQuote || letter == DoubleQuote)
        {
            activeQuoteChar = letter;
            // An opening quote begins a value even if the string turns out to be empty, so the row has
            // to exist now. Otherwise an empty string in the first column would have no row to go in.
            if (currentRow == null)
            {
                currentRow = new List<string>();
                data.Add(currentRow);
            }
            continue;
        }
        else if (letter == ArrayStart)
        {
            currentRow = null;
            inOpenArray = true;
            currentValue.Clear();
            arrayDepth++;
            if (arrayDepth > MAX_ARRAY_DEPTH)
            {
                throw new StatTagUserException("StatTag is only able to handle 2-dimensional collections within Python");
            }
            continue;
        }
        else if (letter == ArrayEnd)
        {
            // Close out the current array if we have some data.
            if (currentRow != null)
            {
                currentRow.Add(currentValue.ToString());
                maxCols = Math.Max(maxCols, currentRow.Count);
            }
            currentValue.Clear();
            arrayDepth--;
            inOpenArray = false;
            currentRow = null;
            continue;
        }
        else if (letter == Delimiter)
        {
            // If we are outside of the array and find a delimiter between sub-arrays, we don't want to track this as
            // having found data.
            if (!inOpenArray)
            {
                continue;
            }
            // A delimiter with nothing in front of it still marks the end of an (empty) first value.
            if (currentRow == null)
            {
                currentRow = new List<string>();
                data.Add(currentRow);
            }
            currentRow.Add(currentValue.ToString());
            maxCols = Math.Max(maxCols, currentRow.Count);
            currentValue.Clear();
            continue;
        }
        else if (char.IsWhiteSpace(letter))
        {
            // We only get here outside of a quoted string, where whitespace (including the line breaks
            // that can separate rows) is just formatting. Because we need to respect the whitespace if
            // it's quoted, we can't just trim the string at the end, so we have this check in place.
            continue;
        }

        // If we haven't ruled it out, this character is part of a value that we are tracking and should
        // be included in the string.
        currentValue.Append(letter);

        // If this is our first time tracking data for our row, we need to initialize it
        if (currentRow == null)
        {
            currentRow = new List<string>();
            data.Add(currentRow);
        }
    }

    int rows = data.Count;
    return new Table(rows, maxCols, TableUtil.MergeTableVectorsToArray(null, null, FlattenDataToArray(rows, maxCols, data), rows, maxCols));
}
/// <summary>
/// Lay out row-by-row cell data as a single 1D array in row-major order, which is the shape the existing
/// StatTag table routines expect. Rows that are shorter than the requested width are padded with nulls.
/// </summary>
/// <param name="rows">The number of rows to take from the data</param>
/// <param name="cols">The width, in cells, of every row in the output</param>
/// <param name="data">The cell values, one list per row</param>
/// <returns>An array of rows * cols values, or null if either dimension is zero</returns>
private string[] FlattenDataToArray(int rows, int cols, List<List<string>> data)
{
    if (rows == 0 || cols == 0)
    {
        return null;
    }

    var cells = new string[rows * cols];
    var next = 0;
    for (var r = 0; r < rows; r++)
    {
        var source = data[r];
        var available = source.Count;
        for (var c = 0; c < cols; c++, next++)
        {
            // Not every row is guaranteed to be as wide as the table. Slots past the end of a short
            // row are simply left at their default of null.
            if (c < available)
            {
                cells[next] = source[c];
            }
        }
    }

    return cells;
}
}
}