|
| 1 | +from __future__ import print_function, absolute_import |
| 2 | +from converters.utils import remove_fake_files_url |
| 3 | + |
| 4 | +# Stdlib |
| 5 | +import codecs |
| 6 | +import io |
| 7 | +import logging |
| 8 | +import os |
| 9 | +import pprint |
| 10 | +from types import FunctionType |
| 11 | + |
| 12 | +# From IPython |
| 13 | +from IPython.nbformat import current as nbformat |
| 14 | + |
| 15 | +# local |
| 16 | + |
| 17 | +#----------------------------------------------------------------------------- |
| 18 | +# Class declarations |
| 19 | +#----------------------------------------------------------------------------- |
| 20 | + |
class ConversionException(Exception):
    """Exception type declared for the converters.

    NOTE(review): presumably raised when a conversion fails; nothing in
    this section of the file raises it -- confirm against the callers.
    """
| 23 | + |
class DocStringInheritor(type):
    """Metaclass that copies base-class docstrings onto overriding methods.

    For every plain function in the body of the class being created, the
    direct bases are searched in order for an attribute of the same name.
    The first such attribute that has a non-empty docstring donates it to
    the new function.  Note that a docstring already present on the new
    function is replaced, not preserved.

    Please use carefully: the metaclass machinery follows Michael Foord's
    metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml) and may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """

    def __new__(meta, classname, bases, classDict):
        # Work on a copy so every attribute, function or not, is carried
        # over to the new class unchanged.
        namespace = dict(classDict)
        functions = [(name, member) for name, member in namespace.items()
                     if isinstance(member, FunctionType)]
        for name, func in functions:
            # Look through the bases for a same-named attribute that
            # actually carries a docstring; the first hit wins.
            for base in bases:
                if not hasattr(base, name):
                    continue
                parent_doc = getattr(base, name).__doc__
                if parent_doc:
                    func.__doc__ = parent_doc
                    break
        return type.__new__(meta, classname, bases, namespace)
| 52 | + |
class Converter(object):
    """Base class for notebook converters.

    ``convert`` walks the cells of ``self.nb`` and hands each one to the
    ``render_<cell_type>`` method looked up by ``dispatch``.  Most render
    methods raise ``NotImplementedError`` here and are meant to be provided
    by subclasses, which also set ``extension`` for the output file name.
    """
    # Python 2 metaclass hook: see DocStringInheritor.
    __metaclass__ = DocStringInheritor
    default_encoding = 'utf-8'
    extension = ''
    figures_counter = 0
    infile = ''
    infile_dir = ''
    infile_root = ''
    files_dir = ''
    with_preamble = True
    user_preamble = None
    output = u''
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create the ``<root>_files`` directory.

        Parameters
        ----------
        infile : string
            Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        files_dir = os.path.join(self.infile_dir, infile_root + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        """Remove the files directory again if it exists and is empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example render_output_text

        Falls back to ``render_unknown_display`` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
          Character or string to join cells with. Default is a newline.

        Returns
        -------
        out : string
          Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet in ``self.nb``.

        Markdown and raw cells are passed through ``remove_fake_files_url``
        before being rendered.

        Parameters
        ----------
        cell_separator : string
          String placed between the rendered cells.

        Returns
        -------
        lines : list
          The joined cell text split back into individual lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        return cell_separator.join(converted_cells).split('\n')

    def render(self):
        """read, convert, and save self.infile

        The notebook is only read if ``self.nb`` has not been set yet.
        Returns the absolute path reported by ``save``.
        """
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        # save() writes through io.open in text mode, so the converted
        # document has to be unicode (Python 2).
        assert(type(self.output) == unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """write self.output to a file

        Parameters
        ----------
        outfile : string, optional
          Destination path. Defaults to ``self.outbase + '.' + self.extension``.
        encoding : string, optional
          Text encoding of the file. Defaults to ``self.default_encoding``.

        Returns
        -------
        path : string
          Absolute path of the file that was written.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<infile_root>_fig_<NN>.<fmt>`` using the current
        ``figures_counter``, which is then incremented.

        Parameters
        ----------
        data : string
          Figure content; base64 text for png/jpg/pdf, written as text in
          ``default_encoding`` for any other format.
        fmt : string
          Format name, also used as the file extension.

        Returns the path of the new file: ``self.files_dir`` joined with the
        figure name.
        """
        figname = '%s_fig_%02i.%s' % (self.infile_root,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # Python 2 'base64' string codec.
            data = data.decode('base64')
            figure_file = open(fullname, 'wb')
        else:
            figure_file = codecs.open(fullname, 'wb', self.default_encoding)

        with figure_file as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats (png, svg, jpg, pdf) are written to a figure file and
        turned into lines by ``_<fmt>_lines`` if the instance has one, else
        by ``_img_lines``.  Every other key except ``output_type`` goes to
        the method chosen by ``dispatch_display_format``.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ('png', 'svg', 'jpg', 'pdf'):
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and renders the pretty-printed cell through
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        Logs a warning and renders the pretty-printed output through
        ``_unknown_lines``.

        Parameters
        ----------
        output : output node
          The output whose format has no dedicated render method.
        type : optional
          Unused. Optional because ``render_display_data`` calls the
          dispatched method with the output only; kept so existing
          two-argument calls still work.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
| 323 | + |
0 commit comments