-
Notifications
You must be signed in to change notification settings - Fork 4.5k
Expand file tree
/
Copy pathprofiler.py
More file actions
180 lines (157 loc) · 5.72 KB
/
profiler.py
File metadata and controls
180 lines (157 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""A profiler context manager based on cProfile.Profile and guppy.hpy objects.
For internal use only; no backwards-compatibility guarantees.
"""
# pytype: skip-file
# mypy: check-untyped-defs
import cProfile
import io
import logging
import os
import pstats
import random
import tempfile
import time
from typing import Callable
from typing import Optional
from apache_beam.io import filesystems
# Module-level logger; the Profile class below uses it to log profiling
# start/stop messages, upload destinations and (optionally) profiler results.
_LOGGER = logging.getLogger(__name__)
class Profile(object):
    """cProfile and Heapy wrapper context for saving and logging results.

    Used as a context manager: entering starts the enabled profilers, and
    leaving stops them, then optionally uploads the collected data to
    ``profile_location`` and/or writes it to the log.
    """

    # Sort key handed to pstats.Stats.sort_stats() when logging CPU results.
    SORTBY = 'cumulative'

    # Destination of the uploaded CPU profile. Only assigned by __exit__ when
    # CPU profiling is enabled and a profile_location was given.
    profile_output: str
    # Sorted CPU statistics. Only assigned by __exit__ when log_results is
    # true and CPU profiling is enabled.
    stats: pstats.Stats

    def __init__(
        self,
        profile_id: str,
        profile_location: Optional[str] = None,
        log_results: bool = False,
        file_copy_fn: Optional[Callable[[str, str], None]] = None,
        time_prefix: str = '%Y-%m-%d_%H_%M_%S-',
        enable_cpu_profiling: bool = False,
        enable_memory_profiling: bool = False,
    ):
        """Creates a Profile object.

        Args:
          profile_id: Unique id of the profiling session.
          profile_location: The file location where the profiling results
            will be stored.
          log_results: Log the result to console if true.
          file_copy_fn: Function called as ``file_copy_fn(src, dest)`` to
            copy a local result file to its destination. Defaults to
            ``default_file_copy_fn``.
          time_prefix: ``time.strftime`` format of the timestamp prefix in
            profiling result file names.
          enable_cpu_profiling: CPU profiler will be enabled during the
            profiling session.
          enable_memory_profiling: Memory profiler will be enabled during
            the profiling session, the profiler only records the newly
            allocated objects in this session.
        """
        self.profile_id = str(profile_id)
        self.profile_location = profile_location
        self.log_results = log_results
        self.file_copy_fn = file_copy_fn or self.default_file_copy_fn
        self.time_prefix = time_prefix
        self.enable_cpu_profiling = enable_cpu_profiling
        self.enable_memory_profiling = enable_memory_profiling

    def __enter__(self):
        """Starts the enabled profilers and returns this object."""
        _LOGGER.info('Start profiling: %s', self.profile_id)
        if self.enable_cpu_profiling:
            self.profile = cProfile.Profile()
            self.profile.enable()
        if self.enable_memory_profiling:
            try:
                # guppy is optional; without it memory profiling is skipped.
                from guppy import hpy
                self.hpy = hpy()
                self.hpy.setrelheap()
            except ImportError:
                _LOGGER.info("Unable to import guppy for memory profiling")
                self.hpy = None
        return self

    def __exit__(self, *args):
        """Stops the profilers, then uploads and/or logs their results.

        Returns None, so an exception raised inside the ``with`` body is
        never suppressed.
        """
        _LOGGER.info('Stop profiling: %s', self.profile_id)

        if self.enable_cpu_profiling:
            # Always stop the CPU profiler. Previously it was only stopped
            # as a side effect of create_stats() / pstats.Stats(), so with
            # no profile_location and log_results=False it kept running
            # after the context had been left.
            self.profile.disable()

        # self.hpy only exists when memory profiling was requested, and is
        # None when guppy could not be imported.
        heapy = self.hpy if self.enable_memory_profiling else None

        if self.profile_location:
            if self.enable_cpu_profiling:
                self.profile.create_stats()
                self.profile_output = self._upload_profile_data(
                    self.profile_location,
                    'cpu_profile',
                    # typing: seems stats attr is missing from typeshed
                    self.profile.stats)
            if heapy:
                h = heapy.heap()
                heap_dump_data = '%s\n%s' % (h, h.more)
                self._upload_profile_data(
                    self.profile_location,
                    'memory_profile',
                    heap_dump_data,
                    write_binary=False)

        if self.log_results:
            if self.enable_cpu_profiling:
                s = io.StringIO()
                self.stats = pstats.Stats(
                    self.profile, stream=s).sort_stats(Profile.SORTBY)
                self.stats.print_stats()
                _LOGGER.info('Cpu profiler data: [%s]', s.getvalue())
            if heapy:
                # Lazy logger arguments instead of eager %-formatting.
                _LOGGER.info('Memory profiler data: \n%s', heapy.heap())

    @staticmethod
    def default_file_copy_fn(src, dest):
        """Copies local file ``src`` to ``dest`` through Beam FileSystems.

        The data is first written to ``dest + '.tmp'`` and then renamed to
        ``dest``; the rename is skipped if the write raises.
        """
        dest_handle = filesystems.FileSystems.create(dest + '.tmp')
        try:
            with open(src, 'rb') as src_handle:
                dest_handle.write(src_handle.read())
        finally:
            dest_handle.close()
        filesystems.FileSystems.rename([dest + '.tmp'], [dest])

    @staticmethod
    def factory_from_options(options) -> Optional[Callable[..., 'Profile']]:
        """Builds a sampling Profile factory from pipeline options.

        Args:
          options: Object exposing ``profile_cpu``, ``profile_memory``,
            ``profile_sample_rate`` and ``profile_location`` attributes.

        Returns:
          None when neither CPU nor memory profiling is requested.
          Otherwise a callable ``create_profiler(profile_id, **kwargs)``
          which returns a Profile for a random ``profile_sample_rate``
          fraction of calls and None for the remaining calls.
        """
        if options.profile_cpu or options.profile_memory:

            def create_profiler(profile_id, **kwargs):
                if random.random() < options.profile_sample_rate:
                    return Profile(
                        profile_id,
                        options.profile_location,
                        enable_cpu_profiling=options.profile_cpu,
                        enable_memory_profiling=options.profile_memory,
                        **kwargs)
                # This call was not selected for sampling.
                return None

            return create_profiler
        return None

    def _upload_profile_data(
            self, profile_location, dir, data, write_binary=True) -> str:
        """Writes ``data`` to a temporary file and copies it to its target.

        Args:
          profile_location: Base location for profiling results.
          dir: Sub-directory name under ``profile_location``.
          data: Object to ``marshal`` when ``write_binary`` is true,
            otherwise the text to write.
          write_binary: Selects marshal (binary) or plain-text output.

        Returns:
          The destination path handed to ``file_copy_fn``.
        """
        # Only the prefix is a strftime format. The profile id is appended
        # verbatim so that a '%' inside it is not read as a directive.
        dump_location = os.path.join(
            profile_location,
            dir,
            time.strftime(self.time_prefix) + self.profile_id)
        fd, filename = tempfile.mkstemp()
        try:
            # Only the path is needed; the file is reopened by name below.
            os.close(fd)
            if write_binary:
                import marshal
                with open(filename, 'wb') as fb:
                    marshal.dump(data, fb)
            else:
                with open(filename, 'w') as f:
                    f.write(data)
            _LOGGER.info('Copying profiler data to: [%s]', dump_location)
            self.file_copy_fn(filename, dump_location)
        finally:
            # The local temporary copy is removed even if the copy failed.
            os.remove(filename)
        return dump_location