当 Python 程序中发生异常时,通常会打印出 traceback。了解如何阅读 traceback 可以帮助您轻松识别错误并进行修复。在本教程中,我们将了解 traceback 能告诉我们什么。
完成本教程后,您将了解:
- 如何阅读 traceback
- 如何在没有异常的情况下打印调用堆栈
- traceback 中未显示的内容
通过我的新书 Python for Machine Learning 快速启动您的项目,其中包含分步教程和所有示例的Python 源代码文件。
让我们开始吧。
理解 Python 中的 Traceback
照片作者:Marten Bjork,部分权利保留
教程概述
本教程分为四个部分;它们是
- 简单程序的调用层次结构
- 异常时的 traceback
- 手动触发 traceback
- 模型训练中的一个示例
简单程序的调用层次结构
让我们看一个简单的程序
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
def indentprint(x, indent=0, prefix="", suffix=""):  # print x indented, dispatching on its type
    if isinstance(x, dict):
        printdict(x, indent, prefix, suffix)
    elif isinstance(x, list):
        printlist(x, indent, prefix, suffix)
    elif isinstance(x, str):
        printstring(x, indent, prefix, suffix)
    else:
        printnumber(x, indent, prefix, suffix)  # everything else, including None

def printdict(x, indent, prefix, suffix):  # print dict x as "{ key: value, ... }"
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last entry
        indentprint(x[key], indent+2, str(key)+": ", comma)
    print(spaces + "}" + suffix)

def printlist(x, indent, prefix, suffix):  # print list x as "[ item, ... ]"
    spaces = " " * indent
    print(spaces + prefix + "[")
    for n, item in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last item
        indentprint(item, indent+2, "", comma)
    print(spaces + "]" + suffix)

def printstring(x, indent, prefix, suffix):  # print x wrapped in double quotes
    spaces = " " * indent
    print(spaces + prefix + '"' + str(x) + '"' + suffix)

def printnumber(x, indent, prefix, suffix):  # print str(x) without quotes
    spaces = " " * indent
    print(spaces + prefix + str(x) + suffix)

data = {
    "a": [{
        "p": 3,
        "q": 4,
        "r": [3,4,5],
    },{
        "f": "foo",
        "g": 2.71
    },{
        "u": None,
        "v": "bar"
    }],
    "c": {
        "s": ["fizz", 2, 1.1],
        "t": []
    },
}

indentprint(data)
这个程序会以带缩进的格式打印 Python 字典 data。它的输出如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    },
    {
      f: "foo",
      g: 2.71
    },
    {
      u: None,
      v: "bar"
    }
  ],
  c: {
    s: [
      "fizz",
      2,
      1.1
    ],
    t: [
    ]
  }
}
这是一个简短的程序,但函数之间相互调用。如果我们为每个函数开头添加一行,我们可以通过控制流揭示输出是如何产生的。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
def indentprint(x, indent=0, prefix="", suffix=""):  # print x indented, dispatching on its type
    print(f'indentprint(x, {indent}, "{prefix}", "{suffix}")')  # trace the call
    if isinstance(x, dict):
        printdict(x, indent, prefix, suffix)
    elif isinstance(x, list):
        printlist(x, indent, prefix, suffix)
    elif isinstance(x, str):
        printstring(x, indent, prefix, suffix)
    else:
        printnumber(x, indent, prefix, suffix)  # everything else, including None

def printdict(x, indent, prefix, suffix):  # print dict x as "{ key: value, ... }"
    print(f'printdict(x, {indent}, "{prefix}", "{suffix}")')  # trace the call
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last entry
        indentprint(x[key], indent+2, str(key)+": ", comma)
    print(spaces + "}" + suffix)

def printlist(x, indent, prefix, suffix):  # print list x as "[ item, ... ]"
    print(f'printlist(x, {indent}, "{prefix}", "{suffix}")')  # trace the call
    spaces = " " * indent
    print(spaces + prefix + "[")
    for n, item in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last item
        indentprint(item, indent+2, "", comma)
    print(spaces + "]" + suffix)

def printstring(x, indent, prefix, suffix):  # print x wrapped in double quotes
    print(f'printstring(x, {indent}, "{prefix}", "{suffix}")')  # trace the call
    spaces = " " * indent
    print(spaces + prefix + '"' + str(x) + '"' + suffix)

def printnumber(x, indent, prefix, suffix):  # print str(x) without quotes
    print(f'printnumber(x, {indent}, "{prefix}", "{suffix}")')  # trace the call
    spaces = " " * indent
    print(spaces + prefix + str(x) + suffix)
输出会因更多信息而变得混乱。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
indentprint(x, 0, "", "")
printdict(x, 0, "", "")
{
indentprint(x, 2, "a: ", ",")
printlist(x, 2, "a: ", ",")
  a: [
indentprint(x, 4, "", ",")
printdict(x, 4, "", ",")
    {
indentprint(x, 6, "p: ", ",")
printnumber(x, 6, "p: ", ",")
      p: 3,
indentprint(x, 6, "q: ", ",")
printnumber(x, 6, "q: ", ",")
      q: 4,
indentprint(x, 6, "r: ", "")
printlist(x, 6, "r: ", "")
      r: [
indentprint(x, 8, "", ",")
printnumber(x, 8, "", ",")
        3,
indentprint(x, 8, "", ",")
printnumber(x, 8, "", ",")
        4,
indentprint(x, 8, "", "")
printnumber(x, 8, "", "")
        5
      ]
    },
indentprint(x, 4, "", ",")
printdict(x, 4, "", ",")
    {
indentprint(x, 6, "f: ", ",")
printstring(x, 6, "f: ", ",")
      f: "foo",
indentprint(x, 6, "g: ", "")
printnumber(x, 6, "g: ", "")
      g: 2.71
    },
indentprint(x, 4, "", "")
printdict(x, 4, "", "")
    {
indentprint(x, 6, "u: ", ",")
printnumber(x, 6, "u: ", ",")
      u: None,
indentprint(x, 6, "v: ", "")
printstring(x, 6, "v: ", "")
      v: "bar"
    }
  ],
indentprint(x, 2, "c: ", "")
printdict(x, 2, "c: ", "")
  c: {
indentprint(x, 4, "s: ", ",")
printlist(x, 4, "s: ", ",")
    s: [
indentprint(x, 6, "", ",")
printstring(x, 6, "", ",")
      "fizz",
indentprint(x, 6, "", ",")
printnumber(x, 6, "", ",")
      2,
indentprint(x, 6, "", "")
printnumber(x, 6, "", "")
      1.1
    ],
indentprint(x, 4, "t: ", "")
printlist(x, 4, "t: ", "")
    t: [
    ]
  }
}
现在我们知道了每个函数被调用的顺序。这就是调用堆栈的概念。在任何时间点,当我们运行函数中的一行代码时,我们想知道是什么调用了该函数。
异常时的 traceback
假设我们在代码中不小心出现了一处笔误,如下所示:
1 2 3 4 5 6 7 |
def printdict(x, indent, prefix, suffix):  # print dict x as "{ key: value, ... }"
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last entry
        indentprint(x[key], indent+2, str(key)+": ", comma)
    print(spaces + "}") + suffix  # deliberate typo for the demo: ")" closes too early, so this is None + str
笔误出现在最后一行:右括号应该位于行尾,而不是在 + 之前。print() 函数的返回值是 Python 的 None 对象,而对 None 做加法运算会触发异常。
如果您使用 Python 解释器运行此程序,您会看到这个:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    }
Traceback (most recent call last):
  File "tb.py", line 52, in <module>
    indentprint(data)
  File "tb.py", line 3, in indentprint
    printdict(x, indent, prefix, suffix)
  File "tb.py", line 16, in printdict
    indentprint(x[key], indent+2, str(key)+": ", comma)
  File "tb.py", line 5, in indentprint
    printlist(x, indent, prefix, suffix)
  File "tb.py", line 24, in printlist
    indentprint(item, indent+2, "", comma)
  File "tb.py", line 3, in indentprint
    printdict(x, indent, prefix, suffix)
  File "tb.py", line 17, in printdict
    print(spaces + "}") + suffix
TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
从“Traceback (most recent call last):”这一行开始的内容就是 traceback,它是程序遇到异常时的调用堆栈。在上面的示例中,traceback 按“最近的调用在最后”(most recent call last)的顺序排列:主程序在顶部,而触发异常的函数在底部。因此我们知道问题出在 printdict() 函数内部。
通常,您会在 traceback 的末尾看到错误消息。在此示例中,它是 TypeError
,由将 None
和字符串相加触发。但 traceback 的帮助到此为止。您需要找出哪个是 None
,哪个是字符串。通过阅读 traceback,我们还知道触发异常的函数 printdict()
是由 indentprint()
调用的,而 indentprint()
又是由 printlist()
调用的,依此类推。
如果您在 Jupyter notebook 中运行,输出如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
{
  a: [
    {
      p: 3,
      q: 4,
      r: [
        3,
        4,
        5
      ]
    }
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2508041071.py in <module>
----> 1 indentprint(x)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      1 def indentprint(x, indent=0, prefix="", suffix=""):
      2     if isinstance(x, dict):
----> 3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
      5         printlist(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printdict(x, indent, prefix, suffix)
     14     for n, key in enumerate(x):
     15         comma = "," if n!=len(x)-1 else ""
---> 16         indentprint(x[key], indent+2, str(key)+": ", comma)
     17     print(spaces + "}") + suffix
     18 

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
----> 5         printlist(x, indent, prefix, suffix)
      6     elif isinstance(x, str):
      7         printstring(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printlist(x, indent, prefix, suffix)
     22     for n, item in enumerate(x):
     23         comma = "," if n!=len(x)-1 else ""
---> 24         indentprint(item, indent+2, "", comma)
     25     print(spaces + "]" + suffix)
     26 

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in indentprint(x, indent, prefix, suffix)
      1 def indentprint(x, indent=0, prefix="", suffix=""):
      2     if isinstance(x, dict):
----> 3         printdict(x, indent, prefix, suffix)
      4     elif isinstance(x, list):
      5         printlist(x, indent, prefix, suffix)

/var/folders/6z/w0ltb1ss08l593y5xt9jyl1w0000gn/T/ipykernel_37031/2327707064.py in printdict(x, indent, prefix, suffix)
     15         comma = "," if n!=len(x)-1 else ""
     16         indentprint(x[key], indent+2, str(key)+": ", comma)
---> 17     print(spaces + "}") + suffix
     18 
     19 def printlist(x, indent, prefix, suffix):

TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
这些信息基本相同,但它还显示了每个函数调用所在行前后的几行代码作为上下文。
想开始学习机器学习 Python 吗?
立即参加我为期7天的免费电子邮件速成课程(附示例代码)。
点击注册,同时获得该课程的免费PDF电子书版本。
手动触发 traceback
打印 traceback 最简单的方法是添加一个 raise
语句来手动创建一个异常。但这也会终止您的程序。如果我们想在任何时候打印堆栈,即使没有任何异常,我们也可以这样做:
1 2 3 4 5 6 7 8 9 10 |
import traceback

def printdict(x, indent, prefix, suffix):  # print dict x as "{ key: value, ... }"
    spaces = " " * indent
    print(spaces + prefix + "{")
    for n, key in enumerate(x):
        comma = "," if n!=len(x)-1 else ""  # no comma after the last entry
        indentprint(x[key], indent+2, str(key)+": ", comma)
    traceback.print_stack()    # print the current call stack (to stderr); no exception needed
    print(spaces + "}" + suffix)
traceback.print_stack()
这行代码将打印当前的调用堆栈。
但事实上,我们通常只在出错时才想打印 traceback(以便我们了解出错的原因)。更常见的用例是以下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import traceback
import random

def compute():
    """Return n/m for two random integers drawn from 0..10.

    Raises ZeroDivisionError whenever m happens to be 0; this is the hidden
    flaw the example is meant to expose.
    """
    n = random.randint(0, 10)
    m = random.randint(0, 10)
    return n/m

def compute_many(n_times):
    """Call compute() n_times; on failure, report it and print the traceback.

    The exception is handled here instead of terminating the program, so the
    traceback has to be printed explicitly with traceback.print_exc().
    """
    try:
        for _ in range(n_times):
            x = compute()
        print(f"Completed {n_times} times")
    except Exception:
        # Not a bare "except:", which would also swallow KeyboardInterrupt
        # and SystemExit.
        print("Something wrong")
        traceback.print_exc()

compute_many(100)
这是重复调用某个计算函数(例如蒙特卡洛模拟)的典型模式。但如果不够小心,就可能遇到错误,例如上面的示例可能出现除以零的情况。问题在于,在更复杂的计算中,您很难一眼看出缺陷所在;就像上面的例子,问题隐藏在对 compute() 的调用中。因此,了解错误是如何产生的会很有帮助。但同时,我们希望处理错误情况,而不是让整个程序终止。如果使用 try-except 结构,traceback 默认不会被打印,因此需要手动调用 traceback.print_exc() 来打印。
实际上,我们可以让 traceback 更详尽。因为 traceback 是调用堆栈,我们可以检查调用堆栈中的每个函数并检查每个级别的变量。在复杂的案例中,这是我通常用于进行更详细跟踪的函数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
def print_tb_with_local():
    """Print the active exception's traceback plus each frame's local variables.

    Call this while an exception is being handled (inside an ``except``
    block): the frames are taken from ``sys.exc_info()``. Outside of one there
    is no traceback to walk, so no frames are reported. All output goes to
    ``sys.stderr``.
    """
    import sys
    import traceback
    tb = sys.exc_info()[2]
    stack = []
    while tb:
        # Keep tb_lineno (the line that was executing when the exception
        # passed through) rather than frame.f_lineno, which for the frame
        # that caught the exception has already moved on into the handler.
        stack.append((tb.tb_frame, tb.tb_lineno))
        tb = tb.tb_next  # an attribute, not a method: the next (more recent) level, or None
    traceback.print_exc()
    # Frames were collected from the catching frame inward, i.e. in the same
    # order as the traceback printed above.
    print("Locals by frame, most recent call last", file=sys.stderr)
    for frame, lineno in stack:
        print("Frame {0} in {1} at line {2}".format(
            frame.f_code.co_name,
            frame.f_code.co_filename,
            lineno), file=sys.stderr)
        for key, value in frame.f_locals.items():
            # Keep the name and its value on one line.
            print("\t%20s = " % key, end="", file=sys.stderr)
            try:
                print(repr(value), file=sys.stderr)
            except Exception:
                # A misbehaving __repr__ must not abort the dump; just end the line.
                print("", file=sys.stderr)
模型训练示例
traceback 中报告的调用堆栈有一个限制:您只能看到 Python 函数。对于您自己编写的程序来说,这通常没有问题,但许多大型 Python 库有一部分是用其他语言编写并编译成二进制文件的。TensorFlow 就是一个例子:出于性能考虑,它的所有底层运算都以编译后的二进制代码实现。因此,如果您运行以下代码,您会看到一些不同的东西:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import numpy as np

sequence = np.arange(0.1, 1.0, 0.1)  # 0.1 to 0.9
n_in = len(sequence)
sequence = sequence.reshape((1, n_in, 1))

# define model
import tensorflow as tf
from tensorflow.keras.layers import LSTM, RepeatVector, Dense, TimeDistributed, Input
from tensorflow.keras import Sequential, Model

model = Sequential([
    LSTM(100, activation="relu", input_shape=(n_in+1, 1)),  # deliberately wrong for the demo: the data has n_in steps
    RepeatVector(n_in),
    LSTM(100, activation="relu", return_sequences=True),
    TimeDistributed(Dense(1))
])
model.compile(optimizer="adam", loss="mse")

model.fit(sequence, sequence, epochs=300, verbose=0)
模型中第一个 LSTM 层的 input_shape
参数应为 (n_in, 1)
以匹配输入数据,而不是 (n_in+1, 1)
。运行最后一行时,此代码将打印以下错误:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
Traceback (most recent call last):
  File "trback3.py", line 20, in <module>
    model.fit(sequence, sequence, epochs=300, verbose=0)
  File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/usr/local/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py", line 1129, in autograph_handler
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 808, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.9/site-packages/keras/engine/input_spec.py", line 263, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 10, 1), found shape=(None, 9, 1)
如果查看堆栈跟踪,您将无法看到完整的调用堆栈。例如,您知道顶层调用了 model.fit()
,但第二层来自一个名为 error_handler()
的函数。在这里,您看不到 fit()
函数是如何触发它的。这是因为 TensorFlow 经过了高度优化。很多东西都隐藏在编译后的代码中,Python 解释器无法看到。
在这种情况下,耐心地阅读堆栈跟踪并找到原因的线索至关重要。当然,错误消息通常也会为您提供一些有用的提示。
进一步阅读
如果您想深入了解,本节提供了更多关于该主题的资源。
书籍
- Python Cookbook, 3rd edition by David Beazley and Brian K. Jones
Python 官方文档
总结
在本教程中,您学习了如何读取和打印 Python 程序中的堆栈跟踪。
具体来说,您学到了:
- 堆栈跟踪告诉您哪些信息
- 如何在不引发异常的情况下在程序的任何点打印堆栈跟踪
在下一篇文章中,我们将学习如何在 Python 调试器中导航调用堆栈。
非常感谢。我将尝试练习这个 bash 脚本。如果遇到问题,我会向您提问的,亲爱的。
不客气,Manza!继续努力!
感谢您的这篇文章。非常有帮助。
一条评论
我认为
print_tb_with_local()
的第 10 行有一个拼写错误应该是
tb = tb.tb_next
(没有括号)。
祝好,
Yuval
感谢 Yuval 的反馈!