In [1]:
import torch

In [2]:
# Check whether a GPU is available.
torch.cuda.is_available()

True

In [3]:
# Number of GPUs.
torch.cuda.device_count()

1

In [4]:
# By default a newly created tensor is stored in main memory and the CPU is
# used for computation.
x = torch.randn(2, 3)
print(x.is_cuda)
# A tensor's own methods can be used to move its data to the GPU.
# With n GPUs, the corresponding device ids are cuda:0, ..., cuda:n-1.
print(x.to('cuda:0').is_cuda)

False
True


In [5]:
# A tensor can be placed on the GPU at creation time by specifying `device`.
y = torch.randn(2, 3, device='cuda:0')
print(y.is_cuda)
# `.to('cpu')` gives a tensor for which `is_cuda` is False.
print(y.to('cpu').is_cuda)

True
False


In [6]:
# Operations between tensors that live on different devices are not supported:
# `x` was created on the CPU and `y` on cuda:0, so the addition raises.
# The failure is the point of this cell, so it is caught and displayed rather
# than left uncaught, which would stop "Restart & Run All" before later cells.
try:
    x + y
except RuntimeError as err:
    print(f'RuntimeError: {err}')

RuntimeError: ignored

In [7]:
import time

def measure_compute_time(device_id, dimension, num_iterations=10 ** 5):
    """
    Time repeated additions of a square matrix of ones on a given device.

    Illustrates how a device behaves under serial versus parallel work: the
    Python ``for`` loop issues the additions one after another (serial), while
    each ``x + x`` on a ``dimension`` x ``dimension`` matrix is an element-wise
    operation (parallel). When ``dimension`` is large, the parallel part
    dominates.

    Args:
        device_id: Device to run on, e.g. ``'cpu'`` or ``'cuda:0'``.
        dimension: Side length of the square matrix.
        num_iterations: How many times ``x + x`` is evaluated.
            Defaults to ``10 ** 5``.

    Returns:
        None. The elapsed wall-clock time in seconds is printed.
    """
    device = torch.device(device_id)
    on_cuda = device.type == 'cuda'

    if on_cuda:
        # Wait for any previously queued GPU work (and CUDA start-up) so that
        # it is not charged to this measurement.
        torch.cuda.synchronize(device)

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    x = torch.ones((dimension, dimension), device=device_id)
    # The for loop itself runs serially.
    for _ in range(num_iterations):
        # x is a matrix, so x + x is computed in parallel; the result is
        # intentionally discarded because only the timing matters.
        x + x
    if on_cuda:
        # CUDA kernels are launched asynchronously: without this barrier the
        # clock would stop while additions are still running on the GPU.
        torch.cuda.synchronize(device)
    elapsed_time = time.perf_counter() - start_time
    print(f'For device {device_id}, compute time = {elapsed_time: .4f}')

In [8]:
# A 1x1 matrix leaves nothing to parallelize, so this run compares the
# per-operation cost of the serial loop on each device.
for device_id in ('cpu', 'cuda:0'):
    measure_compute_time(device_id, 1)

For device cpu, compute time = 0.5259
For device cuda:0, compute time = 1.0495


In [9]:
# With a 1000x1000 matrix each addition touches a million elements, so this
# run compares the devices when the parallel part of the work dominates.
for device_id in ('cpu', 'cuda:0'):
    measure_compute_time(device_id, 1000)

For device cpu, compute time = 27.2803
For device cuda:0, compute time = 1.1041
