Pytorch|Pytorch Transforms Tensor PytorchTransformsTensor

transforms代码

class Compose(object):
    """Compose several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to
            compose. They are applied in list order.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        """Pass ``img`` through every transform in order and return the result."""
        for t in self.transforms:
            img = t(img)
        return img

    def __repr__(self):
        # One line for the opening "Name(", one per transform, one for ")".
        parts = [self.__class__.__name__ + '(']
        parts.extend('{0}'.format(t) for t in self.transforms)
        parts.append(')')
        return '\n'.join(parts)

transforms.ToTensor()代码：把PIL.Image.Image或numpy.ndarray转换成Tensor，把（H,W,C）转换成（C,H,W），把（H,W）转换成（1,H,W）；若原数据类型是uint8，则把取值范围从[0, 255]归一化到[0.0, 1.0]。

class ToTensor(object):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255]
    to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
    if the PIL Image belongs to one of the modes
    (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has
    dtype = np.uint8.

    In the other cases, tensors are returned without scaling.
    """

    def __call__(self, pic):
        """
        Args:
            pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.
        """
        # NOTE(review): ``F`` is not defined in the visible part of the file;
        # presumably it is the module alias that exposes ``to_tensor``.
        return F.to_tensor(pic)

    def __repr__(self):
        return self.__class__.__name__ + '()'


def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.

    Raises:
        TypeError: if ``pic`` is neither a PIL Image nor an ndarray.
        ValueError: if ``pic`` is an ndarray that is not 2- or 3-dimensional.
    """
    if not (_is_pil_image(pic) or _is_numpy(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if _is_numpy(pic) and not _is_numpy_image(pic):
        raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))

    if isinstance(pic, np.ndarray):
        # handle numpy array: a 2-D (H, W) input gets a trailing channel axis
        if pic.ndim == 2:
            pic = pic[:, :, None]

        # HWC -> CHW
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        # backward compatibility: only byte data is rescaled to [0.0, 1.0]
        if isinstance(img, torch.ByteTensor):
            return img.float().div(255)
        else:
            return img

    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # handle PIL Image
    # np.asarray copies only when required. np.array(..., copy=False) has the
    # same meaning on NumPy 1.x but raises on NumPy 2 whenever a copy is
    # unavoidable (e.g. a dtype conversion).
    if pic.mode == 'I':
        img = torch.from_numpy(np.asarray(pic, dtype=np.int32))
    elif pic.mode == 'I;16':
        img = torch.from_numpy(np.asarray(pic, dtype=np.int16))
    elif pic.mode == 'F':
        img = torch.from_numpy(np.asarray(pic, dtype=np.float32))
    elif pic.mode == '1':
        img = 255 * torch.from_numpy(np.asarray(pic, dtype=np.uint8))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    # PIL image mode: L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK
    if pic.mode == 'YCbCr':
        nchannel = 3
    elif pic.mode == 'I;16':
        nchannel = 1
    else:
        # for the remaining modes the channel count equals the mode-name length
        nchannel = len(pic.mode)
    # pic.size is (width, height)
    img = img.view(pic.size[1], pic.size[0], nchannel)
    # put it from HWC to CHW format
    # yikes, this transpose takes 80% of the loading time/CPU
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.float().div(255)
    else:
        return img

将ndarray或tensor转换为PIL.Image。mode为Image的数据类型，可以为空。

class ToPILImage(object):
    """Convert a tensor or an ndarray to PIL Image.

    Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
    H x W x C to a PIL Image while preserving the value range.

    Args:
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
            If ``mode`` is ``None`` (default) there are some assumptions made about the input data:

            - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
            - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
            - If the input has 2 channels, the ``mode`` is assumed to be ``LA``.
            - If the input has 1 channel, the ``mode`` is determined by the data type
              (i.e ``int``, ``float``, ``short``).

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
    """

    def __init__(self, mode=None):
        self.mode = mode

    def __call__(self, pic):
        """
        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.

        Returns:
            PIL Image: Image converted to PIL Image.
        """
        # NOTE(review): ``F`` is not defined in the visible part of the file;
        # presumably it is the module alias that exposes ``to_pil_image``.
        return F.to_pil_image(pic, self.mode)

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        if self.mode is not None:
            format_string += 'mode={0}'.format(self.mode)
        format_string += ')'
        return format_string


def to_pil_image(pic, mode=None):
    """Convert a tensor or an ndarray to PIL Image.

    See :class:`~torchvision.transforms.ToPILImage` for more details.

    Args:
        pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
        mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).

    .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes

    Returns:
        PIL Image: Image converted to PIL Image.

    Raises:
        TypeError: if ``pic`` is neither a Tensor nor an ndarray, or if no
            mode could be determined for its dtype.
        ValueError: if ``pic`` is not 2- or 3-dimensional, or if ``mode`` is
            not permitted for the number of channels / dtype of ``pic``.
    """
    if not isinstance(pic, (torch.Tensor, np.ndarray)):
        raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))

    elif isinstance(pic, torch.Tensor):
        if pic.ndimension() not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))

        elif pic.ndimension() == 2:
            # if 2D image, add channel dimension (CHW)
            pic = pic.unsqueeze(0)

    elif isinstance(pic, np.ndarray):
        if pic.ndim not in {2, 3}:
            raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))

        elif pic.ndim == 2:
            # if 2D image, add channel dimension (HWC)
            pic = np.expand_dims(pic, 2)

    npimg = pic
    # Float tensors are rescaled to bytes unless the caller asked for mode 'F'.
    if isinstance(pic, torch.FloatTensor) and mode != 'F':
        pic = pic.mul(255).byte()
    if isinstance(pic, torch.Tensor):
        # CHW -> HWC
        npimg = np.transpose(pic.numpy(), (1, 2, 0))

    if not isinstance(npimg, np.ndarray):
        raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
                        'not {}'.format(type(npimg)))

    if npimg.shape[2] == 1:
        # Single channel: the mode follows from the dtype.
        expected_mode = None
        npimg = npimg[:, :, 0]
        if npimg.dtype == np.uint8:
            expected_mode = 'L'
        elif npimg.dtype == np.int16:
            expected_mode = 'I;16'
        elif npimg.dtype == np.int32:
            expected_mode = 'I'
        elif npimg.dtype == np.float32:
            expected_mode = 'F'
        if mode is not None and mode != expected_mode:
            # Report the dtype of the data, not the ``np.dtype`` class itself.
            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
                             .format(mode, npimg.dtype, expected_mode))
        mode = expected_mode

    elif npimg.shape[2] == 2:
        permitted_2_channel_modes = ['LA']
        if mode is not None and mode not in permitted_2_channel_modes:
            raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'LA'

    elif npimg.shape[2] == 4:
        permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
        if mode is not None and mode not in permitted_4_channel_modes:
            raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))

        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGBA'
    else:
        # Every other channel count is validated against the 3-channel modes.
        permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
        if mode is not None and mode not in permitted_3_channel_modes:
            raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
        if mode is None and npimg.dtype == np.uint8:
            mode = 'RGB'

    if mode is None:
        raise TypeError('Input type {} is not supported'.format(npimg.dtype))

    return Image.fromarray(npimg, mode=mode)

transforms.Normalize：创建Normalize对象时传入长度为C的mean和std序列，调用时传入形状为（C,H,W）的tensor图像。

class Normalize(object):
    """Normalize a tensor image with mean and standard deviation.

    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
    this transform will normalize each channel of the input ``torch.*Tensor``
    i.e. ``input[channel] = (input[channel] - mean[channel]) / std[channel]``

    .. note::
        This transform acts out of place by default, i.e., it does not mutate
        the input tensor unless ``inplace=True``.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool, optional): Bool to make this operation in-place.
    """

    def __init__(self, mean, std, inplace=False):
        self.mean = mean
        self.std = std
        self.inplace = inplace

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.

        Returns:
            Tensor: Normalized Tensor image.
        """
        # NOTE(review): ``F`` is not defined in the visible part of the file;
        # presumably it is the module alias that exposes ``normalize``.
        return F.normalize(tensor, self.mean, self.std, self.inplace)

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)


def normalize(tensor, mean, std, inplace=False):
    """Normalize a tensor image with mean and standard deviation.

    .. note::
        This transform acts out of place by default, i.e., it does not mutate
        the input tensor.

    See :class:`~torchvision.transforms.Normalize` for more details.

    Args:
        tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
        inplace (bool, optional): Bool to make this operation inplace.

    Returns:
        Tensor: Normalized Tensor image.

    Raises:
        TypeError: if ``tensor`` is rejected by ``_is_tensor_image``.
    """
    if not _is_tensor_image(tensor):
        raise TypeError('tensor is not a torch image.')

    if not inplace:
        # Work on a copy so the in-place ops below leave the caller's tensor alone.
        tensor = tensor.clone()

    dtype = tensor.dtype
    mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
    std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
    # Index mean and std to shape (C, 1, 1) so they broadcast over H and W.
    tensor.sub_(mean[:, None, None]).div_(std[:, None, None])
    return tensor

transforms.LinearTransformation。先把图像flatten成形状为（1, D）的向量（D = C x H x W），减去mean_vector（形状为[D]的Tensor），再右乘transformation_matrix（形状为[D x D]的Tensor），最后用view(tensor.size())恢复成原来的形状。

class LinearTransformation(object):
    """Transform a tensor image with a square transformation matrix and a
    mean_vector computed offline.

    Given transformation_matrix and mean_vector, will flatten the
    torch.*Tensor and subtract mean_vector from it which is then followed by
    computing the dot product with the transformation matrix and then
    reshaping the tensor to its original shape.

    Applications:
        whitening transformation: Suppose X is a column vector zero-centered data.
        Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X),
        perform SVD on this matrix and pass it as transformation_matrix.

    Args:
        transformation_matrix (Tensor): tensor [D x D], D = C x H x W
        mean_vector (Tensor): tensor [D], D = C x H x W

    Raises:
        ValueError: if ``transformation_matrix`` is not square or if
            ``mean_vector`` does not match its size.
    """

    def __init__(self, transformation_matrix, mean_vector):
        if transformation_matrix.size(0) != transformation_matrix.size(1):
            raise ValueError("transformation_matrix should be square. Got " +
                             "[{} x {}] rectangular matrix.".format(*transformation_matrix.size()))

        if mean_vector.size(0) != transformation_matrix.size(0):
            # The size must be unpacked: the template has two placeholders and
            # formatting it with a single argument raises IndexError instead
            # of the intended ValueError.
            raise ValueError("mean_vector should have the same length {}".format(mean_vector.size(0)) +
                             " as any one of the dimensions of the transformation_matrix [{} x {}]"
                             .format(*transformation_matrix.size()))

        self.transformation_matrix = transformation_matrix
        self.mean_vector = mean_vector

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be whitened.

        Returns:
            Tensor: Transformed image.

        Raises:
            ValueError: if C * H * W differs from the matrix dimension.
        """
        if tensor.size(0) * tensor.size(1) * tensor.size(2) != self.transformation_matrix.size(0):
            raise ValueError("tensor and transformation matrix have incompatible shape." +
                             "[{} x {} x {}] != ".format(*tensor.size()) +
                             "{}".format(self.transformation_matrix.size(0)))
        # Flatten to a (1, D) row vector, centre it, apply the matrix, restore the shape.
        flat_tensor = tensor.view(1, -1) - self.mean_vector
        transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix)
        tensor = transformed_tensor.view(tensor.size())
        return tensor

    def __repr__(self):
        format_string = self.__class__.__name__ + '(transformation_matrix='
        format_string += (str(self.transformation_matrix.tolist()) + ')')
        format_string += (", (mean_vector=" + str(self.mean_vector.tolist()) + ')')
        return format_string

transforms.RandomErasing。输入torch_image，（C，H，W），随机是否擦除，随机擦除面积，随机擦除长宽比例，随机位置，随机通道，随机值擦除。
# Demo: erase one random rectangle from an image and convert the result back
# to a PIL image.
# ToTensor() scales an 8-bit image to [0.0, 1.0] and ToPILImage() multiplies a
# float tensor by 255 again, so the fill colour is given on the [0, 1] scale:
# (1.0, 1.0, 0.0) is yellow. A fill of (255, 255, 0) would overflow when the
# tensor is converted back to bytes.
randomerase = transforms.RandomErasing(p=1, scale=(0.2, 0.5), value=(1.0, 1.0, 0.0))
im = Image.open(r'C:\Users\Administrator\Desktop\panda.jpg')
# Alternative that skips scaling (kept from the original for reference):
# im_data = torch.from_numpy(np.array(im))
im_data_t = transforms.ToTensor()(im)
erased = randomerase(im_data_t)
erased_im = transforms.ToPILImage()(erased)

class RandomErasing(object):
    """Randomly selects a rectangle region in an image and erases its pixels.

    'Random Erasing Data Augmentation' by Zhong et al.
    See https://arxiv.org/pdf/1708.04896.pdf

    Args:
        p: probability that the random erasing operation will be performed.
        scale: range of proportion of erased area against input image.
        ratio: range of aspect ratio of erased area.
        value: erasing value. Default is 0. If a single int, it is used to
            erase all pixels. If a tuple of length 3, it is used to erase
            R, G, B channels respectively.
            If a str of 'random', erasing each pixel with random values.
        inplace: boolean to make this transform inplace. Default set to False.

    Returns:
        Erased Image.

    # Examples:
        >>> transform = transforms.Compose([
        >>>     transforms.RandomHorizontalFlip(),
        >>>     transforms.ToTensor(),
        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        >>>     transforms.RandomErasing(),
        >>> ])
    """

    def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False):
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert isinstance(value, (numbers.Number, str, tuple, list))
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("range should be of kind (min, max)")
        if scale[0] < 0 or scale[1] > 1:
            raise ValueError("range of scale should be between 0 and 1")
        if p < 0 or p > 1:
            raise ValueError("range of random erasing probability should be between 0 and 1")

        self.p = p
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace

    @staticmethod
    def get_params(img, scale, ratio, value=0):
        """Get parameters for ``erase`` for a random erasing.

        Args:
            img (Tensor): Tensor image of size (C, H, W) to be erased.
            scale: range of proportion of erased area against input image.
            ratio: range of aspect ratio of erased area.
            value: erasing value; a number, a str (random fill) or a
                list/tuple of per-channel values.

        Returns:
            tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing.
        """
        img_c, img_h, img_w = img.shape
        area = img_h * img_w

        # Up to 10 attempts to draw a rectangle strictly smaller than the image.
        for _ in range(10):
            erase_area = random.uniform(scale[0], scale[1]) * area
            aspect_ratio = random.uniform(ratio[0], ratio[1])

            h = int(round(math.sqrt(erase_area * aspect_ratio)))
            w = int(round(math.sqrt(erase_area / aspect_ratio)))

            if h < img_h and w < img_w:
                i = random.randint(0, img_h - h)
                j = random.randint(0, img_w - w)
                if isinstance(value, numbers.Number):
                    v = value
                elif isinstance(value, str):
                    # Any string selects a random (standard normal) fill.
                    # Plain ``str`` replaces the private
                    # ``torch._six.string_classes`` alias.
                    v = torch.empty([img_c, h, w], dtype=torch.float32).normal_()
                elif isinstance(value, (list, tuple)):
                    # Expand the per-channel values to the size of the erased rectangle.
                    v = torch.tensor(value, dtype=torch.float32).view(-1, 1, 1).expand(-1, h, w)
                return i, j, h, w, v

        # Return original image
        return 0, 0, img_h, img_w, img

    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W) to be erased.

        Returns:
            img (Tensor): Erased Tensor image.
        """
        if random.uniform(0, 1) < self.p:
            x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value)
            # NOTE(review): ``F`` is not defined in the visible part of the
            # file; presumably it is the module alias that exposes ``erase``.
            return F.erase(img, x, y, h, w, v, self.inplace)
        return img


def erase(img, i, j, h, w, v, inplace=False):
    """Erase the input Tensor Image with given value.

    Args:
        img (Tensor Image): Tensor image of size (C, H, W) to be erased
        i (int): i in (i,j) i.e coordinates of the upper left corner.
        j (int): j in (i,j) i.e coordinates of the upper left corner.
        h (int): Height of the erased region.
        w (int): Width of the erased region.
        v: Erasing value.
        inplace (bool, optional): For in-place operations. By default is set False.

    Returns:
        Tensor Image: Erased image.

    Raises:
        TypeError: if ``img`` is not a ``torch.Tensor``.
    """
    if not isinstance(img, torch.Tensor):
        raise TypeError('img should be Tensor Image. Got {}'.format(type(img)))

    if not inplace:
        img = img.clone()

    img[:, i:i + h, j:j + w] = v
    return img

【Pytorch|Pytorch Transforms Tensor】