模型:
timm/swinv2_cr_small_ns_224.sw_in1k
这是在正式代码发布之前独立实现的 Swin Transformer V2 模型。由 Christoph Reich 和 Ross Wightman 合作开发,该模型与官方实现有几点不同:
from urllib.request import urlopen from PIL import Image import timm img = Image.open(urlopen( 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png' )) model = timm.create_model('swinv2_cr_small_ns_224.sw_in1k', pretrained=True) model = model.eval() # get model specific transforms (normalization, resize) data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) output = model(transforms(img).unsqueeze(0)) # unsqueeze single image into batch of 1 top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)
from urllib.request import urlopen from PIL import Image import timm img = Image.open(urlopen( 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png' )) model = timm.create_model( 'swinv2_cr_small_ns_224.sw_in1k', pretrained=True, features_only=True, ) model = model.eval() # get model specific transforms (normalization, resize) data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) output = model(transforms(img).unsqueeze(0)) # unsqueeze single image into batch of 1 for o in output: # print shape of each feature map in output # e.g. for swin_base_patch4_window7_224 (NHWC output) # torch.Size([1, 56, 56, 128]) # torch.Size([1, 28, 28, 256]) # torch.Size([1, 14, 14, 512]) # torch.Size([1, 7, 7, 1024]) # e.g. for swinv2_cr_small_ns_224 (NCHW output) # torch.Size([1, 96, 56, 56]) # torch.Size([1, 192, 28, 28]) # torch.Size([1, 384, 14, 14]) # torch.Size([1, 768, 7, 7]) print(o.shape)
from urllib.request import urlopen from PIL import Image import timm img = Image.open(urlopen( 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png' )) model = timm.create_model( 'swinv2_cr_small_ns_224.sw_in1k', pretrained=True, num_classes=0, # remove classifier nn.Linear ) model = model.eval() # get model specific transforms (normalization, resize) data_config = timm.data.resolve_model_data_config(model) transforms = timm.data.create_transform(**data_config, is_training=False) output = model(transforms(img).unsqueeze(0)) # output is (batch_size, num_features) shaped tensor # or equivalently (without needing to set num_classes=0) output = model.forward_features(transforms(img).unsqueeze(0)) # output is unpooled (ie.e a (batch_size, H, W, num_features) tensor for swin / swinv2 # or (batch_size, num_features, H, W) for swinv2_cr output = model.forward_head(output, pre_logits=True) # output is (batch_size, num_features) tensor
在 timm model results 中探索该模型的数据集和运行时指标。
@inproceedings{liu2021swinv2, title={Swin Transformer V2: Scaling Up Capacity and Resolution}, author={Ze Liu and Han Hu and Yutong Lin and Zhuliang Yao and Zhenda Xie and Yixuan Wei and Jia Ning and Yue Cao and Zheng Zhang and Li Dong and Furu Wei and Baining Guo}, booktitle={International Conference on Computer Vision and Pattern Recognition (CVPR)}, year={2022} }
@misc{rw2019timm, author = {Ross Wightman}, title = {PyTorch Image Models}, year = {2019}, publisher = {GitHub}, journal = {GitHub repository}, doi = {10.5281/zenodo.4414861}, howpublished = {\url{https://github.com/huggingface/pytorch-image-models}} }