python-pytorch-rocm-6.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

From 602abf6b55cb11c3dce0c046b3cfbc417b5080d6 Mon Sep 17 00:00:00 2001
From: Jeff Daily <jeff.daily@amd.com>
Date: Wed, 20 Dec 2023 20:19:25 +0000
Subject: [PATCH] [ROCm] more 6.0 changes (#115946)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115946
Approved by: https://github.com/pruthvistony, https://github.com/huydhn, https://github.com/malfet
---
 aten/src/ATen/cuda/detail/CUDAHooks.cpp | 2 +-
 caffe2/CMakeLists.txt                   | 2 +-
 caffe2/core/common_gpu.h                | 2 +-
 caffe2/operators/elementwise_ops.cu     | 2 ++
 cmake/public/LoadHIP.cmake              | 3 +--
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/aten/src/ATen/cuda/detail/CUDAHooks.cpp b/aten/src/ATen/cuda/detail/CUDAHooks.cpp
index acb9b1931f045e0..24c29a638154499 100644
--- a/aten/src/ATen/cuda/detail/CUDAHooks.cpp
+++ b/aten/src/ATen/cuda/detail/CUDAHooks.cpp
@@ -151,7 +151,7 @@ bool CUDAHooks::isPinnedPtr(const void* data) const {
     return false;
   }
 #endif
-#if !defined(USE_ROCM)
+#if !defined(USE_ROCM) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
   return attr.type == cudaMemoryTypeHost;
 #else
   return attr.memoryType == cudaMemoryTypeHost;
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index f2acc61ad389c7e..b035e50f81ae3f6 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -698,7 +698,7 @@ if(USE_ROCM)
   add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
   target_link_libraries(caffe2_nvrtc ${PYTORCH_HIP_LIBRARIES} ${ROCM_HIPRTC_LIB})
   target_include_directories(caffe2_nvrtc PRIVATE ${CMAKE_BINARY_DIR})
-  target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_HCC__)
+  target_compile_definitions(caffe2_nvrtc PRIVATE USE_ROCM __HIP_PLATFORM_AMD__)
   install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
 endif()
 
diff --git a/caffe2/core/common_gpu.h b/caffe2/core/common_gpu.h
index af1cc891e567eff..bbf25a51352175b 100644
--- a/caffe2/core/common_gpu.h
+++ b/caffe2/core/common_gpu.h
@@ -86,7 +86,7 @@ namespace caffe2 {
 class TensorCoreEngine {};
 #endif // USE_ROCM
 
-#if !defined(USE_ROCM)
+#if !defined(USE_ROCM) || (defined(USE_ROCM) && ROCM_VERSION >= 50700)
 #define CAFFE2_CUDA_PTRATTR_MEMTYPE type
 #else
 #define CAFFE2_CUDA_PTRATTR_MEMTYPE memoryType
diff --git a/caffe2/operators/elementwise_ops.cu b/caffe2/operators/elementwise_ops.cu
index b78b94c52147806..2237f2015feba2d 100644
--- a/caffe2/operators/elementwise_ops.cu
+++ b/caffe2/operators/elementwise_ops.cu
@@ -13,8 +13,10 @@
 // until we use hipblas v2
 // hipify correctly maps things like CUDA_R_16F to HIP_R_16F,
 // however hipblas v1 is still using its custom type
+#ifndef HIPBLAS_V2
 #define HIP_R_16F  HIPBLAS_R_16F
 #define HIP_R_32F  HIPBLAS_R_32F
+#endif
 #endif // USE_ROCM
 
 namespace caffe2 {
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index f7344cc310842e1..197c60e7b7f6297 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -187,8 +187,7 @@ if(HIP_FOUND)
   else()
     find_library(PYTORCH_RCCL_LIBRARIES ${rccl_LIBRARIES} HINTS ${ROCM_PATH}/lib)
   endif()
-  # hiprtc is part of HIP
-  find_library(ROCM_HIPRTC_LIB amdhip64 HINTS ${ROCM_PATH}/lib)
+  find_library(ROCM_HIPRTC_LIB hiprtc HINTS ${ROCM_PATH}/lib)
   # roctx is part of roctracer
   find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCM_PATH}/lib)