Skip to content

Commit

Permalink
Squashed 'src/composable_kernel/' changes from f6edda611..5781adf5c
Browse files Browse the repository at this point in the history
5781adf5c Update develop (#5) (#6)
97e6d514f Merge pull request #4 from ROCmSoftwarePlatform/separate_online_compile
7b1ec41e5 refactor
49c33aaea refactor
54b3e73d1 rename

git-subtree-dir: src/composable_kernel
git-subtree-split: 5781adf5cf4ac753e2e36da7385791775b744bf7
  • Loading branch information
Chao Liu committed Aug 6, 2021
1 parent c840438 commit 437cc59
Show file tree
Hide file tree
Showing 58 changed files with 136 additions and 123 deletions.
3 changes: 1 addition & 2 deletions external/half/include/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2404,8 +2404,7 @@ unsigned int gamma(unsigned int arg)
0.0114684895434781459556 }; double t = arg + 4.65, s = p[0]; for(unsigned int i=0; i<5; ++i)
s += p[i+1] / (arg+i);
return std::log(s) + (arg-0.5)*std::log(t) - t;
*/ static const f31 pi(0xC90FDAA2, 1),
lbe(0xB8AA3B29, 0);
*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
bool bsign = sign != 0;
f31 z(abs), x = sign ? (z + f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2),
Expand Down
2 changes: 1 addition & 1 deletion host/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_subdirectory(host_tensor)
add_subdirectory(online_compilation)
add_subdirectory(online_compile)
add_subdirectory(driver_offline)
add_subdirectory(driver_online)
1 change: 1 addition & 0 deletions host/driver_offline/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include_directories(BEFORE
include
${PROJECT_SOURCE_DIR}/host/host_tensor/include
${PROJECT_SOURCE_DIR}/host/solver/include
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
Expand Down
7 changes: 4 additions & 3 deletions host/driver_online/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
include_directories(BEFORE
include
${PROJECT_BINARY_DIR}/host/online_compilation/include
${PROJECT_SOURCE_DIR}/host/online_compilation/include
${PROJECT_BINARY_DIR}/host/online_compile/include
${PROJECT_SOURCE_DIR}/host/online_compile/include
${PROJECT_SOURCE_DIR}/host/host_tensor/include
${PROJECT_SOURCE_DIR}/host/solver/include
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
Expand All @@ -18,4 +19,4 @@ set(CONV_FWD_DRIVER_ONLINE_SOURCE conv_fwd_driver_online.cpp)
add_executable(conv_fwd_driver_online ${CONV_FWD_DRIVER_ONLINE_SOURCE})

target_link_libraries(conv_fwd_driver_online PRIVATE host_tensor)
target_link_libraries(conv_fwd_driver_online PRIVATE online_compilation)
target_link_libraries(conv_fwd_driver_online PRIVATE online_compile)
4 changes: 2 additions & 2 deletions host/driver_online/conv_fwd_driver_online.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ int main(int argc, char* argv[])
using size_t = std::size_t;

hipStream_t stream;
olCompile::Handle* handle;
online_compile::Handle* handle;

MY_HIP_CHECK(hipStreamCreate(&stream));

handle = new olCompile::Handle(stream);
handle = new online_compile::Handle(stream);

constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ template <typename TInWei,
typename InLeftPads,
typename InRightPads>
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_dlops_nchw_kcyx_nkhw(
olCompile::Handle* handle,
online_compile::Handle* handle,
const InLengths& in_n_c_hi_wi_lengths,
const WeiLengths& wei_k_c_y_x_lengths,
const OutLengths& out_n_k_ho_wo_lengths,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ template <typename TInWei,
typename InLeftPads,
typename InRightPads>
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw(
olCompile::Handle* handle,
online_compile::Handle* handle,
const InLengths& in_n_c_hi_wi_lengths,
const WeiLengths& wei_k_c_y_x_lengths,
const OutLengths& out_n_k_ho_wo_lengths,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ template <typename TInWei,
typename InLeftPads,
typename InRightPads>
void online_device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk(
olCompile::Handle* handle,
online_compile::Handle* handle,
const InLengths& in_n_hi_wi_c_lengths,
const WeiLengths& wei_k_y_x_c_lengths,
const OutLengths& out_n_ho_wo_k_lengths,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ template <typename TInWei,
typename InLeftPads,
typename InRightPads>
void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw(
olCompile::Handle* handle,
online_compile::Handle* handle,
const InLengths& in_n_c_hi_wi_lengths,
const WeiLengths& wei_k_c_y_x_lengths,
const OutLengths& out_n_k_ho_wo_lengths,
Expand Down Expand Up @@ -100,13 +100,13 @@ void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcy
"dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcyx_nkhw.cpp";
std::string algo_name = "implicit_gemm_conv_fwd_v6r1_dlops_nchw";

std::string compile_param_string = " -std=c++17 " + compile_param.GetCompileParameterString();
std::string compile_param_string = get_ck_hip_online_compile_common_flag() + compile_param.GetCompileParameterString();
std::string network_config = compile_param_string;

std::vector<float> kernel1_times;
std::vector<float> kernel2_times;

for(index_t i = 0; i < nrepeat; ++i)
for(index_t i = 0; i < nrepeat + 1; ++i)
{
KernelTimer timer1, timer2;
std::string kernel_name;
Expand Down Expand Up @@ -164,11 +164,11 @@ void online_device_dynamic_convolution_forward_implicit_gemm_v6r1_dlops_nchw_kcy
auto ave_time1 =
std::accumulate(
std::next(kernel1_times.begin()), kernel1_times.end(), 0., std::plus<float>{}) /
(nrepeat - 1);
nrepeat;
auto ave_time2 =
std::accumulate(
std::next(kernel2_times.begin()), kernel2_times.end(), 0., std::plus<float>{}) /
(nrepeat - 1);
nrepeat;

float perf = (float)(conv_problem_desc.CalculateFlop()) /
(std::size_t(1000) * 1000 * 1000) / (ave_time1 + ave_time2);
Expand Down
7 changes: 7 additions & 0 deletions host/driver_online/include/online_driver_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

namespace ck_driver {

inline auto get_ck_hip_online_compile_common_flag()
{
std::string param = " -std=c++17";

return param;
}

// greatest common divisor, aka highest common factor
inline int gcd(int x, int y)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ else()
set(OLC_DEBUG 0)
endif()

configure_file("${PROJECT_SOURCE_DIR}/host/online_compilation/include/config.h.in" "${PROJECT_BINARY_DIR}/host/online_compilation/include/config.h")
configure_file("${PROJECT_SOURCE_DIR}/host/online_compile/include/config.h.in" "${PROJECT_BINARY_DIR}/host/online_compile/include/config.h")

include_directories(BEFORE
${PROJECT_BINARY_DIR}/host/online_compilation/include
${PROJECT_BINARY_DIR}/host/online_compile/include
)

message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
Expand All @@ -97,7 +97,7 @@ set(ONLINE_COMPILATION_SOURCE
)

include_directories(BEFORE
${PROJECT_BINARY_DIR}/host/online_compilation/include
${PROJECT_BINARY_DIR}/host/online_compile/include
include
)

Expand Down Expand Up @@ -152,17 +152,17 @@ add_custom_command(
)

## the library target
add_library(online_compilation SHARED ${ONLINE_COMPILATION_SOURCE})
add_library(online_compile SHARED ${ONLINE_COMPILATION_SOURCE})

target_include_directories(online_compilation PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/online_compilation/include/)
target_include_directories(online_compilation PRIVATE ${PROJECT_BINARY_DIR})
target_include_directories(online_compilation PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)
target_include_directories(online_compile PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/online_compile/include/)
target_include_directories(online_compile PRIVATE ${PROJECT_BINARY_DIR})
target_include_directories(online_compile PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)

target_link_libraries(online_compilation PRIVATE hip::device)
target_link_libraries(online_compilation INTERFACE hip::host)
target_link_libraries(online_compilation PRIVATE Boost::filesystem)
target_link_libraries(online_compile PRIVATE hip::device)
target_link_libraries(online_compile INTERFACE hip::host)
target_link_libraries(online_compile PRIVATE Boost::filesystem)

target_compile_features(online_compilation PUBLIC)
set_target_properties(online_compilation PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_features(online_compile PUBLIC)
set_target_properties(online_compile PROPERTIES POSITION_INDEPENDENT_CODE ON)

install(TARGETS online_compilation LIBRARY DESTINATION lib)
install(TARGETS online_compile LIBRARY DESTINATION lib)
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include <fstream>
#include <iostream>

namespace olCompile {
namespace online_compile {

OLC_DECLARE_ENV_VAR(OLC_DISABLE_CACHE)
OLC_DECLARE_ENV_VAR(HOME)
Expand All @@ -62,22 +62,22 @@ boost::filesystem::path GetCachePath()
return user_path;
}

static bool IsCacheDisabled() { return olCompile::IsEnabled(OLC_DISABLE_CACHE{}); }
static bool IsCacheDisabled() { return online_compile::IsEnabled(OLC_DISABLE_CACHE{}); }

boost::filesystem::path
GetCacheFile(const std::string& device, const std::string& name, const std::string& args)
{
// std::string filename = (is_kernel_str ? olCompile::md5(name) : name) + ".o";
// std::string filename = (is_kernel_str ? online_compile::md5(name) : name) + ".o";
std::string filename = name + ".o";
return GetCachePath() / olCompile::md5(device + ":" + args) / filename;
return GetCachePath() / online_compile::md5(device + ":" + args) / filename;
}

boost::filesystem::path LoadBinary(const TargetProperties& target,
const size_t num_cu,
const std::string& name,
const std::string& args)
{
if(olCompile::IsCacheDisabled())
if(online_compile::IsCacheDisabled())
return {};

(void)num_cu;
Expand All @@ -97,7 +97,7 @@ void SaveBinary(const boost::filesystem::path& binary_path,
const std::string& name,
const std::string& args)
{
if(olCompile::IsCacheDisabled())
if(online_compile::IsCacheDisabled())
{
boost::filesystem::remove(binary_path);
}
Expand All @@ -109,4 +109,4 @@ void SaveBinary(const boost::filesystem::path& binary_path,
}
}

} // namespace olCompile
} // namespace online_compile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include <sys/wait.h>
#endif // __linux__

namespace olCompile {
namespace online_compile {
namespace exec {

int Run(const std::string& p, std::istream* in, std::ostream* out)
Expand All @@ -53,7 +53,7 @@ int Run(const std::string& p, std::istream* in, std::ostream* out)
OLC_MANAGE_PTR(FILE*, pclose) pipe{popen(p.c_str(), file_mode)};

if(!pipe)
throw std::runtime_error("olCompile::exec::Run(): popen(" + p + ", " + file_mode +
throw std::runtime_error("online_compile::exec::Run(): popen(" + p + ", " + file_mode +
") failed");

if(redirect_stdin || redirect_stdout)
Expand All @@ -74,7 +74,7 @@ int Run(const std::string& p, std::istream* in, std::ostream* out)
buffer[in->gcount()] = 0;

if(fputs(buffer.data(), pipe.get()) == EOF)
throw std::runtime_error("olCompile::exec::Run(): fputs() failed");
throw std::runtime_error("online_compile::exec::Run(): fputs() failed");
}
}
}
Expand All @@ -90,4 +90,4 @@ int Run(const std::string& p, std::istream* in, std::ostream* out)
}

} // namespace exec
} // namespace olCompile
} // namespace online_compile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

OLC_DECLARE_ENV_VAR(OLC_DEVICE_CU)

namespace olCompile {
namespace online_compile {

std::size_t GetAvailableMemory()
{
Expand Down Expand Up @@ -182,24 +182,24 @@ KernelInvoke Handle::Run(Kernel k) const { return k.Invoke(this->GetStream()); }

Program Handle::LoadProgram(const std::string& program_name, std::string params) const
{
if((!olCompile::EndsWith(program_name, ".mlir-cpp")) &&
(!olCompile::EndsWith(program_name, ".mlir")))
if((!online_compile::EndsWith(program_name, ".mlir-cpp")) &&
(!online_compile::EndsWith(program_name, ".mlir")))
{
params += " -mcpu=" + this->GetTargetProperties().Name();
}

auto hsaco = olCompile::LoadBinary(
auto hsaco = online_compile::LoadBinary(
this->GetTargetProperties(), this->GetMaxComputeUnits(), program_name, params);
if(hsaco.empty())
{
auto p = HIPOCProgram{program_name, params, this->GetTargetProperties()};

auto path = olCompile::GetCachePath() / boost::filesystem::unique_path();
auto path = online_compile::GetCachePath() / boost::filesystem::unique_path();
if(p.IsCodeObjectInMemory())
olCompile::WriteFile(p.GetCodeObjectBlob(), path);
online_compile::WriteFile(p.GetCodeObjectBlob(), path);
else
boost::filesystem::copy_file(p.GetCodeObjectPathname(), path);
olCompile::SaveBinary(path, this->GetTargetProperties(), program_name, params);
online_compile::SaveBinary(path, this->GetTargetProperties(), program_name, params);

return p;
}
Expand Down Expand Up @@ -245,7 +245,7 @@ std::size_t Handle::GetGlobalMemorySize() const
std::size_t Handle::GetMaxComputeUnits() const
{
int result;
const char* const num_cu = olCompile::GetStringEnv(OLC_DEVICE_CU{});
const char* const num_cu = online_compile::GetStringEnv(OLC_DEVICE_CU{});
if(num_cu != nullptr && strlen(num_cu) > 0)
{
return boost::lexical_cast<std::size_t>(num_cu);
Expand Down Expand Up @@ -282,4 +282,4 @@ std::ostream& Handle::Print(std::ostream& os) const
return os;
}

} // namespace olCompile
} // namespace online_compile
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ OLC_DECLARE_ENV_VAR(OLC_DEBUG_HIP_DUMP)

#define OLC_HIP_COMPILER "/opt/rocm/llvm/bin/clang++"

namespace olCompile {
namespace online_compile {

bool IsHccCompiler()
{
Expand Down Expand Up @@ -155,12 +155,12 @@ static boost::filesystem::path HipBuildImpl(boost::optional<TmpDir>& tmp_dir,
params += " -mllvm -amdgpu-function-calls=false";
}

if(olCompile::IsEnabled(OLC_DEBUG_HIP_VERBOSE{}))
if(online_compile::IsEnabled(OLC_DEBUG_HIP_VERBOSE{}))
{
params += " -v";
}

if(olCompile::IsEnabled(OLC_DEBUG_HIP_DUMP{}))
if(online_compile::IsEnabled(OLC_DEBUG_HIP_DUMP{}))
{
if(IsHccCompiler())
{
Expand Down Expand Up @@ -247,7 +247,7 @@ static external_tool_version_t HipCompilerVersionImpl()
break;

std::stringstream out;
if(olCompile::exec::Run(path + " --version", nullptr, &out) != 0)
if(online_compile::exec::Run(path + " --version", nullptr, &out) != 0)
break;

std::string line;
Expand Down Expand Up @@ -343,4 +343,4 @@ bool operator<=(const external_tool_version_t& lhs, const external_tool_version_
return !(lhs > rhs);
}

} // namespace olCompile
} // namespace online_compile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <chrono>
#include <thread>

namespace olCompile {
namespace online_compile {

void HIPOCKernelInvoke::run(void* args, std::size_t size) const
{
Expand Down Expand Up @@ -81,4 +81,4 @@ HIPOCKernelInvoke HIPOCKernel::Invoke(hipStream_t stream,
{
return HIPOCKernelInvoke{stream, fun, ldims, gdims, name, callback};
}
} // namespace olCompile
} // namespace online_compile
Loading

0 comments on commit 437cc59

Please sign in to comment.