diff --git a/src/backend/gencomm.py b/src/backend/gencomm.py index e4c9452c..c9469b8d 100755 --- a/src/backend/gencomm.py +++ b/src/backend/gencomm.py @@ -11,6 +11,9 @@ ## loop through the derived and basic types to generate individual ## pack functions derived_types = [ "hvector", "blkhindx", "hindexed", "contig", "resized" ] +type_map = { } +type_ops = [["pack", "acc_unpack"], ["pack", "acc_unpack"], ["pack", "acc_unpack"], ["pack", "acc_unpack", "sum"], ["pack", "acc_unpack", "sum"], \ + ["pack", "acc_unpack", "sum"], ["pack", "acc_unpack", "sum"], ["pack", "acc_unpack", "sum"], ["pack", "acc_unpack", "sum"], ["pack", "acc_unpack", "sum"]] ######################################################################################## @@ -24,6 +27,11 @@ def child_type_str(typelist): def switcher_builtin_element(backend, OUTFILE, blklens, typelist, pupstr, key, val): yutils.display(OUTFILE, "case %s:\n" % key.upper()) + d = val + if (key == "YAKSA_TYPE__long_double"): + d = "long double" + # Types missing from type_map use slot 0 of type_ops (pack/acc_unpack only). + n = type_map.get(d, 0) if (len(typelist) == 0): t = "" @@ -43,16 +51,28 @@ def switcher_builtin_element(backend, OUTFILE, blklens, typelist, pupstr, key, v else: yutils.display(OUTFILE, "default:\n") yutils.display(OUTFILE, "if (max_nesting_level >= %d) {\n" % nesting_level) - yutils.display(OUTFILE, "%s->pack = yaksuri_%si_%s_blklen_%s_%s;\n" % (backend, backend, pupstr, blklen, val)) - yutils.display(OUTFILE, "%s->acc_unpack = yaksuri_%si_acc_un%s_blklen_%s_%s;\n" % (backend, backend, pupstr, blklen, val)) + for p in type_ops[n]: + if (backend != "seq" and p != "pack" and p!="acc_unpack"): + continue + yutils.display(OUTFILE, "%s->%s = yaksuri_%si_%s%s_blklen_%s_%s;\n" % (backend, p, backend, p, pupstr, blklen, val)) + #yutils.display(OUTFILE, "%s->acc_unpack = yaksuri_%si_acc_un%s_blklen_%s_%s;\n" % (backend, backend, pupstr, blklen, val)) yutils.display(OUTFILE, "}\n") yutils.display(OUTFILE, "break;\n") yutils.display(OUTFILE, "}\n") - else: + elif (t != ""): 
yutils.display(OUTFILE, "if (max_nesting_level >= %d) {\n" % nesting_level) - yutils.display(OUTFILE, "%s->pack = yaksuri_%si_%s_%s;\n" % (backend, backend, pupstr, val)) - yutils.display(OUTFILE, "%s->acc_unpack = yaksuri_%si_acc_un%s_%s;\n" % (backend, backend, pupstr, val)) + for p in type_ops[n]: + if (backend != "seq" and p != "pack" and p!="acc_unpack"): + continue + yutils.display(OUTFILE, "%s->%s = yaksuri_%si_%s%s_%s;\n" % (backend, p, backend, p, pupstr, val)) + #yutils.display(OUTFILE, "%s->acc_unpack = yaksuri_%si_acc_un%s_%s;\n" % (backend, backend, pupstr, val)) yutils.display(OUTFILE, "}\n") + else: + for p in type_ops[n]: + if (p == "pack" or p == "acc_unpack"): + continue + yutils.display(OUTFILE, "%s->%s = yaksuri_%si_%s_%s;\n" % (backend, p, backend, p, val)) + #yutils.display(OUTFILE, "}\n") if (t != ""): typelist.append(t) @@ -97,6 +117,11 @@ def switcher(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, p ######################################################################################## def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_maps): ##### generate the switching logic to select pup functions + n = 0 + for b in builtin_types: + type_map[b] = n + n = n + 1 + filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns.c" % (backend, backend) yutils.copyright_c(filename) OUTFILE = open(filename, "a") @@ -133,6 +158,10 @@ def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_ma yutils.display(OUTFILE, "}\n") yutils.display(OUTFILE, "break;\n") yutils.display(OUTFILE, "\n") + yutils.display(OUTFILE, "case YAKSI_TYPE_KIND__BUILTIN:\n") + yutils.display(OUTFILE, "rc = yaksuri_%si_populate_pupfns_builtin(type);\n" % backend) + yutils.display(OUTFILE, "break;\n") + yutils.display(OUTFILE, "\n") yutils.display(OUTFILE, "default:\n") yutils.display(OUTFILE, " break;\n") yutils.display(OUTFILE, "}\n") @@ -170,7 +199,8 @@ def populate_pupfns(pup_max_nesting, backend, blklens, 
builtin_types, builtin_ma yutils.display(OUTFILE, "}\n") yutils.display(OUTFILE, "\n") - pupstr = "pack_%s_%s" % (dtype1, dtype2) + pupstr = "_%s_%s" % (dtype1, dtype2) + typelist = [ dtype1, dtype2 ] switcher(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr, pup_max_nesting - 1) yutils.display(OUTFILE, "\n") @@ -205,7 +235,7 @@ def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_ma yutils.display(OUTFILE, "}\n") yutils.display(OUTFILE, "\n") - pupstr = "pack_%s" % dtype1 + pupstr = "_%s" % dtype1 typelist = [ dtype1 ] switcher_builtin(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr) yutils.display(OUTFILE, "\n") @@ -213,6 +243,33 @@ def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_ma yutils.display(OUTFILE, "}\n") OUTFILE.close() + filename = "src/backend/%s/pup/yaksuri_%si_populate_pupfns_builtin.c" % (backend, backend) + yutils.copyright_c(filename) + OUTFILE = open(filename, "a") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \"yaksi.h\"\n") + yutils.display(OUTFILE, "#include \"yaksu.h\"\n") + yutils.display(OUTFILE, "#include \"yaksuri_%si.h\"\n" % backend) + yutils.display(OUTFILE, "#include \"yaksuri_%si_populate_pupfns.h\"\n" % backend) + yutils.display(OUTFILE, "#include \"yaksuri_%si_pup.h\"\n" % backend) + yutils.display(OUTFILE, "\n") + yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_builtin(yaksi_type_s * type)\n" % backend) + yutils.display(OUTFILE, "{\n") + yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n") + yutils.display(OUTFILE, "yaksuri_%si_type_s *%s = (yaksuri_%si_type_s *) type->backend.%s.priv;\n" \ + % (backend, backend, backend, backend)) + yutils.display(OUTFILE, "\n") + + pupstr = "" + typelist = [ ] + switcher_builtin(backend, OUTFILE, blklens, builtin_types, builtin_maps, typelist, pupstr) + 
yutils.display(OUTFILE, "\n") + yutils.display(OUTFILE, "return rc;\n") + yutils.display(OUTFILE, "}\n") + OUTFILE.close() + ##### generate the Makefile for the pup function selection functions filename = "src/backend/%s/pup/Makefile.populate_pupfns.mk" % backend yutils.copyright_makefile(filename) @@ -222,6 +279,7 @@ def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_ma for dtype2 in derived_types: yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_%s.c \\\n" % (backend, backend, dtype1, dtype2)) yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns_%s_builtin.c \\\n" % (backend, backend, dtype1)) + yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns_builtin.c \\\n" % (backend, backend)) yutils.display(OUTFILE, "\tsrc/backend/%s/pup/yaksuri_%si_populate_pupfns.c\n" % (backend, backend)) yutils.display(OUTFILE, "\n") yutils.display(OUTFILE, "noinst_HEADERS += \\\n") @@ -239,6 +297,7 @@ def populate_pupfns(pup_max_nesting, backend, blklens, builtin_types, builtin_ma for dtype2 in derived_types: yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_%s(yaksi_type_s * type);\n" % (backend, dtype1, dtype2)) yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_%s_builtin(yaksi_type_s * type);\n" % (backend, dtype1)) + yutils.display(OUTFILE, "int yaksuri_%si_populate_pupfns_builtin(yaksi_type_s * type);\n" % backend) yutils.display(OUTFILE, "\n") yutils.display(OUTFILE, "#endif /* YAKSURI_%sI_POPULATE_PUPFNS_H_INCLUDED */\n" % backend.upper()) OUTFILE.close() diff --git a/src/backend/seq/genpup.py b/src/backend/seq/genpup.py index 06ab128b..390362f2 100755 --- a/src/backend/seq/genpup.py +++ b/src/backend/seq/genpup.py @@ -116,12 +116,37 @@ def resized(suffix, b, blklen, last): ######################################################################################## ##### Core kernels 
######################################################################################## -def generate_kernels(b, darray, blklen): +def generate_kernels(b, darray, blklen, op): global num_paren_open global s # we don't need pup kernels for basic types if (len(darray) == 0): + if (op == "pack" or op == "acc_unpack"): + return + s = "int yaksuri_seqi_%s_" % op + b.replace(" ", "_") + yutils.display(OUTFILE, "%s(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type)\n" % s), + yutils.display(OUTFILE, "{\n") + + ##### variable declarations + # generic variables + yutils.display(OUTFILE, "int rc = YAKSA_SUCCESS;\n"); + yutils.display(OUTFILE, "const %s *restrict sbuf = (const %s *) inbuf;\n" % (b, b)); + yutils.display(OUTFILE, "%s *restrict dbuf = (%s *) outbuf;\n" % (b, b)); + yutils.display(OUTFILE, "\n"); + + yutils.display(OUTFILE, "for (int i = 0; i < count; i++) {\n") + num_paren_open += 1 + if (op == "sum"): + yutils.display(OUTFILE, "dbuf[i] += sbuf[i];\n") + + for x in range(num_paren_open): + yutils.display(OUTFILE, "}\n") + num_paren_open = 0 + yutils.display(OUTFILE, "\n"); + yutils.display(OUTFILE, "return rc;\n") + yutils.display(OUTFILE, "}\n\n") + return # individual blocklength optimization is only for @@ -129,7 +154,10 @@ def generate_kernels(b, darray, blklen): if (darray[-1] != "hvector" and darray[-1] != "blkhindx" and blklen != "generic"): return - for func in "pack","acc_unpack": + funclist = [ ] + funclist.append(op) + + for func in funclist: ##### figure out the function name to use s = "int yaksuri_seqi_%s_" % func for d in darray: @@ -172,8 +200,11 @@ def generate_kernels(b, darray, blklen): if (func == "pack"): yutils.display(OUTFILE, "dbuf[idx++] = sbuf[%s];\n" % s) - else: + elif (func == "acc_unpack"): yutils.display(OUTFILE, "dbuf[%s] = sbuf[idx++];\n" % s) + else: + yutils.display(OUTFILE, "dbuf[%s] += sbuf[idx++];\n" % s) + for x in range(num_paren_open): yutils.display(OUTFILE, "}\n") num_paren_open = 0 @@ -196,6 
+227,27 @@ def generate_kernels(b, darray, blklen): print("===> ERROR: pup-max-nesting must be positive") sys.exit(1) + n = 0; + for b in builtin_types: + gencomm.type_map[b] = n; + n = n + 1; + + ##### generate the reduction kernels for built-in types + for b in builtin_types: + filename = "src/backend/seq/pup/yaksuri_seqi_pup_%s.c" % (b.replace(" ","_")) + yutils.copyright_c(filename) + OUTFILE = open(filename, "a") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \n") + yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n") + yutils.display(OUTFILE, "\n") + for p in gencomm.type_ops[gencomm.type_map.get(b)]: + emptylist = [ ] + generate_kernels(b, emptylist, 0, p) + + OUTFILE.close() + ##### generate the core pack/acc_unpack kernels (single level) for b in builtin_types: for d in gencomm.derived_types: @@ -207,13 +259,12 @@ def generate_kernels(b, darray, blklen): yutils.display(OUTFILE, "#include \n") yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n") yutils.display(OUTFILE, "\n") - - emptylist = [ ] - emptylist.append(d) - for blklen in blklens: - generate_kernels(b, emptylist, blklen) - emptylist.pop() - + for p in gencomm.type_ops[gencomm.type_map.get(b)]: + emptylist = [ ] + emptylist.append(d) + for blklen in blklens: + generate_kernels(b, emptylist, blklen, p) + emptylist.pop() OUTFILE.close() ##### generate the core pack/acc_unpack kernels (more than one level) @@ -230,18 +281,17 @@ def generate_kernels(b, darray, blklen): yutils.display(OUTFILE, "#include \n") yutils.display(OUTFILE, "#include \"yaksuri_seqi_pup.h\"\n") yutils.display(OUTFILE, "\n") - - for darray in darraylist: - darray.append(d1) - darray.append(d2) - for blklen in blklens: - generate_kernels(b, darray, blklen) - darray.pop() - darray.pop() - + for p in gencomm.type_ops[gencomm.type_map.get(b)]: + for darray in darraylist: + darray.append(d1) + darray.append(d2) + for blklen in blklens: + 
generate_kernels(b, darray, blklen, p) + darray.pop() + darray.pop() OUTFILE.close() - ##### generate the core pack/acc_unpack kernel declarations + ##### generate the core pack/acc_unpack and reduction kernel declarations filename = "src/backend/seq/pup/yaksuri_seqi_pup.h" yutils.copyright_c(filename) OUTFILE = open(filename, "a") @@ -253,6 +303,14 @@ def generate_kernels(b, darray, blklen): yutils.display(OUTFILE, "#include \"yaksi.h\"\n") yutils.display(OUTFILE, "\n") + for b in builtin_types: + for p in gencomm.type_ops[gencomm.type_map.get(b)]: + if (p == "pack" or p == "acc_unpack"): + continue + s = "int yaksuri_seqi_%s_" % p + b.replace(" ", "_") + yutils.display(OUTFILE, "%s" % s), + yutils.display(OUTFILE, "(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type);\n") + darraylist = [ ] yutils.generate_darrays(gencomm.derived_types, darraylist, args.pup_max_nesting) for b in builtin_types: @@ -268,9 +326,8 @@ def generate_kernels(b, darray, blklen): and blklen != "generic"): continue - for func in "pack","acc_unpack": - ##### figure out the function name to use - s = "int yaksuri_seqi_%s_" % func + for p in gencomm.type_ops[gencomm.type_map.get(b)]: + s = "int yaksuri_seqi_%s_" % p for d in darray: s = s + "%s_" % d # hvector and hindexed get blklen-specific function names @@ -290,12 +347,16 @@ def generate_kernels(b, darray, blklen): OUTFILE = open(filename, "a") yutils.display(OUTFILE, "libyaksa_la_SOURCES += \\\n") for b in builtin_types: + #reduction kernels for built-in types + yutils.display(OUTFILE, "\tsrc/backend/seq/pup/yaksuri_seqi_pup_%s.c \\\n" % \ + (b.replace(" ","_"))) for d1 in gencomm.derived_types: + #for p in type_ops[type_map.get(b)]: yutils.display(OUTFILE, "\tsrc/backend/seq/pup/yaksuri_seqi_pup_%s_%s.c \\\n" % \ - (d1, b.replace(" ","_"))) + (d1, b.replace(" ","_"))) for d2 in gencomm.derived_types: yutils.display(OUTFILE, "\tsrc/backend/seq/pup/yaksuri_seqi_pup_%s_%s_%s.c \\\n" % \ - (d1, d2, b.replace(" ","_"))) + 
(d1, d2, b.replace(" ","_"))) yutils.display(OUTFILE, "\tsrc/backend/seq/pup/yaksuri_seq_pup.c\n") yutils.display(OUTFILE, "\n") yutils.display(OUTFILE, "noinst_HEADERS += \\\n") diff --git a/src/backend/seq/include/yaksuri_seq_post.h b/src/backend/seq/include/yaksuri_seq_post.h index 7e62910e..c06157ae 100644 --- a/src/backend/seq/include/yaksuri_seq_post.h +++ b/src/backend/seq/include/yaksuri_seq_post.h @@ -19,6 +19,6 @@ int yaksuri_seq_pup_is_supported(yaksi_type_s * type, bool * is_supported); int yaksuri_seq_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_info_s * info, yaksi_type_s * type); int yaksuri_seq_iacc_unpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_info_s * info, - yaksi_type_s * type); + yaksi_type_s * type, yaksa_op_t op); #endif /* YAKSURI_SEQ_H_INCLUDED */ diff --git a/src/backend/seq/include/yaksuri_seqi.h b/src/backend/seq/include/yaksuri_seqi.h index d842d6d2..636aec75 100644 --- a/src/backend/seq/include/yaksuri_seqi.h +++ b/src/backend/seq/include/yaksuri_seqi.h @@ -11,6 +11,7 @@ typedef struct yaksuri_seqi_type_s { int (*pack) (const void *inbuf, void *outbuf, uintptr_t count, struct yaksi_type_s *); int (*acc_unpack) (const void *inbuf, void *outbuf, uintptr_t count, struct yaksi_type_s *); + int (*sum) (const void *inbuf, void *outbuf, uintptr_t count, struct yaksi_type_s *); } yaksuri_seqi_type_s; #define YAKSURI_SEQI_INFO__DEFAULT_IOV_PUP_THRESHOLD (16384) diff --git a/src/backend/seq/pup/yaksuri_seq_pup.c b/src/backend/seq/pup/yaksuri_seq_pup.c index 780d73cb..ec9816d9 100644 --- a/src/backend/seq/pup/yaksuri_seq_pup.c +++ b/src/backend/seq/pup/yaksuri_seq_pup.c @@ -93,7 +93,7 @@ int yaksuri_seq_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_in } int yaksuri_seq_iacc_unpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_info_s * info, - yaksi_type_s * type) + yaksi_type_s * type, yaksa_op_t op) { int rc = YAKSA_SUCCESS; yaksuri_seqi_type_s *seq_type = (yaksuri_seqi_type_s *) 
type->backend.seq.priv; @@ -104,56 +104,64 @@ int yaksuri_seq_iacc_unpack(const void *inbuf, void *outbuf, uintptr_t count, ya iov_unpack_threshold = seq_info->iov_unpack_threshold; } - if (type->is_contig) { - memcpy((char *) outbuf + type->true_lb, inbuf, type->size * count); - } else if (type->size / type->num_contig >= iov_unpack_threshold) { - struct iovec *iov; - uintptr_t actual_iov_len; - - if (type->num_contig * count <= MAX_IOV_LENGTH) { - iov = (struct iovec *) malloc(type->num_contig * count * sizeof(struct iovec)); - - rc = yaksi_iov(outbuf, count, type, 0, iov, MAX_IOV_LENGTH, &actual_iov_len); - YAKSU_ERR_CHECK(rc, fn_fail); - assert(actual_iov_len == type->num_contig * count); - - const char *sbuf = (const char *) inbuf; - for (uintptr_t i = 0; i < actual_iov_len; i++) { - memcpy(iov[i].iov_base, sbuf, iov[i].iov_len); - sbuf += iov[i].iov_len; - } + if (op == YAKSA_OP__SUM) { + /* sum is only set for types with a generated reduction kernel; guard it like acc_unpack below */ + if (seq_type->sum) { + rc = seq_type->sum(inbuf, outbuf, count, type); + YAKSU_ERR_CHECK(rc, fn_fail); + } else { + rc = YAKSA_ERR__NOT_SUPPORTED; + } + } else { + if (type->is_contig) { + memcpy((char *) outbuf + type->true_lb, inbuf, type->size * count); + } else if (type->size / type->num_contig >= iov_unpack_threshold) { + struct iovec *iov; + uintptr_t actual_iov_len; - free(iov); - } else if (type->num_contig <= MAX_IOV_LENGTH) { - iov = (struct iovec *) malloc(type->num_contig * sizeof(struct iovec)); + if (type->num_contig * count <= MAX_IOV_LENGTH) { + iov = (struct iovec *) malloc(type->num_contig * count * sizeof(struct iovec)); - uintptr_t iov_offset = 0; - char *dbuf = (char *) outbuf; - const char *sbuf = (const char *) inbuf; - for (uintptr_t i = 0; i < count; i++) { - rc = yaksi_iov(dbuf, 1, type, iov_offset, iov, MAX_IOV_LENGTH, &actual_iov_len); + rc = yaksi_iov(outbuf, count, type, 0, iov, MAX_IOV_LENGTH, &actual_iov_len); YAKSU_ERR_CHECK(rc, fn_fail); - assert(actual_iov_len == type->num_contig); + assert(actual_iov_len == type->num_contig * count); - for (uintptr_t j = 0; j < actual_iov_len; j++) { - memcpy(iov[j].iov_base, 
sbuf, iov[j].iov_len); - sbuf += iov[j].iov_len; + const char *sbuf = (const char *) inbuf; + for (uintptr_t i = 0; i < actual_iov_len; i++) { + memcpy(iov[i].iov_base, sbuf, iov[i].iov_len); + sbuf += iov[i].iov_len; + } + free(iov); + } else if (type->num_contig <= MAX_IOV_LENGTH) { + iov = (struct iovec *) malloc(type->num_contig * sizeof(struct iovec)); + + uintptr_t iov_offset = 0; + char *dbuf = (char *) outbuf; + const char *sbuf = (const char *) inbuf; + for (uintptr_t i = 0; i < count; i++) { + rc = yaksi_iov(dbuf, 1, type, iov_offset, iov, MAX_IOV_LENGTH, &actual_iov_len); + YAKSU_ERR_CHECK(rc, fn_fail); + assert(actual_iov_len == type->num_contig); + + for (uintptr_t j = 0; j < actual_iov_len; j++) { + memcpy(iov[j].iov_base, sbuf, iov[j].iov_len); + sbuf += iov[j].iov_len; + } + + dbuf += type->extent; } - dbuf += type->extent; + free(iov); + } else { + rc = YAKSA_ERR__NOT_SUPPORTED; } - - free(iov); + } else if (seq_type->acc_unpack) { + rc = seq_type->acc_unpack(inbuf, outbuf, count, type); + YAKSU_ERR_CHECK(rc, fn_fail); } else { rc = YAKSA_ERR__NOT_SUPPORTED; } - } else if (seq_type->acc_unpack) { - rc = seq_type->acc_unpack(inbuf, outbuf, count, type); - YAKSU_ERR_CHECK(rc, fn_fail); - } else { - rc = YAKSA_ERR__NOT_SUPPORTED; } - fn_exit: return rc; fn_fail: diff --git a/src/backend/src/yaksur_pup.c b/src/backend/src/yaksur_pup.c index d293c85b..62208441 100644 --- a/src/backend/src/yaksur_pup.c +++ b/src/backend/src/yaksur_pup.c @@ -176,7 +176,7 @@ int yaksur_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s request_backend->kind = YAKSURI_REQUEST_KIND__STAGED; rc = yaksuri_progress_enqueue(inbuf, outbuf, count, type, request, - inattr, outattr, YAKSURI_PUPTYPE__PACK, info); + inattr, outattr, YAKSURI_PUPTYPE__PACK, info, YAKSA_OP__REPLACE); YAKSU_ERR_CHECK(rc, fn_fail); rc = yaksuri_progress_poke(); @@ -226,7 +226,7 @@ int yaksur_iacc_unpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_t if (!is_supported) { rc = 
YAKSA_ERR__NOT_SUPPORTED; } else { - rc = yaksuri_seq_iacc_unpack(inbuf, outbuf, count, info, type); + rc = yaksuri_seq_iacc_unpack(inbuf, outbuf, count, info, type, op); YAKSU_ERR_CHECK(rc, fn_fail); } goto fn_exit; @@ -315,7 +315,7 @@ int yaksur_iacc_unpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_t request_backend->kind = YAKSURI_REQUEST_KIND__STAGED; rc = yaksuri_progress_enqueue(inbuf, outbuf, count, type, request, - inattr, outattr, YAKSURI_PUPTYPE__UNPACK, info); + inattr, outattr, YAKSURI_PUPTYPE__UNPACK, info, op); YAKSU_ERR_CHECK(rc, fn_fail); rc = yaksuri_progress_poke(); diff --git a/src/backend/src/yaksuri.h b/src/backend/src/yaksuri.h index 599086f2..280b00b3 100644 --- a/src/backend/src/yaksuri.h +++ b/src/backend/src/yaksuri.h @@ -48,7 +48,7 @@ typedef struct { int yaksuri_progress_enqueue(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, yaksi_request_s * request, yaksur_ptr_attr_s inattr, yaksur_ptr_attr_s outattr, yaksuri_puptype_e puptype, - yaksi_info_s * info); + yaksi_info_s * info, yaksa_op_t op); int yaksuri_progress_poke(void); #endif /* YAKSURI_H_INCLUDED */ diff --git a/src/backend/src/yaksuri_progress.c b/src/backend/src/yaksuri_progress.c index 1ee17281..78b2506b 100644 --- a/src/backend/src/yaksuri_progress.c +++ b/src/backend/src/yaksuri_progress.c @@ -35,6 +35,7 @@ typedef struct progress_elem_s { void *outbuf; uintptr_t count; yaksi_type_s *type; + yaksa_op_t op; uintptr_t completed_count; uintptr_t issued_count; @@ -85,7 +86,7 @@ static int progress_dequeue(progress_elem_s * elem) int yaksuri_progress_enqueue(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, yaksi_request_s * request, yaksur_ptr_attr_s inattr, yaksur_ptr_attr_s outattr, yaksuri_puptype_e puptype, - yaksi_info_s * info) + yaksi_info_s * info, yaksa_op_t op) { int rc = YAKSA_SUCCESS; @@ -108,6 +109,7 @@ int yaksuri_progress_enqueue(const void *inbuf, void *outbuf, uintptr_t count, y newelem->pup.outbuf = 
outbuf; newelem->pup.count = count; newelem->pup.type = type; + newelem->pup.op = op; newelem->pup.completed_count = 0; newelem->pup.issued_count = 0; newelem->pup.subop_head = newelem->pup.subop_tail = NULL; @@ -460,7 +462,7 @@ int yaksuri_progress_poke(void) char *dbuf = (char *) elem->pup.outbuf + subop->count_offset * elem->pup.type->extent; rc = yaksuri_seq_iacc_unpack(subop->host_tmpbuf, dbuf, subop->count, elem->info, - elem->pup.type); + elem->pup.type, elem->pup.op); YAKSU_ERR_CHECK(rc, fn_fail); } @@ -603,7 +605,7 @@ int yaksuri_progress_poke(void) rc = yaksuri_seq_iacc_unpack(sbuf, subop->host_tmpbuf, subop->count * elem->pup.type->size, elem->info, - byte_type); + byte_type, YAKSA_OP__REPLACE); YAKSU_ERR_CHECK(rc, fn_fail); rc = yaksuri_global.gpudriver[id].info->iacc_unpack(subop->host_tmpbuf, dbuf, diff --git a/test/simple/Makefile.mk b/test/simple/Makefile.mk index 416e6e88..b82b8400 100644 --- a/test/simple/Makefile.mk +++ b/test/simple/Makefile.mk @@ -8,10 +8,12 @@ EXTRA_DIST += $(top_srcdir)/test/simple/testlist.gen EXTRA_PROGRAMS += \ test/simple/simple_test \ + test/simple/simple_test1 \ test/simple/threaded_test test_simple_simple_test_CPPFLAGS = $(test_cppflags) test_simple_threaded_test_CPPFLAGS = $(test_cppflags) +test_simple_simple_test1_CPPFLAGS = $(test_cppflags) test-simple: @$(top_srcdir)/test/runtests.py --summary=$(top_builddir)/test/simple/summary.junit.xml \ diff --git a/test/simple/simple_test1.c b/test/simple/simple_test1.c new file mode 100644 index 00000000..2eb5b439 --- /dev/null +++ b/test/simple/simple_test1.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) by Argonne National Laboratory + * See COPYRIGHT in top-level directory + */ + +#include <stdio.h> +#include "yaksa.h" +#include <assert.h> + +#define DIMSIZE (80) + +int inbuf[DIMSIZE * DIMSIZE], outbuf[DIMSIZE * DIMSIZE]; + +int main() +{ + int rc = YAKSA_SUCCESS; + int errs = 0; + yaksa_type_t vector; + uintptr_t actual; + yaksa_request_t request; + + yaksa_init(NULL); + + for (int i = 0; i < DIMSIZE * DIMSIZE; i++) 
{ + inbuf[i] = i; + outbuf[i] = -1; + } + + rc = yaksa_type_create_vector(3, 2, 3, YAKSA_TYPE__INT, NULL, &vector); + assert(rc == YAKSA_SUCCESS); + + rc = yaksa_ipack(inbuf, 1, vector, 0, outbuf, DIMSIZE * DIMSIZE * sizeof(int), &actual, + NULL, &request); + assert(rc == YAKSA_SUCCESS); + + /* The pack must complete before its output is used as the accumulate source */ + rc = yaksa_request_wait(request); + assert(rc == YAKSA_SUCCESS); + + /* Here inbuf is the dest buffer that contains strided data, outbuf contains packed data */ + rc = yaksa_iacc(outbuf, actual, inbuf, 1, vector, 0, YAKSA_OP__SUM, + NULL, &request); + assert(rc == YAKSA_SUCCESS); + + rc = yaksa_request_wait(request); + assert(rc == YAKSA_SUCCESS); + + /* vector is 3 blocks of 2 ints with stride 3: every third int is a gap and must stay untouched */ + for (int i = 0; i < 3 * 3; i++) { + int expected = (i % 3 == 2) ? i : i * 2; + if (inbuf[i] != expected) { + fprintf(stderr, "Error at i = %d\n", i); + errs++; + } + } + + yaksa_type_free(vector); + + yaksa_finalize(); + + return errs ? 1 : 0; +}