Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6a12a35
Implement T1-1-41: erf erf erf.py pixel_shuffle pixel_shuffle matrix…
LaiQuan-conquer Dec 13, 2025
025df97
Merge branch 'InfiniTensor:main' into 2025-autumn-LaiQuan-conquer-T1-…
LaiQuan-conquer Mar 5, 2026
23ace0d
Add implementation plan
Mar 5, 2026
e628c62
Ignore RLCR state directory
Mar 5, 2026
2a9a869
Ignore and untrack plan.md for RLCR
Mar 6, 2026
99e13d9
Build: fix nv-gpu compile blockers
Mar 6, 2026
b2d2e71
MatrixPower: GPU-resident NVIDIA implementation
Mar 6, 2026
9bf36ab
PixelShuffle: make NVIDIA kernel stride-aware
Mar 6, 2026
377f1c0
MatrixPower: handle strided tensors on NVIDIA
Mar 6, 2026
d3103a9
Harness: add infiniop-vs-torch NVIDIA validator
Mar 6, 2026
8a05846
infiniop: export C API for erf/erfc/erfinv/matrix_power/pixel_shuffle
Mar 6, 2026
a1fee29
TensorDescriptor: fix hasBroadcastDim indexing
Mar 6, 2026
3f4bbb6
Harness: add negative stride validation cases
Mar 6, 2026
c693032
cpu: fix matrix_power workspace/params; include <limits> for erfinv
Mar 6, 2026
bd9a193
matrix_power/pixel_shuffle: tighten CPU descriptor validation; fix me…
Mar 6, 2026
46e9a7f
metax/moore: validate pixel_shuffle; sync matrix_power H2D; erfinv do…
Mar 6, 2026
339054a
metax: close namespaces in erf/erfc/erfinv
Mar 6, 2026
d0673dd
matrix_power(cpu): use provided workspace for fp16/bf16
Mar 6, 2026
c9fd3b5
infiniop.h: include erf/erfc/erfinv/matrix_power/pixel_shuffle
Mar 6, 2026
706b0c2
validate: use current CUDA device; improve Windows path regex
Mar 6, 2026
1991fb6
pixel_shuffle: include metax/moore kernel common for cuda_bfloat16
Mar 6, 2026
5220dd9
matrix_power: include metax/moore kernel common for CHECK_* macros
Mar 6, 2026
9bc84ad
cuda: fix erf/erfc/erfinv half/bf16 float conversion
Mar 6, 2026
aef3c58
nvidia: no-op on empty tensors for matrix_power/pixel_shuffle
Mar 6, 2026
da09635
metax/moore: pixel_shuffle no-op on empty outputs
Mar 6, 2026
df5a9b5
matrix_power(cpu): no workspace needed for n==0
Mar 6, 2026
15c118c
Enable infiniop dispatch fallback and hybrid erfinv
Mar 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ __pycache__/
# Cache
cache/

# Humanize RLCR loop state
.humanize/

# RLCR plan file (untracked mode)
plan.md

# JSON
*.json

Expand Down
5 changes: 5 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/embedding.h"
#include "infiniop/ops/erf.h"
#include "infiniop/ops/erfc.h"
#include "infiniop/ops/erfinv.h"
#include "infiniop/ops/flash_attention.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
Expand All @@ -18,11 +21,13 @@
#include "infiniop/ops/layer_norm.h"
#include "infiniop/ops/logsoftmax.h"
#include "infiniop/ops/lp_norm.h"
#include "infiniop/ops/matrix_power.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/ones.h"
#include "infiniop/ops/paged_attention.h"
#include "infiniop/ops/paged_attention_prefill.h"
#include "infiniop/ops/paged_caching.h"
#include "infiniop/ops/pixel_shuffle.h"
#include "infiniop/ops/quant/per_channel_quant_int8.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
Expand Down
24 changes: 24 additions & 0 deletions include/infiniop/ops/erf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ERF_API_H__
#define __INFINIOP_ERF_API_H__

/*
 * Exported C declarations for the Erf operator: create a descriptor, query
 * its workspace size, run the operator, and destroy the descriptor.
 * Every function reports its outcome as an infiniStatus_t.
 */

#include "../operator_descriptor.h"

/* Erf descriptor handle: a pointer to struct InfiniopDescriptor. */
typedef struct InfiniopDescriptor *infiniopErfDescriptor_t;

/*
 * Creates an Erf descriptor and hands it back through desc_ptr.
 *
 * handle   - library handle used for the creation.
 * desc_ptr - out-parameter receiving the descriptor.
 * y        - tensor descriptor matching the writable `y` buffer of
 *            infiniopErf (presumably the output; confirm in the implementation).
 * x        - tensor descriptor matching the read-only `x` buffer of
 *            infiniopErf (presumably the input; confirm in the implementation).
 */
__C __export infiniStatus_t infiniopCreateErfDescriptor(infiniopHandle_t handle,
infiniopErfDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

/*
 * Writes the workspace size associated with desc to *size.
 * NOTE(review): presumably a byte count to allocate before calling
 * infiniopErf; the unit is not stated in this header.
 */
__C __export infiniStatus_t infiniopGetErfWorkspaceSize(infiniopErfDescriptor_t desc, size_t *size);

/*
 * Runs the Erf operator described by desc.
 *
 * workspace      - caller-supplied scratch buffer.
 * workspace_size - size of `workspace` (presumably must be at least the
 *                  value from infiniopGetErfWorkspaceSize; confirm).
 * y              - writable data buffer.
 * x              - read-only data buffer (const).
 * stream         - opaque pointer; presumably a device stream/queue handle,
 *                  TODO confirm what a CPU backend expects here.
 */
__C __export infiniStatus_t infiniopErf(infiniopErfDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

/* Destroys a descriptor obtained from infiniopCreateErfDescriptor. */
__C __export infiniStatus_t infiniopDestroyErfDescriptor(infiniopErfDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/infiniop/ops/erfc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ERFC_API_H__
#define __INFINIOP_ERFC_API_H__

/*
 * Exported C declarations for the Erfc operator: create a descriptor, query
 * its workspace size, run the operator, and destroy the descriptor.
 * Every function reports its outcome as an infiniStatus_t.
 */

#include "../operator_descriptor.h"

/* Erfc descriptor handle: a pointer to struct InfiniopDescriptor. */
typedef struct InfiniopDescriptor *infiniopErfcDescriptor_t;

/*
 * Creates an Erfc descriptor and hands it back through desc_ptr.
 *
 * handle   - library handle used for the creation.
 * desc_ptr - out-parameter receiving the descriptor.
 * y        - tensor descriptor matching the writable `y` buffer of
 *            infiniopErfc (presumably the output; confirm in the implementation).
 * x        - tensor descriptor matching the read-only `x` buffer of
 *            infiniopErfc (presumably the input; confirm in the implementation).
 */
__C __export infiniStatus_t infiniopCreateErfcDescriptor(infiniopHandle_t handle,
infiniopErfcDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

/*
 * Writes the workspace size associated with desc to *size.
 * NOTE(review): presumably a byte count to allocate before calling
 * infiniopErfc; the unit is not stated in this header.
 */
__C __export infiniStatus_t infiniopGetErfcWorkspaceSize(infiniopErfcDescriptor_t desc, size_t *size);

/*
 * Runs the Erfc operator described by desc.
 *
 * workspace      - caller-supplied scratch buffer.
 * workspace_size - size of `workspace` (presumably must be at least the
 *                  value from infiniopGetErfcWorkspaceSize; confirm).
 * y              - writable data buffer.
 * x              - read-only data buffer (const).
 * stream         - opaque pointer; presumably a device stream/queue handle,
 *                  TODO confirm what a CPU backend expects here.
 */
__C __export infiniStatus_t infiniopErfc(infiniopErfcDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

/* Destroys a descriptor obtained from infiniopCreateErfcDescriptor. */
__C __export infiniStatus_t infiniopDestroyErfcDescriptor(infiniopErfcDescriptor_t desc);

#endif
24 changes: 24 additions & 0 deletions include/infiniop/ops/erfinv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef __INFINIOP_ERFINV_API_H__
#define __INFINIOP_ERFINV_API_H__

/*
 * Exported C declarations for the Erfinv operator: create a descriptor,
 * query its workspace size, run the operator, and destroy the descriptor.
 * Every function reports its outcome as an infiniStatus_t.
 */

#include "../operator_descriptor.h"

/* Erfinv descriptor handle: a pointer to struct InfiniopDescriptor. */
typedef struct InfiniopDescriptor *infiniopErfinvDescriptor_t;

/*
 * Creates an Erfinv descriptor and hands it back through desc_ptr.
 *
 * handle   - library handle used for the creation.
 * desc_ptr - out-parameter receiving the descriptor.
 * y        - tensor descriptor matching the writable `y` buffer of
 *            infiniopErfinv (presumably the output; confirm in the implementation).
 * x        - tensor descriptor matching the read-only `x` buffer of
 *            infiniopErfinv (presumably the input; confirm in the implementation).
 */
__C __export infiniStatus_t infiniopCreateErfinvDescriptor(infiniopHandle_t handle,
infiniopErfinvDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);

/*
 * Writes the workspace size associated with desc to *size.
 * NOTE(review): presumably a byte count to allocate before calling
 * infiniopErfinv; the unit is not stated in this header.
 */
__C __export infiniStatus_t infiniopGetErfinvWorkspaceSize(infiniopErfinvDescriptor_t desc, size_t *size);

/*
 * Runs the Erfinv operator described by desc.
 *
 * workspace      - caller-supplied scratch buffer.
 * workspace_size - size of `workspace` (presumably must be at least the
 *                  value from infiniopGetErfinvWorkspaceSize; confirm).
 * y              - writable data buffer.
 * x              - read-only data buffer (const).
 * stream         - opaque pointer; presumably a device stream/queue handle,
 *                  TODO confirm what a CPU backend expects here.
 */
__C __export infiniStatus_t infiniopErfinv(infiniopErfinvDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

/* Destroys a descriptor obtained from infiniopCreateErfinvDescriptor. */
__C __export infiniStatus_t infiniopDestroyErfinvDescriptor(infiniopErfinvDescriptor_t desc);

#endif
25 changes: 25 additions & 0 deletions include/infiniop/ops/matrix_power.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef __INFINIOP_MATRIX_POWER_API_H__
#define __INFINIOP_MATRIX_POWER_API_H__

/*
 * Exported C declarations for the MatrixPower operator: create a descriptor,
 * query its workspace size, run the operator, and destroy the descriptor.
 * Every function reports its outcome as an infiniStatus_t.
 */

#include "../operator_descriptor.h"

/* MatrixPower descriptor handle: a pointer to struct InfiniopDescriptor. */
typedef struct InfiniopDescriptor *infiniopMatrixPowerDescriptor_t;

/*
 * Creates a MatrixPower descriptor and hands it back through desc_ptr.
 *
 * handle   - library handle used for the creation.
 * desc_ptr - out-parameter receiving the descriptor.
 * y        - tensor descriptor matching the writable `y` buffer of
 *            infiniopMatrixPower (presumably the output; confirm).
 * x        - tensor descriptor matching the read-only `x` buffer of
 *            infiniopMatrixPower (presumably the input; confirm).
 * n        - presumably the exponent applied to the matrix. It is fixed at
 *            descriptor creation: infiniopMatrixPower takes no exponent.
 *            NOTE(review): this header does not say whether negative values
 *            are accepted; verify against the backend validation.
 */
__C __export infiniStatus_t infiniopCreateMatrixPowerDescriptor(infiniopHandle_t handle,
infiniopMatrixPowerDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
int n);

/*
 * Writes the workspace size associated with desc to *size.
 * NOTE(review): presumably a byte count to allocate before calling
 * infiniopMatrixPower; the unit is not stated in this header.
 */
__C __export infiniStatus_t infiniopGetMatrixPowerWorkspaceSize(infiniopMatrixPowerDescriptor_t desc, size_t *size);

/*
 * Runs the MatrixPower operator described by desc.
 *
 * workspace      - caller-supplied scratch buffer.
 * workspace_size - size of `workspace` (presumably must be at least the
 *                  value from infiniopGetMatrixPowerWorkspaceSize; confirm).
 * y              - writable data buffer.
 * x              - read-only data buffer (const).
 * stream         - opaque pointer; presumably a device stream/queue handle,
 *                  TODO confirm what a CPU backend expects here.
 */
__C __export infiniStatus_t infiniopMatrixPower(infiniopMatrixPowerDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

/* Destroys a descriptor obtained from infiniopCreateMatrixPowerDescriptor. */
__C __export infiniStatus_t infiniopDestroyMatrixPowerDescriptor(infiniopMatrixPowerDescriptor_t desc);

#endif
25 changes: 25 additions & 0 deletions include/infiniop/ops/pixel_shuffle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef __INFINIOP_PIXEL_SHUFFLE_API_H__
#define __INFINIOP_PIXEL_SHUFFLE_API_H__

/*
 * Exported C declarations for the PixelShuffle operator: create a
 * descriptor, query its workspace size, run the operator, and destroy the
 * descriptor. Every function reports its outcome as an infiniStatus_t.
 */

#include "../operator_descriptor.h"

/* PixelShuffle descriptor handle: a pointer to struct InfiniopDescriptor. */
typedef struct InfiniopDescriptor *infiniopPixelShuffleDescriptor_t;

/*
 * Creates a PixelShuffle descriptor and hands it back through desc_ptr.
 *
 * handle         - library handle used for the creation.
 * desc_ptr       - out-parameter receiving the descriptor.
 * y              - tensor descriptor matching the writable `y` buffer of
 *                  infiniopPixelShuffle (presumably the output; confirm).
 * x              - tensor descriptor matching the read-only `x` buffer of
 *                  infiniopPixelShuffle (presumably the input; confirm).
 * upscale_factor - fixed at descriptor creation: infiniopPixelShuffle takes
 *                  no factor. NOTE(review): presumably the spatial upscale
 *                  factor in the sense of PyTorch's pixel_shuffle; the
 *                  expected layout and valid range are not stated here.
 */
__C __export infiniStatus_t infiniopCreatePixelShuffleDescriptor(infiniopHandle_t handle,
infiniopPixelShuffleDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
int upscale_factor);

/*
 * Writes the workspace size associated with desc to *size.
 * NOTE(review): presumably a byte count to allocate before calling
 * infiniopPixelShuffle; the unit is not stated in this header.
 */
__C __export infiniStatus_t infiniopGetPixelShuffleWorkspaceSize(infiniopPixelShuffleDescriptor_t desc, size_t *size);

/*
 * Runs the PixelShuffle operator described by desc.
 *
 * workspace      - caller-supplied scratch buffer.
 * workspace_size - size of `workspace` (presumably must be at least the
 *                  value from infiniopGetPixelShuffleWorkspaceSize; confirm).
 * y              - writable data buffer.
 * x              - read-only data buffer (const).
 * stream         - opaque pointer; presumably a device stream/queue handle,
 *                  TODO confirm what a CPU backend expects here.
 */
__C __export infiniStatus_t infiniopPixelShuffle(infiniopPixelShuffleDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

/* Destroys a descriptor obtained from infiniopCreatePixelShuffleDescriptor. */
__C __export infiniStatus_t infiniopDestroyPixelShuffleDescriptor(infiniopPixelShuffleDescriptor_t desc);

#endif
25 changes: 25 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,28 @@
getattr(ntops.torch, op_name).__globals__["torch"] = sys.modules[__name__]

use_ntops = True

# ----------------------------------------------------------------------
# Test runner dispatch fallback (no edits under test/infinicore/)
# ----------------------------------------------------------------------
# Best-effort setup: contextlib.suppress(Exception) silently discards any
# Exception raised inside the `with` body, so a failure there does not
# propagate out of this package's import.
# NOTE(review): nothing in this block logs the suppressed error, so a broken
# _infiniop_dispatch module would go unnoticed; confirm that is intended.
# NOTE(review): indentation is not visible in this view, so which of the
# statements below are inside the `with` body must be checked in the file.
with contextlib.suppress(Exception):
from ._infiniop_dispatch import (
erf,
erfc,
erfinv,
install_framework_base_patch,
matrix_power,
pixel_shuffle,
)

# Presumably patches the test framework's base class so the tests reach the
# operators above; verify against _infiniop_dispatch.
install_framework_base_patch()

# Add the five operator names to the package's public export list.
__all__.extend(
[
"erf",
"erfc",
"erfinv",
"matrix_power",
"pixel_shuffle",
]
)
Loading