Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TensorKit"
uuid = "07d1fe3e-3e46-537d-9eac-e9e13d0d4cec"
authors = ["Jutho Haegeman, Lukas Devos"]
version = "0.16.3"
authors = ["Jutho Haegeman, Lukas Devos"]

[deps]
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
Expand Down Expand Up @@ -53,7 +53,7 @@ Printf = "1"
Random = "1"
SafeTestsets = "0.1"
ScopedValues = "1.3.0"
Strided = "2"
Strided = "2.3.4"
TensorKitSectors = "0.3.5"
TensorOperations = "5.1"
Test = "1"
Expand Down Expand Up @@ -87,3 +87,6 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake", "JET"]

[sources]
Strided = {url = "https://github.com/QuantumKitHub/Strided.jl", rev = "ksh/copyto"}
2 changes: 1 addition & 1 deletion ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn
import TensorKit: randisometry, rand, randn, _copyto!, _add_general_kernel_nonthreaded!, blocktype

using TensorKit: MatrixAlgebraKit

Expand Down
64 changes: 50 additions & 14 deletions ext/TensorKitCUDAExt/cutensormap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂,
# Construct a CuTensorMap from a host-side TensorMap by uploading its data to
# the GPU: `t.data` is copied into a `CuArray{T}` while the element type, the
# space, and the (N₁, N₂) codomain/domain split are preserved unchanged.
function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
end
# Host-to-device conversion constructor: requesting a TensorMap whose storage
# parameter DA is a CuArray{T} from a TensorMap backed by a plain Array{T}
# yields a CuTensorMap with the data copied onto the device. The space and the
# (N₁, N₂) index partition are carried over as-is.
function TensorMap{T, S, N₁, N₂, DA}(t::TensorMap{T, S, N₁, N₂, HA}) where {T, S, N₁, N₂, DA <: CuArray{T}, HA <: Array{T}}
return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}}
Expand All @@ -17,6 +20,10 @@ function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::Abstr
return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V)
end

# Declare the per-sector block type of a CuTensorMap to be a device matrix in
# CUDA.DeviceMemory with the tensor's element type T.
# NOTE(review): a reviewer on this change suggests this override may be
# unnecessary and better handled by type inference — confirm before keeping.
function TensorKit.blocktype(::Type{<:CuTensorMap{T, S}}) where {T, S}
return CuMatrix{T, CUDA.DeviceMemory}
end

Comment on lines +23 to +26
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function TensorKit.blocktype(::Type{<:CuTensorMap{T, S}}) where {T, S}
return CuMatrix{T, CUDA.DeviceMemory}
end

I think this is now more properly addressed through type inference.

for (fname, felt) in ((:zeros, :zero), (:ones, :one))
@eval begin
function CUDA.$fname(
Expand Down Expand Up @@ -101,18 +108,6 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
end

# Convert an arbitrary AbstractTensorMap over the same spaces S and index
# partition (N₁, N₂) into the requested concrete CuTensorMap type TT.
function Base.convert(
TT::Type{CuTensorMap{T, S, N₁, N₂}},
t::AbstractTensorMap{<:Any, S, N₁, N₂}
) where {T, S, N₁, N₂}
# fast path: `t` is already exactly the target type — return it uncopied
if typeof(t) === TT
return t
else
# allocate an uninitialized destination over the same space and copy in
tnew = TT(undef, space(t))
return copy!(tnew, t)
end
end

function LinearAlgebra.isposdef(t::CuTensorMap)
domain(t) == codomain(t) ||
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
Expand All @@ -138,10 +133,9 @@ function Base.promote_rule(
return CuTensorMap{T, S, N₁, N₂}
end

TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
CuArray{T, N, CUDA.default_memory}


# CuTensorMap exponentiation:
function TensorKit.exp!(t::CuTensorMap)
domain(t) == codomain(t) ||
Expand All @@ -168,3 +162,45 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
return tf
end
end

# CUDA specialization of the non-threaded generic add/transform kernel:
# applies each sub-transformer of a GenericTreeTransformer to move data from
# `tsrc` into `tdst` under permutation `p`, scaled as α·(transform) + β·(dst).
# Device buffers are allocated once up front and reused across the loop.
function TensorKit._add_general_kernel_nonthreaded!(
tdst::CuTensorMap, tsrc::CuTensorMap, p, transformer::TensorKit.GenericTreeTransformer, α, β, backend...
)
# preallocate buffers
buffers = TensorKit.allocate_buffers(tdst, tsrc, transformer)

for subtransformer in transformer.data
# Special case without intermediate buffers whenever there is only a single block
if length(subtransformer[1]) == 1
TensorKit._add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...)
else
# promote the basis-transformation matrix (subtransformer[1]) to a
# CuArray so the multi-block transform runs on the device.
# NOTE(review): reviewers suggest this adaption may belong at the
# mul! callsite instead — confirm whether it is still required.
cu_subtransformer = tuple(CUDA.adapt(CuArray, subtransformer[1]), subtransformer[2:end]...)
TensorKit._add_transform_multi!(tdst, tsrc, p, cu_subtransformer, buffers, α, β, backend...)
end
end
return nothing
end
Comment on lines +166 to +182
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the only change here is to promote the unitary basis transformation into a CuArray, which probably makes more sense to just support at the mul callsite (which I think @kshyatt already fixed, so this might no longer be required?)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove it and see! 😈


# Allocate the pair of scratch device buffers (destination-side, source-side)
# used by the multi-block tree-transform path. Each buffer holds
# `buffersize(transformer)` elements of the corresponding tensor's eltype.
function TensorKit.allocate_buffers(
tdst::CuTensorMap, tsrc::CuTensorMap, transformer::TensorKit.GenericTreeTransformer
)
sz = TensorKit.buffersize(transformer)
# force zeros to ensure the buffers are empty
# otherwise memory re-use can fill them with garbage data
# NOTE(review): a reviewer argues the zero-fill should be unnecessary if
# every used region is fully overwritten — `similar` may suffice; confirm.
return CUDA.zeros(eltype(tdst.data), sz), CUDA.zeros(eltype(tsrc.data), sz)
end
Comment on lines +184 to +191
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is slightly confusing to me, the zeros shouldn't be necessary (in fact, the implementation reuses the start of the buffer for each of the different blocks anyways), so I would have guessed that the similar(tsrc.data, sz) calls should be sufficient and correctly allocate device arrays here?


# mul! with a lazy BraidingTensor on the left: materialize it as a dense
# TensorMap, adapt that to a CuArray-backed tensor, and dispatch to the
# all-CuTensorMap mul! method. Note this allocates a device copy of tA.
function LinearAlgebra.mul!(
tC::CuTensorMap, tA::TensorKit.BraidingTensor, tB::CuTensorMap, α = true, β = false
)
return mul!(tC, CUDA.adapt(CuArray, TensorMap(tA)), tB, α, β)
end

# mul! with a lazy BraidingTensor on the right: mirror of the left-hand case —
# materialize tB as a dense TensorMap, move it to the device, and forward to
# the all-CuTensorMap mul! method. Note this allocates a device copy of tB.
function LinearAlgebra.mul!(
tC::CuTensorMap, tA::CuTensorMap, tB::TensorKit.BraidingTensor, α = true, β = false
)
return mul!(tC, tA, CUDA.adapt(CuArray, TensorMap(tB)), α, β)
end

# Storage promotion for mixed CuTensorMap/BraidingTensor operands: the result
# storage always follows the CuTensorMap side (the BraidingTensor operand is
# ignored), re-parametrized to the requested scalar type T.
@inline TensorKit.promote_storagetype(::Type{T}, A::CuTensorMap, B::TensorKit.BraidingTensor) where {T <: Number} = similarstoragetype(A, T)
@inline TensorKit.promote_storagetype(::Type{T}, A::TensorKit.BraidingTensor, B::CuTensorMap) where {T <: Number} = similarstoragetype(B, T)
20 changes: 15 additions & 5 deletions src/tensors/abstracttensor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,11 @@ storagetype(t) = storagetype(typeof(t))
function storagetype(::Type{T}) where {T <: AbstractTensorMap}
if T isa Union
# attempt to be slightly more specific by promoting unions
Ma = storagetype(T.a)
Mb = storagetype(T.b)
return promote_storagetype(Ma, Mb)
return promote_storagetype(T.a, T.b)
elseif eltype(T) isa Union
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to better support BlockTensorMap? Do we ever have tensors with union scalartypes?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's for the block case. I don't think we can have scalar unions?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit weird to support that here, since for generic AbstractTensorMap eltype would return a Number, which is why I asked about the scalartype thing. Maybe we can just copy this definition and overload in BlockTensorKit for AbstractBlockTensorMap?

# attempt to be slightly more specific by promoting unions
TU = eltype(T)
return promote_storagetype(TU.a, TU.b)
else
# fallback definition by using scalartype
return similarstoragetype(scalartype(T))
Expand Down Expand Up @@ -103,11 +105,19 @@ similarstoragetype(X::Type, ::Type{T}) where {T <: Number} =

# implement on tensors
similarstoragetype(::Type{TT}) where {TT <: AbstractTensorMap} = similarstoragetype(storagetype(TT))
similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} =
similarstoragetype(storagetype(TT), T)
# Tensor-type + scalar-type form: delegate to the array-level method by first
# extracting the tensor type's storage type.
function similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number}
return similarstoragetype(storagetype(TT), T)
end
Comment on lines +108 to +110
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a formatting change right?

# Tensor-type + candidate-storage form: given a tensor type with scalar type T
# and a requested DenseVector storage type TA, produce TA re-parametrized to
# hold elements of type T (via the array-level similarstoragetype).
function similarstoragetype(::Type{<:AbstractTensorMap{T, S, N₁, N₂}}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(TA, T)
end
# Instance form: forward to the type-level method on typeof(t).
function similarstoragetype(t::AbstractTensorMap{T, S, N₁, N₂}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂}
return similarstoragetype(typeof(t), TA)
end

# implement on arrays
# Array-level base cases: a concrete DenseVector of numbers is already its own
# "similar" storage type (both the one- and matching two-argument forms).
similarstoragetype(::Type{A}) where {A <: DenseVector{<:Number}} = A
similarstoragetype(::Type{A}, ::Type{A}) where {A <: DenseVector{<:Number}} = A
# Generic fallback for other numeric AbstractArrays: compute the storage type
# as the inferred return type of `similar(A, Int)`. Marked :foldable so the
# compiler may constant-fold this reflection at compile time.
Base.@assume_effects :foldable similarstoragetype(::Type{A}) where {A <: AbstractArray{<:Number}} =
Core.Compiler.return_type(similar, Tuple{A, Int})
Base.@assume_effects :foldable similarstoragetype(::Type{A}, ::Type{T}) where {A <: AbstractArray, T <: Number} =
Expand Down
2 changes: 2 additions & 0 deletions src/tensors/adjoint.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Base.adjoint(t::AbstractTensorMap) = AdjointTensorMap(t)
# Basic accessors for the lazy adjoint wrapper: the space is the adjoint of the
# parent's space, while dim and the storage type are taken from the parent.
space(t::AdjointTensorMap) = adjoint(space(parent(t)))
dim(t::AdjointTensorMap) = dim(parent(t))
storagetype(::Type{AdjointTensorMap{T, S, N₁, N₂, TT}}) where {T, S, N₁, N₂, TT} = storagetype(TT)
# similarstoragetype on an adjoint delegates to the wrapped tensor type TT,
# for both a requested scalar type and a requested DenseVector storage type.
similarstoragetype(::AdjointTensorMap{T, S, N₁, N₂, TT}, ::Type{T′}) where {T, S, N₁, N₂, TT, T′ <: Number} = similarstoragetype(TT, T′)
similarstoragetype(::AdjointTensorMap{T, S, N₁, N₂, TT}, ::Type{TA}) where {T, S, N₁, N₂, TT, TA <: DenseVector} = similarstoragetype(TT, TA)

# Blocks and subblocks
#----------------------
Expand Down
25 changes: 15 additions & 10 deletions src/tensors/braidingtensor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,10 @@ function block(b::BraidingTensor, s::Sector)
# TODO: probably always square?
m = blockdim(codomain(b), s)
n = blockdim(domain(b), s)
data = Matrix{eltype(b)}(undef, (m, n))
data = zeros(eltype(b), (m, n))

length(data) == 0 && return data # s ∉ blocksectors(b)

data = fill!(data, zero(eltype(b)))

V1, V2 = codomain(b)
if sectortype(b) === Trivial
d1, d2 = dim(V1), dim(V2)
Expand Down Expand Up @@ -171,12 +169,15 @@ end
has_shared_permute(t::BraidingTensor, ::Index2Tuple) = false
function add_transform!(
tdst::AbstractTensorMap,
tsrc::BraidingTensor, (p₁, p₂)::Index2Tuple,
tsrc::BraidingTensor{T, S},
(p₁, p₂)::Index2Tuple,
fusiontreetransform,
α::Number, β::Number, backend::AbstractBackend...
)
) where {T, S}
tsrc_map = similar(tdst, storagetype(tdst), space(tsrc))
copy!(tsrc_map, tsrc)
return add_transform!(
tdst, TensorMap(tsrc), (p₁, p₂), fusiontreetransform, α, β,
tdst, tsrc_map, (p₁, p₂), fusiontreetransform, α, β,
backend...
)
end
Expand Down Expand Up @@ -276,11 +277,15 @@ function planarcontract!(
backend, allocator
)
# special case only defined for contracting 2 indices
length(oindB) == length(cindB) == 2 ||
if !(length(oindB) == length(cindB) == 2)
# horrible!!!!!
tB′ = TensorMap(B)
tB = TensorMapWithStorage{eltype(B), similarstoragetype(A, eltype(B)), spacetype(tB′), numout(tB′), numin(tB′)}(tB′)
return planarcontract!(
C, A, (oindA, cindA), TensorMap(B), (cindB, oindB), (p1, p2),
α, β, backend, allocator
)
C, A, (oindA, cindA), tB, (cindB, oindB), (p1, p2),
α, β, backend, allocator
)
end

codA, domA = codomainind(A), domainind(A)
codB, domB = codomainind(B), domainind(B)
Expand Down
2 changes: 1 addition & 1 deletion src/tensors/diagonal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ end
# ----------------
function TO.tensoradd_type(TC, A::DiagonalTensorMap, ::Index2Tuple{1, 1}, ::Bool)
M = similarstoragetype(A, TC)
return DiagonalTensorMap{TC, spacetype(A), M}
return DiagonalTensorMap{scalartype(M), spacetype(A), M}
end

function TO.tensorcontract_type(
Expand Down
12 changes: 7 additions & 5 deletions src/tensors/indexmanipulations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ for (operation, manipulation) in (
$promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} =
sectorscalartype(I) <: Integer ? T :
sectorscalartype(I) <: Real ? float(T) : complex(T)
$promote_op(::Type{TA}, ::Type{I}) where {TA <: DenseVector, I <: Sector} =
similarstoragetype(TA, $promote_op(eltype(TA), I))
# TODO: currently the manipulations all use sectorscalartype, change to:
# $manipulation_scalartype(I) <: Integer ? T :
# $manipulation_scalartype(I) <: Real ? float(T) : complex(T)
Expand Down Expand Up @@ -342,11 +344,11 @@ See also [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i})
"""
function insertleftunit(
t::AbstractTensorMap, ::Val{i} = Val(numind(t) + 1);
copy::Bool = false, conj::Bool = false, dual::Bool = false
copy::Bool = false, conj::Bool = false, dual::Bool = false,
) where {i}
W = insertleftunit(space(t), Val(i); conj, dual)
if t isa TensorMap
return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W)
else
tdst = similar(t, W)
for (c, b) in blocks(t)
Expand All @@ -371,11 +373,11 @@ See also [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) w
"""
function insertrightunit(
t::AbstractTensorMap, ::Val{i} = Val(numind(t));
copy::Bool = false, conj::Bool = false, dual::Bool = false
copy::Bool = false, conj::Bool = false, dual::Bool = false,
) where {i}
W = insertrightunit(space(t), Val(i); conj, dual)
if t isa TensorMap
return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W)
else
tdst = similar(t, W)
for (c, b) in blocks(t)
Expand All @@ -400,7 +402,7 @@ and [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) wher
function removeunit(t::AbstractTensorMap, ::Val{i}; copy::Bool = false) where {i}
W = removeunit(space(t), Val(i))
if t isa TensorMap
return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W)
return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W)
else
tdst = similar(t, W)
for (c, b) in blocks(t)
Expand Down
7 changes: 4 additions & 3 deletions src/tensors/tensoroperations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,10 @@ function blas_contract!(
twistB = false
end

TTC = storagetype(C)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this effectively means that we are deciding to promote inputs to the storagetype of the output. I'm not sure if I am fully convinced that we should solve this automatically at all, since I think that is also inconsistent with how regular matrices work (same for adding):

julia> CUDA.rand(2, 2) * rand(Float32, 2, 2)
ERROR: Scalar indexing is disallowed.

I do think that this might be the right approach, and requiring explicit conversions in the cases of mixed inputs seems like the right call to me. (Even though I can see how that is annoying for MPSKit 😉 )

# Bring A in the correct form for BLAS contraction
if copyA
Anew = TO.tensoralloc_add(TC, A, pA, false, Val(true), allocator)
Anew = TO.tensoralloc_add(TTC, A, pA, false, Val(true), allocator)
Anew = TO.tensoradd!(Anew, A, pA, false, One(), Zero(), backend, allocator)
twistA && twist!(Anew, filter(!isdual ∘ Base.Fix1(space, Anew), domainind(Anew)))
else
Expand All @@ -388,7 +389,7 @@ function blas_contract!(

# Bring B in the correct form for BLAS contraction
if copyB
Bnew = TO.tensoralloc_add(TC, B, pB, false, Val(true), allocator)
Bnew = TO.tensoralloc_add(TTC, B, pB, false, Val(true), allocator)
Bnew = TO.tensoradd!(Bnew, B, pB, false, One(), Zero(), backend, allocator)
twistB && twist!(Bnew, filter(isdual ∘ Base.Fix1(space, Bnew), codomainind(Bnew)))
else
Expand All @@ -401,7 +402,7 @@ function blas_contract!(
copyC = !TO.isblasdestination(C, ipAB)

if copyC
Cnew = TO.tensoralloc_add(TC, C, ipAB, false, Val(true), allocator)
Cnew = TO.tensoralloc_add(TTC, C, ipAB, false, Val(true), allocator)
mul!(Cnew, Anew, Bnew)
TO.tensoradd!(C, Cnew, pAB, false, α, β, backend, allocator)
TO.tensorfree!(Cnew, allocator)
Expand Down
Loading
Loading