
Discussion: using nonlinearity and normalization in the operations #187

Open
@jakubMitura14

Description


Hello, I am using tensor contraction as a layer in Lux.jl, and it works without problems. Here is the implementation, in case it is useful for somebody.


using Lux, Random, CUDA, cuTENSOR, TensorOperations, VectorInterface
using NNlib: swish

# Layer wrapping a single tensor contraction; the contraction is passed in as an expression.
struct TensorOpLayer_str <: Lux.AbstractExplicitLayer
    param_shape
    operation_expression::Expr
end

# The trainable parameter is a dense Float32 tensor P of the requested shape.
function Lux.initialparameters(rng::AbstractRNG, l::TensorOpLayer_str)
    P = rand(rng, Float32, l.param_shape...)
    return (P=P,)
end

# Find every substring of the form "((...))" in a stringified expression -
# these literals hold the index tuples generated by the TensorOperations parser.
function extract_double_brackets(s::String)
    pattern = r"\(\(.*?\)\)"
    matches = eachmatch(pattern, s)
    return [m.match for m in matches]
end

# Turn a string such as "((1, 2), (3,))" back into the corresponding tuple.
function parse_tuple_from_string(s)
    # Parse the string into an expression
    expr = Meta.parse(s)

    # Evaluate the expression to get the tuple
    result = eval(expr)

    return result
end
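For reference, here is roughly what the two helpers do on a made-up string (the numbers are placeholders, not the real index tuples):

s = "f(C, ((1, 2), (3, 5)), A, ((4,), (1, 2, 3)))"        # made-up example
extract_double_brackets(s)                                 # -> ["((1, 2), (3, 5))", "((4,), (1, 2, 3))"]
parse_tuple_from_string("((1, 2), (3, 5))")                # -> ((1, 2), (3, 5))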


# Reverse-engineer the index tuples from the code that the TensorOperations
# parser would generate for this expression: PA and PB describe how the indices
# of the input and of the parameter P are split into open and contracted ones,
# and PC gives the index order of the result.
function Lux.initialstates(::AbstractRNG, l::TensorOpLayer_str)::NamedTuple
    ex = l.operation_expression
    parser = TensorOperations.tensorparser(ex, :allocator => TensorOperations.CUDAAllocator(), :backend => TensorOperations.cuTENSORBackend())
    parsed_ex = parser(ex)
    # pull the "((...))" index-tuple literals out of the stringified parser output
    arg_tuples = extract_double_brackets(string(parsed_ex.args[3]))
    arg_tuples = map(parse_tuple_from_string, arg_tuples)

    return (PA=arg_tuples[1], PB=arg_tuples[2], PC=arg_tuples[3])
end


# Forward pass: allocate the output buffer on the GPU, run the contraction with
# the cuTENSOR backend, then apply the nonlinearity.
function (l::TensorOpLayer_str)(x, ps, st::NamedTuple)

    res = TensorOperations.tensoralloc_contract(Float32, x, st.PA, false, ps.P, st.PB, false, st.PC, Val{false}(), TensorOperations.CUDAAllocator{CUDA.UnifiedMemory, CUDA.DeviceMemory, CUDA.DeviceMemory}())
    res = TensorOperations.tensorcontract!(res, x, st.PA, false, ps.P, st.PB, false, st.PC, VectorInterface.One(), VectorInterface.Zero(), TensorOperations.cuTENSORBackend(), TensorOperations.CUDAAllocator{CUDA.UnifiedMemory, CUDA.DeviceMemory, CUDA.DeviceMemory}())
    res = swish.(res)

    return res, st
end
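For completeness, a single layer can be set up and called on its own roughly like this (the shapes below are made up purely for illustration, and moving the parameter to the GPU could also be done with Lux's device utilities):

rng = Random.default_rng()
layer = TensorOpLayer_str((4, 8, 3), :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]))
ps, st = Lux.setup(rng, layer)                 # P has shape (d, e, c) = (4, 8, 3)
ps = (P = CUDA.cu(ps.P),)                      # the parameter tensor has to live on the GPU
x = CUDA.rand(Float32, 2, 3, 5, 6, 4)          # (b, c, f, n, d)
y, st = layer(x, ps, st)                       # y has shape (b, f, n, e) = (2, 5, 6, 8)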

Maybe it is not perfect, since it does some not-so-pretty reverse engineering (I was unable to get the generated return statement out of the tensor macro when the expression is passed into the layer state), but it works.

Then I invoke the layers like this:

            , TensorOpLayer_str((num_directions, primary_sv_repr, num_channels), :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]))
            , GroupNorm(num_params_exec, num_params_exec, affine=true)
            , TensorOpLayer_str((num_params_exec, primary_sv_repr, final_sv_repr), :(res[b, f, e] := x[b, f, n, d] * P[n, d, e]))
            , LayerNorm((batch_size, flat_sv_len))
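Put together, that fragment sits inside a chain roughly like this (the size constants here are placeholders for the real ones in my network):

num_channels, num_directions, primary_sv_repr = 4, 3, 8     # placeholder sizes
num_params_exec, final_sv_repr = 8, 16
batch_size, flat_sv_len = 2, 128

model = Lux.Chain(
    TensorOpLayer_str((num_directions, primary_sv_repr, num_channels), :(res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c])),
    GroupNorm(num_params_exec, num_params_exec, affine=true),
    TensorOpLayer_str((num_params_exec, primary_sv_repr, final_sv_repr), :(res[b, f, e] := x[b, f, n, d] * P[n, d, e])),
    LayerNorm((batch_size, flat_sv_len)),
)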

This is only part of a network. I divided the operation into multiple smaller operations so that I can apply a nonlinearity (here swish) and normalization between them (the numbers grow fast and lead to instabilities in training without some kind of normalization).

However, and here is the problem: I know the operation would be far more performant if I combined operations like

res[b, f, n, e] := x[b, c, f, n, d] * P[d, e, c]
res[b, f, e] := x[b, f, n, d] * P[n, d, e]

etc. into one operation. However, that way I would be unable to add nonlinearities and normalization between each step. Is there a better way to achieve this than what I did?
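To make the trade-off concrete, the fused version I have in mind would look something like this (index names chosen just for illustration, with P1 and P2 the two parameter tensors from above), and there is no place left to put swish or a norm between the two contractions:

# single contraction instead of two: x[b,c,f,n,d] * P1[d,e1,c] gives the
# intermediate [b,f,n,e1], which is then contracted with P2[n,e1,e2]
@tensor res[b, f, e2] := x[b, c, f, n, d] * P1[d, e1, c] * P2[n, e1, e2]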

Thanks for the help!
