Lokr support

2023-07-17 00:29:07 +03:00 · 2023-07-17 00:29:07 +03:00 · 46466f09d0
commit 46466f09d0
parent 58c3df32f3
3 changed files with 67 additions and 1 deletions
--- a/extensions-builtin/Lora/network_ia3.py
+++ b/extensions-builtin/Lora/network_ia3.py
@ -1,4 +1,3 @@
-import lyco_helpers
 import network
 import network_lyco

--- a/extensions-builtin/Lora/network_lokr.py
+++ b/extensions-builtin/Lora/network_lokr.py
@ -0,0 +1,65 @@
+import torch
+
+import lyco_helpers
+import network
+import network_lyco
+
+
+class ModuleTypeLokr(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        has_1 = "lokr_w1" in weights.w or ("lokr_w1a" in weights.w and "lokr_w1b" in weights.w)
+        has_2 = "lokr_w2" in weights.w or ("lokr_w2a" in weights.w and "lokr_w2b" in weights.w)
+        if has_1 and has_2:
+            return NetworkModuleLokr(net, weights)
+
+        return None
+
+
+def make_kron(orig_shape, w1, w2):
+    if len(w2.shape) == 4:
+        w1 = w1.unsqueeze(2).unsqueeze(2)
+    w2 = w2.contiguous()
+    return torch.kron(w1, w2).reshape(orig_shape)
+
+
+class NetworkModuleLokr(network_lyco.NetworkModuleLyco):
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+
+        self.w1 = weights.w.get("lokr_w1")
+        self.w1a = weights.w.get("lokr_w1_a")
+        self.w1b = weights.w.get("lokr_w1_b")
+        self.dim = self.w1b.shape[0] if self.w1b else self.dim
+        self.w2 = weights.w.get("lokr_w2")
+        self.w2a = weights.w.get("lokr_w2_a")
+        self.w2b = weights.w.get("lokr_w2_b")
+        self.dim = self.w2b.shape[0] if self.w2b else self.dim
+        self.t2 = weights.w.get("lokr_t2")
+
+    def calc_updown(self, orig_weight):
+        if self.w1 is not None:
+            w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
+        else:
+            w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1 = w1a @ w1b
+
+        if self.w2 is not None:
+            w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
+        elif self.t2 is None:
+            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = w2a @ w2b
+        else:
+            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+
+        output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
+        if len(orig_weight.shape) == 4:
+            output_shape = orig_weight.shape
+
+        updown = make_kron(output_shape, w1, w2)
+
+        return self.finalize_updown(updown, orig_weight, output_shape)
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@ -5,6 +5,7 @@ import network
 import network_lora
 import network_hada
 import network_ia3
+import network_lokr

 import torch
 from typing import Union
@ -15,6 +16,7 @@ module_types = [
    network_lora.ModuleTypeLora(),
    network_hada.ModuleTypeHada(),
    network_ia3.ModuleTypeIa3(),
+    network_lokr.ModuleTypeLokr(),
 ]