feat: add quantized qwen2-0.5b model (#44)

bil-ash · web-flow · commit 0617dbb47a6f · 2024-06-25T23:33:30.000-04:00
Add support for the quantized (q4f16_1) Qwen2-0.5B-Instruct model.
diff --git a/app/constant.ts b/app/constant.ts
@@ -347,6 +347,23 @@ export const DEFAULT_MODELS: ModelRecord[] = [
       temperature: 0.7,
       top_p: 0.95,
     },
+  },
+   {
+    name: "Qwen2-0.5B-Instruct-q4f16-MLC",
+    display_name: "Qwen",
+    provider: "Alibaba",
+    size: "0.5B",
+    quantization: "q4f16_1",
+    context_length: "4k",
+    family: "Qwen 2",
+    vram_required_MB: 500, //rough estimate
+    low_resource_required: true,
+    recommended_config: {
+      temperature: 0.7,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      top_p: 0.8,
+    },
   },
   {
     name: "Qwen2-0.5B-Instruct-q0f16-MLC",