====== Ollama Open-Webui ======

Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
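
A quick way to check what is available before picking a model size (the nvidia-smi line assumes an NVIDIA GPU and reports VRAM):
<code>
free -h
nvidia-smi --query-gpu=memory.total,memory.free --format=csv
</code>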
  
Notes only for now:
<code>
mkdir /opt/openedai-speech/tts-config
mkdir /opt/pipelines
mkdir /opt/docker-ssl-proxy
mkdir /opt/faster-whisper-server
</code>
  
<code - docker-ollama.yml>
name: ollama
services:
  ollama:
    image: ollama/ollama
    container_name: ollama
    volumes:
      - /opt/ollama:/root/.ollama
    ports:
      - 11434:11434
    #runtime: nvidia
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            #device_ids: ['0']
            count: 1
            capabilities: [gpu]
</code>
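
To confirm ollama is up and serving, list the installed models over its API:
<code>
curl http://localhost:11434/api/tags
</code>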
  
<code - docker-openwebui.yml>
name: open-webui
services:
  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    ports:
      - 3000:8080
    volumes:
      - /opt/open-webui:/app/backend/data
    restart: unless-stopped
    extra_hosts:
      host.docker.internal: host-gateway
    environment:
      - WEBUI_NAME=CustomGPTName
      - TZ=Europe/London
      - RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE=True # allow sentencetransformers to execute code like for alibaba-nlp/gte-large-en-v1.5
</code>
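
Once running, the web interface should answer on the mapped port (3000 above):
<code>
curl -I http://localhost:3000
</code>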
  
<code - docker-openedai-speech.yml>
name: openedai-speech
services:
  openedai-speech:
    image: ghcr.io/matatonic/openedai-speech
    container_name: openedai-speech
    ports:
      - 8000:8000
    volumes:
      - /opt/openedai-speech/tts-config:/app/config
      - /opt/openedai-speech/voices:/app/voices
    restart: unless-stopped
</code>
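
To test text-to-speech against it (the 8000:8000 mapping above follows the project default and is an assumption here), request a short sample:
<code>
curl http://localhost:8000/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"model": "tts-1", "input": "Hello from openedai-speech", "voice": "alloy"}' \
  -o hello.mp3
</code>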
  
<code - docker-pipelines.yml>
name: pipelines
services:
  pipelines:
    image: ghcr.io/open-webui/pipelines:main
    container_name: pipelines
    ports:
      - 9099:9099
    volumes:
      - /opt/pipelines:/app/pipelines
    restart: unless-stopped
    extra_hosts:
      host.docker.internal: host-gateway
    deploy:
      resources:
        reservations:
          devices:
          - driver: nvidia
            capabilities: [gpu]
</code>

https://zohaib.me/extending-openwebui-using-pipelines/
  
Under settings->connections set:
  - OPENAI API host: http://host.docker.internal:9099
  - OPENAI API Key: 0p3n-w3bu!
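
To verify the pipelines service responds before wiring it into the UI, list its models with the same key:
<code>
curl http://localhost:9099/v1/models -H "Authorization: Bearer 0p3n-w3bu!"
</code>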
  
<code>
git clone https://github.com/fedirz/faster-whisper-server
</code>

<code - docker-faster-whisper-server.yml>
name: faster-whisper-server
services:
  faster-whisper-server-cuda:
    image: fedirz/faster-whisper-server:latest-cuda
    build:
      dockerfile: faster-whisper-server/Dockerfile.cuda
      context: ./faster-whisper-server
      platforms:
        - linux/amd64
    volumes:
      - /opt/faster-whisper-server/:/root/.cache/huggingface
    restart: unless-stopped
    ports:
      - 8010:8000
    develop:
      watch:
        - path: faster_whisper_server
          action: rebuild
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: ["gpu"]
</code>
Go to settings -> audio and set:
  - OPENAI API host: http://host.docker.internal:8010/v1
  - OPENAI API Key: sk-something
  - model: whisper-1
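
To test transcription end to end, post a local audio file (test.wav is a placeholder; the model field is left to the server's default here):
<code>
curl http://localhost:8010/v1/audio/transcriptions -F "file=@test.wav"
</code>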

NOTE: speech-to-text requires an https connection to open-webui, as browsers do not allow microphone access over plain http!

<code>
mkdir /opt/docker-ssl-proxy/
cd /opt/docker-ssl-proxy/
openssl req -subj '/CN=hostname.example.com' -x509 -newkey rsa:4096 -nodes -keyout key.pem -out cert.pem -days 365
</code>
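
To double-check the generated certificate, print its subject and validity dates:
<code>
openssl x509 -in cert.pem -noout -subject -dates
</code>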
<code - /opt/docker-ssl-proxy/proxy_ssl.conf>
server {
  listen 80;
  server_name _;
  return 301 https://$host$request_uri;
}
server {
  listen 443 ssl;
  ssl_certificate /etc/nginx/conf.d/cert.pem;
  ssl_certificate_key /etc/nginx/conf.d/key.pem;
  location / {
     proxy_pass http://host.docker.internal:3000;
  }
}
</code>

<code - docker-ssl-proxy.yml>
name: nginx-proxy
services:
  nginx-proxy:
    image: nginx
    container_name: nginx-proxy
    ports:
      - 80:80
      - 443:443
    volumes:
      - /opt/docker-ssl-proxy:/etc/nginx/conf.d
    restart: unless-stopped
    extra_hosts:
      host.docker.internal: host-gateway
    environment:
      - TZ=Europe/London
</code>
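
With the proxy running, plain http should redirect (301) and https should answer; -k skips verification of the self-signed certificate:
<code>
curl -I http://localhost
curl -kI https://localhost
</code>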

To pull an ollama model, it is better to use ollama directly, as the web interface doesn't handle stalled downloads well:
<code>
docker exec -ti ollama ollama pull modelname:tag
</code>

To update all previously pulled ollama models, use this bash script:
<code bash update-ollama-models.sh>
#!/bin/bash

# List installed models (skipping the header line) and pull each one again.
docker exec ollama ollama list | tail -n +2 | awk '{print $1}' | while read -r model; do
  echo "Updating model: $model..."
  docker exec -t ollama ollama pull "$model"
  echo "---"
done
echo "All models updated."
</code>
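
Make the script executable and run it manually, or schedule it with cron; the path in the cron line is only an illustration:
<code>
chmod +x update-ollama-models.sh
./update-ollama-models.sh

# optional: run nightly at 03:00
# 0 3 * * * /opt/update-ollama-models.sh
</code>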
  
  
AMD GPU on Windows:
<code>
...
</code>
  
Create the respective Docker volumes folder:
<code>
# p/Docker_Volumes = P:\Docker_Volumes
mkdir P:\Docker_Volumes
</code>

Install Docker Desktop with the WSL2 backend, then on the command line:
<code>
docker compose -f docker-openwebui.yml up -d
...
</code>
  

To update all ollama models on Windows, use this PowerShell command - adjust the hostname/IP that ollama is running on:
<code powershell>
(Invoke-RestMethod http://localhost:11434/api/tags).Models.Name.ForEach{ ollama pull $_ }

# or if running in docker
(Invoke-RestMethod http://localhost:11434/api/tags).Models.Name.ForEach{ docker exec -t ollama ollama pull $_ }
</code>


====== Curl OpenAI API test ======

<code>
curl http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "llama3",
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant."
            },
            {
                "role": "user",
                "content": "Hello!"
            }
        ]
    }'
{"id":"chatcmpl-957","object":"chat.completion","created":1722601457,"model":"llama3","system_fingerprint":"fp_ollama","choices":[{"index":0,"message":{"role":"assistant","content":"Hi there! It's great to meet you! I'm here to help with any questions or tasks you might have. What brings you to this virtual space today? Are you looking for recommendations, seeking answers to a specific question, or maybe looking for some inspiration? Let me know, and I'll do my best to assist you."},"finish_reason":"stop"}],"usage":{"prompt_tokens":23,"completion_tokens":68,"total_tokens":91}}
</code>
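
To extract just the assistant's reply, pipe the response through jq (assuming jq is installed):
<code>
curl -s http://localhost:11434/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello!"}]}' \
    | jq -r '.choices[0].message.content'
</code>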