====== ai:private-gpt ======
<code bash>
ENV PYTHONPATH="
#USER worker
#ENTRYPOINT ["/
ENTRYPOINT /
EOD
</code>

<code bash>
## Execute the main container command
exec "$@"
EOD
</code>
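
The entrypoint script's final exec "$@" hands control to the command the container was started with; exec replaces the wrapper shell instead of forking a child, so the application runs as PID 1 and receives signals such as docker stop directly. A minimal sketch of the same pattern, with a purely hypothetical setup step:

<code bash>
#!/bin/bash
# hypothetical setup step running before the main command
export EXAMPLE_SETUP_DONE=1

## Execute the main container command
# exec replaces this shell with "$@" so no wrapper process remains
exec "$@"
</code>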

<code yaml>
PGPT_EMBEDDING_MODE:

#Microsoft Phi-3 Mini 4k
PGPT_HF_REPO_ID:
#
PGPT_HF_MODEL_FILE:
PGPT_PROMPT_STYLE:

#Meta Llama 3
#
#
#PGPT_PROMPT_STYLE: "llama3"

#OpenOrca Mistral
#
#
#

PGPT_EMBEDDING_HF_MODEL_NAME:
#
TOKENIZERS_PARALLELISM:
#PGPT_NGL: 20

PGPT_MAX_NEW_TOKENS:
PGPT_CONTEXT_WINDOW:
PGPT_TEMPERATURE:
EMBEDDING_INGEST_MODE:
EMBEDDING_COUNT_WORKERS:
</code>
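
These PGPT_* variables only take effect because the settings file used inside the container references them as ${VAR:default} placeholders, which PrivateGPT's settings loader expands at startup; an unset variable falls back to the default after the colon. To confirm what the running container actually sees (assuming the compose service is named "private-gpt"):

<code bash>
# list the PGPT_* variables visible inside the running container
docker compose exec private-gpt env | grep '^PGPT_'
</code>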

===== NGL settings patch =====

To set the number of layers loaded into the GPU for llamacpp, apply this NGL option patch, then add "PGPT_NGL" with the desired layer count to the environment section of the docker compose file (see the commented "#PGPT_NGL: 20" example above).
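
The body of the NGL patch is not reproduced here. Purely as an illustration of the idea, and not the original patch, the snippet below routes a PGPT_NGL environment variable into llamacpp's n_gpu_layers model argument; the file path and the matched string are assumptions about the PrivateGPT sources, not taken from the page.

<code bash>
# Hypothetical illustration only - not the original NGL patch.
# Assumes llm_component.py passes a hard-coded n_gpu_layers value to
# LlamaCPP and that the module already imports os.
sed -i 's/"n_gpu_layers": -1/"n_gpu_layers": int(os.environ.get("PGPT_NGL", -1))/' \
    private_gpt/components/llm/llm_component.py
</code>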

===== Max New Tokens / Context Size / Temperature settings patch =====

To set Max New Tokens, Context Size, and Temperature as variables in the docker compose file, the settings.yaml file needs to be adjusted.

docker compose file additions:
<code yaml>
environment:
  PGPT_MAX_NEW_TOKENS:
  PGPT_CONTEXT_WINDOW:
  PGPT_TEMPERATURE:
</code>

<code bash>
cat << 'EOD' >> token-ctx-temp-settings-option.patch
diff --git a/settings.yaml b/settings.yaml
index e881a55..8666b86 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -37,10 +37,10 @@ ui:
 llm:
   mode: llamacpp
   # Should be matching the selected model
-  max_new_tokens: 512
-  context_window: 3900
+  max_new_tokens: ${PGPT_MAX_NEW_TOKENS:512}
+  context_window: ${PGPT_CONTEXT_WINDOW:3900}
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
-  temperature: 0.1
+  temperature: ${PGPT_TEMPERATURE:0.1}
 
 rag:
   similarity_top_k: 2
EOD

git apply token-ctx-temp-settings-option.patch
</code>
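
With the patch applied, each of the three settings keeps its previous hard-coded number as the fallback default and becomes overridable from the compose environment. A quick check that the placeholders are in place:

<code bash>
# each matched line should now contain a ${PGPT_...:default} placeholder
grep -E 'max_new_tokens|context_window|temperature' settings.yaml
</code>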

===== CSS Customisation =====

To adjust the main input box and fix the issue of the input box wrapping to the right in mobile or low-height browser windows, some CSS trickery is required. The last three CSS lines are added to privategpt/private_gpt/ui/ui.py:

<code python privategpt/private_gpt/ui/ui.py>
    def _build_ui_blocks(self) -> gr.Blocks:
        logger.debug("Creating the UI blocks")
        with gr.Blocks(
            title=UI_TAB_TITLE,
            theme=gr.themes.Soft(primary_hue=slate),
            css=".logo { "
            # [... stock .logo rules ...]
            ".logo img { height: 100% }"
            # [... stock .contain / #component / #chatbot rules ...]
            "#col { height: calc(100vh - 112px - 16px) !important; }"
            # the three added lines for mobile/low-height windows:
            "#
            "#
            "#col { min-height:
        ) as blocks:
            with gr.Row():
</code>
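
Since ui.py is copied into the image at build time, the CSS change only appears after a rebuild. A minimal sketch, assuming the compose service is named "private-gpt"; the in-container path on the last line is an assumption, not taken from the page:

<code bash>
# rebuild the image with the edited ui.py and recreate the container
docker compose build private-gpt
docker compose up -d private-gpt

# alternative for quick iteration: bind-mount the edited file over the
# copy inside the image via the service's volumes in docker-compose.yml:
#   volumes:
#     - ./privategpt/private_gpt/ui/ui.py:/home/worker/app/private_gpt/ui/ui.py
</code>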