From b38908c9826a9a000365a1da315bc3a0e4bccf44 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Tue, 5 Mar 2024 12:25:00 -0500
Subject: [PATCH 01/11] Get custom server changes up to source control

---
 llama_cpp/server/app.py | 56 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index ec9280986..591dd94ef 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -2,6 +2,7 @@
 
 import os
 import json
+import boto3
 
 from threading import Lock
 from functools import partial
@@ -45,6 +46,49 @@
 from llama_cpp.server.errors import RouteErrorHandler
 
 
+title_message = os.getenv('TITLEMESSAGE', "🦙 llama.cpp Python API")
+apitable = os.getenv('APITABLE')
+
+def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
+    # Initialize a boto3 DynamoDB resource
+    dynamodb = boto3.resource('dynamodb')
+    table = dynamodb.Table(apitable)  # Replace with your DynamoDB table name
+
+    # Try to get the item for the given API key
+    response = table.get_item(Key={'ApiKey': api_key})
+    item = response.get('Item')
+
+    if not item or not item.get('Authorized') or item.get('Credits', 0) < credit_cost:
+        # API key not found, not authorized, or not enough credits
+        return False
+
+    # Deduct credit_cost from the Credits and prepare TotalInvocations update
+    new_credits = item['Credits'] - credit_cost
+    invocations_update = {
+        ':cost': credit_cost,
+        ':newval': 1,
+        ':inv_type': {invocation_type: 0}
+    }
+
+    # Update the item in DynamoDB for the given API key
+    try:
+        table.update_item(
+            Key={'ApiKey': api_key},
+            UpdateExpression="SET Credits = Credits - :cost ADD TotalInvocations.#type :newval",
+            ExpressionAttributeNames={
+                '#type': invocation_type
+            },
+            ExpressionAttributeValues=invocations_update,
+            ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost",
+            ReturnValues="UPDATED_NEW"
+        )
+        return True
+    except Exception as e:
+        print(f"Error updating item: {e}")
+        return False
+
+
+
 router = APIRouter(route_class=RouteErrorHandler)
 
 _server_settings: Optional[ServerSettings] = None
@@ -117,7 +161,8 @@ def create_app(
     middleware = [Middleware(RawContextMiddleware, plugins=(RequestIdPlugin(),))]
     app = FastAPI(
         middleware=middleware,
-        title="🦙 llama.cpp Python API",
+        ###WORKHERE Make a modification so this reads in from OS on the specific endpoint for the end customer
+        title=title_message,
         version=llama_cpp.__version__,
     )
     app.add_middleware(
@@ -175,7 +220,7 @@ def _logit_bias_tokens_to_input_ids(
 # Setup Bearer authentication scheme
 bearer_scheme = HTTPBearer(auto_error=False)
 
-
+#so here is where I can put in my custom API authentication system. ###WORKHERE
 async def authenticate(
     settings: Settings = Depends(get_server_settings),
     authorization: Optional[str] = Depends(bearer_scheme),
@@ -185,14 +230,15 @@ async def authenticate(
         return True
 
     # check bearer credentials against the api_key
-    if authorization and authorization.credentials == settings.api_key:
+    if authorization: # and authorization.credentials == settings.api_key:
+        if check_and_update_api_key(api_key=authorization.credentials,invocation_type="text"):
         # api key is valid
-        return authorization.credentials
+            return authorization.credentials
 
     # raise http error 401
     raise HTTPException(
         status_code=status.HTTP_401_UNAUTHORIZED,
-        detail="Invalid API key",
+        detail="Invalid API key. Check API key and credits.",
     )
 
 

From 3604f900d3de798a4b5b94ae760a747876ace7be Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Tue, 5 Mar 2024 16:23:23 -0500
Subject: [PATCH 02/11] Print incoming API key for debugging

---
 llama_cpp/server/app.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 591dd94ef..dbc0218bd 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -53,7 +53,8 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
     # Initialize a boto3 DynamoDB resource
     dynamodb = boto3.resource('dynamodb')
     table = dynamodb.Table(apitable)  # Replace with your DynamoDB table name
-
+    print("The api key coming in is ")
+    print(api_key)
     # Try to get the item for the given API key
     response = table.get_item(Key={'ApiKey': api_key})
     item = response.get('Item')

From 78f67143740cd13db029186c2313b0237530a979 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Tue, 5 Mar 2024 17:02:10 -0500
Subject: [PATCH 03/11] Use if_not_exists when incrementing TotalInvocations

---
 llama_cpp/server/app.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index dbc0218bd..a33348cf0 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -52,9 +52,10 @@
 def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
     # Initialize a boto3 DynamoDB resource
     dynamodb = boto3.resource('dynamodb')
-    table = dynamodb.Table(apitable)  # Replace with your DynamoDB table name
-    print("The api key coming in is ")
-    print(api_key)
+    table = dynamodb.Table(apitable)  # Ensure 'apitable' is correctly defined earlier in your code
+
+    print("The api key coming in is ", api_key)
+    
     # Try to get the item for the given API key
     response = table.get_item(Key={'ApiKey': api_key})
     item = response.get('Item')
@@ -63,23 +64,23 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
         # API key not found, not authorized, or not enough credits
         return False
 
-    # Deduct credit_cost from the Credits and prepare TotalInvocations update
-    new_credits = item['Credits'] - credit_cost
-    invocations_update = {
-        ':cost': credit_cost,
-        ':newval': 1,
-        ':inv_type': {invocation_type: 0}
-    }
+    # Prepare the update expression
+    update_expression = "SET Credits = Credits - :cost"
+    expression_attribute_values = {':cost': credit_cost, ':newval': 1}
+    expression_attribute_names = {'#type': invocation_type}
+
+    # The UpdateExpression to handle both new and existing invocation types
+    update_expression += ", TotalInvocations.#type = if_not_exists(TotalInvocations.#type, :startval) + :newval"
+
+    expression_attribute_values[':startval'] = 0
 
     # Update the item in DynamoDB for the given API key
     try:
         table.update_item(
             Key={'ApiKey': api_key},
-            UpdateExpression="SET Credits = Credits - :cost ADD TotalInvocations.#type :newval",
-            ExpressionAttributeNames={
-                '#type': invocation_type
-            },
-            ExpressionAttributeValues=invocations_update,
+            UpdateExpression=update_expression,
+            ExpressionAttributeNames=expression_attribute_names,
+            ExpressionAttributeValues=expression_attribute_values,
             ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost",
             ReturnValues="UPDATED_NEW"
         )

From 7fdca31df351eaa81ddab74e07ae27d0d907c25e Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Tue, 5 Mar 2024 18:27:27 -0500
Subject: [PATCH 04/11] added error handling logic

---
 llama_cpp/server/app.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index a33348cf0..3751932db 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -60,9 +60,12 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
     response = table.get_item(Key={'ApiKey': api_key})
     item = response.get('Item')
 
-    if not item or not item.get('Authorized') or item.get('Credits', 0) < credit_cost:
+    if not item or not item.get('Authorized'):
         # API key not found, not authorized, or not enough credits
-        return False
+        return False,"API key not authorized. "
+    creditval = item.get('Credits', 0)
+    if creditval < credit_cost:
+        return False,"API Key does not have enough credits, have "+str(creditval)+", need "+str(credit_cost)
 
     # Prepare the update expression
     update_expression = "SET Credits = Credits - :cost"
@@ -84,10 +87,10 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1):
             ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost",
             ReturnValues="UPDATED_NEW"
         )
-        return True
+        return True,""
     except Exception as e:
         print(f"Error updating item: {e}")
-        return False
+        return False, "There was an error with that API key. Please check and try again, otherwise contact support."
 
 
 
@@ -233,9 +236,15 @@ async def authenticate(
 
     # check bearer credentials against the api_key
     if authorization: # and authorization.credentials == settings.api_key:
-        if check_and_update_api_key(api_key=authorization.credentials,invocation_type="text"):
+        goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text")
+        if goodkey:
         # api key is valid
             return authorization.credentials
+        else:
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail=message,
+            )
 
     # raise http error 401
     raise HTTPException(

From 93aa71ba7b07648241fd219a813157750a0fe5f0 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 11:06:36 -0400
Subject: [PATCH 05/11] testing gradio integration

---
 llama_cpp/server/app.py | 5 +++++
 pyproject.toml          | 1 +
 2 files changed, 6 insertions(+)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 3751932db..df970c197 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -3,6 +3,7 @@
 import os
 import json
 import boto3
+import gradio as gr
 
 from threading import Lock
 from functools import partial
@@ -181,7 +182,11 @@ def create_app(
 
     assert model_settings is not None
     set_llama_proxy(model_settings=model_settings)
+    #We're going to see if we can get the gradio url settings working
+    CUSTOM_PATH = "/gradio"
 
+    io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox")
+    app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH,share=True,debug=True)
     return app
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 2f3d3ced0..4cabb841c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ server = [
     "pydantic-settings>=2.0.1",
     "sse-starlette>=1.6.1",
     "starlette-context>=0.3.6,<0.4",
+    "gradio",
 ]
 test = [
     "pytest>=7.4.0",

From cb31dfbde6e166485ab43f01e56a2e2284651945 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 11:12:33 -0400
Subject: [PATCH 06/11] Move share/debug args from mount_gradio_app to gr.Interface

---
 llama_cpp/server/app.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index df970c197..0ef2f45c2 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -185,8 +185,8 @@ def create_app(
     #We're going to see if we can get the gradio url settings working
     CUSTOM_PATH = "/gradio"
 
-    io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox")
-    app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH,share=True,debug=True)
+    io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True)
+    app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH)
     return app
 
 

From b11af25924aef51bb79a8f1fe789014b44e2836b Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 12:28:51 -0400
Subject: [PATCH 07/11] Comment out gradio integration and remove gradio dependency

---
 llama_cpp/server/app.py | 8 ++++----
 pyproject.toml          | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 0ef2f45c2..9b81fe169 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -3,7 +3,7 @@
 import os
 import json
 import boto3
-import gradio as gr
+#import gradio as gr
 
 from threading import Lock
 from functools import partial
@@ -183,10 +183,10 @@ def create_app(
     assert model_settings is not None
     set_llama_proxy(model_settings=model_settings)
     #We're going to see if we can get the gradio url settings working
-    CUSTOM_PATH = "/gradio"
+    #CUSTOM_PATH = "/gradio"
 
-    io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True)
-    app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH)
+    #io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True)
+    #app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH)
     return app
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 4cabb841c..2f3d3ced0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,6 @@ server = [
     "pydantic-settings>=2.0.1",
     "sse-starlette>=1.6.1",
     "starlette-context>=0.3.6,<0.4",
-    "gradio",
 ]
 test = [
     "pytest>=7.4.0",

From 9270a99708e590a88b5d3f9b88fde1c9ffdcf758 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 14:59:37 -0400
Subject: [PATCH 08/11] Allow -1 for n_threads/n_threads_batch to use all CPU threads

---
 llama_cpp/server/settings.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index daa913fac..254464fe5 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -67,12 +67,12 @@ class ModelSettings(BaseSettings):
     n_threads: int = Field(
         default=max(multiprocessing.cpu_count() // 2, 1),
         ge=1,
-        description="The number of threads to use.",
+        description="The number of threads to use. Use -1 for max cpu threads",
     )
     n_threads_batch: int = Field(
         default=max(multiprocessing.cpu_count() // 2, 1),
         ge=0,
-        description="The number of threads to use when batch processing.",
+        description="The number of threads to use when batch processing. Use -1 for max cpu threads",
     )
     rope_scaling_type: int = Field(
         default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
@@ -163,6 +163,15 @@ class ModelSettings(BaseSettings):
     verbose: bool = Field(
         default=True, description="Whether to print debug information."
     )
+    @root_validator(pre=True)  # pre=True to ensure this runs before any other validation
+    def set_dynamic_defaults(cls, values):
+        # If n_threads or n_threads_batch is -1, set it to multiprocessing.cpu_count()
+        cpu_count = multiprocessing.cpu_count()
+        if values.get('n_threads', 0) == -1:
+            values['n_threads'] = cpu_count
+        if values.get('n_threads_batch', 0) == -1:
+            values['n_threads_batch'] = cpu_count
+        return values
 
 
 class ServerSettings(BaseSettings):

From 416c43ae8693f0ca393a0b1a5fea711e63af1640 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 15:00:41 -0400
Subject: [PATCH 09/11] Revert authenticate to the fixed settings.api_key check

---
 llama_cpp/server/app.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 9b81fe169..da2f8c00c 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -240,16 +240,17 @@ async def authenticate(
         return True
 
     # check bearer credentials against the api_key
-    if authorization: # and authorization.credentials == settings.api_key:
-        goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text")
-        if goodkey:
+    if authorization and authorization.credentials == settings.api_key:
+        #goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text")
+        #if goodkey:
         # api key is valid
-            return authorization.credentials
-        else:
-            raise HTTPException(
-                status_code=status.HTTP_401_UNAUTHORIZED,
-                detail=message,
-            )
+        #    return authorization.credentials
+        #else:
+        #    raise HTTPException(
+        #        status_code=status.HTTP_401_UNAUTHORIZED,
+        #        detail=message,
+        #    )
+        return authorization.credentials
 
     # raise http error 401
     raise HTTPException(

From 25bce0e9074814649ce0c241be362e7856a4285e Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 15:04:36 -0400
Subject: [PATCH 10/11] Import BaseSettings and root_validator from pydantic in settings.py

---
 llama_cpp/server/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index 254464fe5..39bb9348b 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -3,7 +3,7 @@
 import multiprocessing
 
 from typing import Optional, List, Literal, Union
-from pydantic import Field
+from pydantic import Field, BaseSettings, root_validator
 from pydantic_settings import BaseSettings
 
 import llama_cpp

From 9bbe3933a16fc26deb02e739b697d4231b5a13c2 Mon Sep 17 00:00:00 2001
From: sean-bailey <seanbailey518@gmail.com>
Date: Sun, 10 Mar 2024 15:09:08 -0400
Subject: [PATCH 11/11] Drop duplicate BaseSettings import from pydantic in settings.py

---
 llama_cpp/server/settings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index 39bb9348b..f905c5717 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -3,7 +3,7 @@
 import multiprocessing
 
 from typing import Optional, List, Literal, Union
-from pydantic import Field, BaseSettings, root_validator
+from pydantic import Field, root_validator
 from pydantic_settings import BaseSettings
 
 import llama_cpp