From b38908c9826a9a000365a1da315bc3a0e4bccf44 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Tue, 5 Mar 2024 12:25:00 -0500 Subject: [PATCH 01/11] Get custom server changes up to source control --- llama_cpp/server/app.py | 56 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index ec9280986..591dd94ef 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -2,6 +2,7 @@ import os import json +import boto3 from threading import Lock from functools import partial @@ -45,6 +46,49 @@ from llama_cpp.server.errors import RouteErrorHandler +title_message = os.getenv('TITLEMESSAGE', "🦙 llama.cpp Python API") +apitable = os.getenv('APITABLE') + +def check_and_update_api_key(api_key, invocation_type, credit_cost=1): + # Initialize a boto3 DynamoDB resource + dynamodb = boto3.resource('dynamodb') + table = dynamodb.Table(apitable) # Replace with your DynamoDB table name + + # Try to get the item for the given API key + response = table.get_item(Key={'ApiKey': api_key}) + item = response.get('Item') + + if not item or not item.get('Authorized') or item.get('Credits', 0) < credit_cost: + # API key not found, not authorized, or not enough credits + return False + + # Deduct credit_cost from the Credits and prepare TotalInvocations update + new_credits = item['Credits'] - credit_cost + invocations_update = { + ':cost': credit_cost, + ':newval': 1, + ':inv_type': {invocation_type: 0} + } + + # Update the item in DynamoDB for the given API key + try: + table.update_item( + Key={'ApiKey': api_key}, + UpdateExpression="SET Credits = Credits - :cost ADD TotalInvocations.#type :newval", + ExpressionAttributeNames={ + '#type': invocation_type + }, + ExpressionAttributeValues=invocations_update, + ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost", + ReturnValues="UPDATED_NEW" + ) + return True + except Exception as e: + print(f"Error updating item: {e}") + return False + + + router = APIRouter(route_class=RouteErrorHandler) _server_settings: Optional[ServerSettings] = None @@ -117,7 +161,8 @@ def create_app( middleware = [Middleware(RawContextMiddleware, plugins=(RequestIdPlugin(),))] app = FastAPI( middleware=middleware, - title="🦙 llama.cpp Python API", + ###WORKHERE Make a modification so this reads in from OS on the specific endpoint for the end customer + title=title_message, version=llama_cpp.__version__, ) app.add_middleware( @@ -175,7 +220,7 @@ def _logit_bias_tokens_to_input_ids( # Setup Bearer authentication scheme bearer_scheme = HTTPBearer(auto_error=False) - +#so here is where I can put in my custom API authentication system. ###WORKHERE async def authenticate( settings: Settings = Depends(get_server_settings), authorization: Optional[str] = Depends(bearer_scheme), @@ -185,14 +230,15 @@ async def authenticate( return True # check bearer credentials against the api_key - if authorization and authorization.credentials == settings.api_key: + if authorization: # and authorization.credentials == settings.api_key: + if check_and_update_api_key(api_key=authorization.credentials,invocation_type="text"): # api key is valid - return authorization.credentials + return authorization.credentials # raise http error 401 raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid API key", + detail="Invalid API key. Check API key and credits.", ) From 3604f900d3de798a4b5b94ae760a747876ace7be Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Tue, 5 Mar 2024 16:23:23 -0500 Subject: [PATCH 02/11] testing --- llama_cpp/server/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 591dd94ef..dbc0218bd 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -53,7 +53,8 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1): # Initialize a boto3 DynamoDB resource dynamodb = boto3.resource('dynamodb') table = dynamodb.Table(apitable) # Replace with your DynamoDB table name - + print("The api key coming in is ") + print(api_key) # Try to get the item for the given API key response = table.get_item(Key={'ApiKey': api_key}) item = response.get('Item') From 78f67143740cd13db029186c2313b0237530a979 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Tue, 5 Mar 2024 17:02:10 -0500 Subject: [PATCH 03/11] more api key testing --- llama_cpp/server/app.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index dbc0218bd..a33348cf0 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -52,9 +52,10 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1): # Initialize a boto3 DynamoDB resource dynamodb = boto3.resource('dynamodb') - table = dynamodb.Table(apitable) # Replace with your DynamoDB table name - print("The api key coming in is ") - print(api_key) + table = dynamodb.Table(apitable) # Ensure 'apitable' is correctly defined earlier in your code + + print("The api key coming in is ", api_key) + # Try to get the item for the given API key response = table.get_item(Key={'ApiKey': api_key}) item = response.get('Item') @@ -63,23 +64,23 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1): # API key not found, not authorized, or not enough credits return False - # Deduct credit_cost from the Credits and prepare TotalInvocations update - new_credits = item['Credits'] - credit_cost - invocations_update = { - ':cost': credit_cost, - ':newval': 1, - ':inv_type': {invocation_type: 0} - } + # Prepare the update expression + update_expression = "SET Credits = Credits - :cost" + expression_attribute_values = {':cost': credit_cost, ':newval': 1} + expression_attribute_names = {'#type': invocation_type} + + # The UpdateExpression to handle both new and existing invocation types + update_expression += ", TotalInvocations.#type = if_not_exists(TotalInvocations.#type, :startval) + :newval" + + expression_attribute_values[':startval'] = 0 # Update the item in DynamoDB for the given API key try: table.update_item( Key={'ApiKey': api_key}, - UpdateExpression="SET Credits = Credits - :cost ADD TotalInvocations.#type :newval", - ExpressionAttributeNames={ - '#type': invocation_type - }, - ExpressionAttributeValues=invocations_update, + UpdateExpression=update_expression, + ExpressionAttributeNames=expression_attribute_names, + ExpressionAttributeValues=expression_attribute_values, ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost", ReturnValues="UPDATED_NEW" ) From 7fdca31df351eaa81ddab74e07ae27d0d907c25e Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Tue, 5 Mar 2024 18:27:27 -0500 Subject: [PATCH 04/11] added error handling logic --- llama_cpp/server/app.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index a33348cf0..3751932db 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -60,9 +60,12 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1): response = table.get_item(Key={'ApiKey': api_key}) item = response.get('Item') - if not item or not item.get('Authorized') or item.get('Credits', 0) < credit_cost: + if not item or not item.get('Authorized'): # API key not found, not authorized, or not enough credits - return False + return False,"API key not authorized. " + creditval = item.get('Credits', 0) + if creditval < credit_cost: + return False,"API Key does not have enough credits, have "+str(creditval)+", need "+str(credit_cost) # Prepare the update expression update_expression = "SET Credits = Credits - :cost" @@ -84,10 +87,10 @@ def check_and_update_api_key(api_key, invocation_type, credit_cost=1): ConditionExpression="attribute_exists(ApiKey) AND Credits >= :cost", ReturnValues="UPDATED_NEW" ) - return True + return True,"" except Exception as e: print(f"Error updating item: {e}") - return False + return False, "There was an error with that API key. Please check and try again, otherwise contact support." @@ -233,9 +236,15 @@ async def authenticate( # check bearer credentials against the api_key if authorization: # and authorization.credentials == settings.api_key: - if check_and_update_api_key(api_key=authorization.credentials,invocation_type="text"): + goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text") + if goodkey: # api key is valid return authorization.credentials + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=message, + ) # raise http error 401 raise HTTPException( From 93aa71ba7b07648241fd219a813157750a0fe5f0 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 11:06:36 -0400 Subject: [PATCH 05/11] testing gradio integration --- llama_cpp/server/app.py | 5 +++++ pyproject.toml | 1 + 2 files changed, 6 insertions(+) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 3751932db..df970c197 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -3,6 +3,7 @@ import os import json import boto3 +import gradio as gr from threading import Lock from functools import partial @@ -181,7 +182,11 @@ def create_app( assert model_settings is not None set_llama_proxy(model_settings=model_settings) + #We're going to see if we can get the gradio url settings working + CUSTOM_PATH = "/gradio" + io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox") + app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH,share=True,debug=True) return app diff --git a/pyproject.toml b/pyproject.toml index 2f3d3ced0..4cabb841c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ server = [ "pydantic-settings>=2.0.1", "sse-starlette>=1.6.1", "starlette-context>=0.3.6,<0.4", + "gradio", ] test = [ "pytest>=7.4.0", From cb31dfbde6e166485ab43f01e56a2e2284651945 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 11:12:33 -0400 Subject: [PATCH 06/11] another gradio test --- llama_cpp/server/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index df970c197..0ef2f45c2 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -185,8 +185,8 @@ def create_app( #We're going to see if we can get the gradio url settings working CUSTOM_PATH = "/gradio" - io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox") - app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH,share=True,debug=True) + io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True) + app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH) return app From b11af25924aef51bb79a8f1fe789014b44e2836b Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 12:28:51 -0400 Subject: [PATCH 07/11] those changes did not work --- llama_cpp/server/app.py | 8 ++++---- pyproject.toml | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 0ef2f45c2..9b81fe169 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -3,7 +3,7 @@ import os import json import boto3 -import gradio as gr +#import gradio as gr from threading import Lock from functools import partial @@ -183,10 +183,10 @@ def create_app( assert model_settings is not None set_llama_proxy(model_settings=model_settings) #We're going to see if we can get the gradio url settings working - CUSTOM_PATH = "/gradio" + #CUSTOM_PATH = "/gradio" - io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True) - app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH) + #io = gr.Interface(lambda x: "Hello, " + x + "!", "textbox", "textbox",share=True,debug=True) + #app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH) return app diff --git a/pyproject.toml b/pyproject.toml index 4cabb841c..2f3d3ced0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ server = [ "pydantic-settings>=2.0.1", "sse-starlette>=1.6.1", "starlette-context>=0.3.6,<0.4", - "gradio", ] test = [ "pytest>=7.4.0", From 9270a99708e590a88b5d3f9b88fde1c9ffdcf758 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 14:59:37 -0400 Subject: [PATCH 08/11] add in capabilities to set full power for config --- llama_cpp/server/settings.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py index daa913fac..254464fe5 100644 --- a/llama_cpp/server/settings.py +++ b/llama_cpp/server/settings.py @@ -67,12 +67,12 @@ class ModelSettings(BaseSettings): n_threads: int = Field( default=max(multiprocessing.cpu_count() // 2, 1), ge=1, - description="The number of threads to use.", + description="The number of threads to use. Use -1 for max cpu threads", ) n_threads_batch: int = Field( default=max(multiprocessing.cpu_count() // 2, 1), ge=0, - description="The number of threads to use when batch processing.", + description="The number of threads to use when batch processing. Use -1 for max cpu threads", ) rope_scaling_type: int = Field( default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED @@ -163,6 +163,15 @@ class ModelSettings(BaseSettings): verbose: bool = Field( default=True, description="Whether to print debug information." ) + @root_validator(pre=True) # pre=True to ensure this runs before any other validation + def set_dynamic_defaults(cls, values): + # If n_threads or n_threads_batch is -1, set it to multiprocessing.cpu_count() + cpu_count = multiprocessing.cpu_count() + if values.get('n_threads', 0) == -1: + values['n_threads'] = cpu_count + if values.get('n_threads_batch', 0) == -1: + values['n_threads_batch'] = cpu_count + return values class ServerSettings(BaseSettings): From 416c43ae8693f0ca393a0b1a5fea711e63af1640 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 15:00:41 -0400 Subject: [PATCH 09/11] reverting back to fixed api style --- llama_cpp/server/app.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 9b81fe169..da2f8c00c 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -240,16 +240,17 @@ async def authenticate( return True # check bearer credentials against the api_key - if authorization: # and authorization.credentials == settings.api_key: - goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text") - if goodkey: + if authorization and authorization.credentials == settings.api_key: + #goodkey,message=check_and_update_api_key(api_key=authorization.credentials,invocation_type="text") + #if goodkey: # api key is valid - return authorization.credentials - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=message, - ) + # return authorization.credentials + #else: + # raise HTTPException( + # status_code=status.HTTP_401_UNAUTHORIZED, + # detail=message, + # ) + return authorization.credentials # raise http error 401 raise HTTPException( From 25bce0e9074814649ce0c241be362e7856a4285e Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 15:04:36 -0400 Subject: [PATCH 10/11] bugfix --- llama_cpp/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py index 254464fe5..39bb9348b 100644 --- a/llama_cpp/server/settings.py +++ b/llama_cpp/server/settings.py @@ -3,7 +3,7 @@ import multiprocessing from typing import Optional, List, Literal, Union -from pydantic import Field +from pydantic import Field, BaseSettings, root_validator from pydantic_settings import BaseSettings import llama_cpp From 9bbe3933a16fc26deb02e739b697d4231b5a13c2 Mon Sep 17 00:00:00 2001 From: sean-bailey Date: Sun, 10 Mar 2024 15:09:08 -0400 Subject: [PATCH 11/11] bugfix --- llama_cpp/server/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py index 39bb9348b..f905c5717 100644 --- a/llama_cpp/server/settings.py +++ b/llama_cpp/server/settings.py @@ -3,7 +3,7 @@ import multiprocessing from typing import Optional, List, Literal, Union -from pydantic import Field, BaseSettings, root_validator +from pydantic import Field, root_validator from pydantic_settings import BaseSettings import llama_cpp