Get Started • Documentation • References • Contact
LLM4Time is a Python library for time series forecasting using Large Language Models (LLMs). It provides a modular architecture that includes:
- Data preprocessing and handling
- Prompt generation
- Forecasting with LLMs
- Metric evaluation
- Interactive visualization
pip install llm4time
In addition, we provide a Streamlit-based interface, offering a more intuitive and practical way to interact with the library.
Follow the steps below to clone the repository, set up the environment, and run the application.
git clone https://github.com/zairobastos/LLM4Time.git
cd LLM4Time
python -m venv .venv
source .venv/bin/activate # Bash/Zsh
source .venv/bin/activate.fish # Fish Shell
pip install -e .
pip install -r requirements.txt -r requirements-streamlit.txt
Using python 🐍
python app/main.py
Access the application at
http://localhost:8501
Or using docker 🐋
docker compose up
from llm4time.core.data import loader
from llm4time.core.evaluate import Statistics
# Data loading using CSV, XLSX, JSON or Parquet
df = loader.load_data("etth2.csv")
# Descriptive statistics
stats = Statistics(df['OT'])
print(f"Mean: {stats.mean}")
print(f"Median: {stats.median}")
print(f"1° Quartile: {stats.first_quartile}")
print(f"3° Quartile: {stats.third_quartile}")
print(f"Standard Deviation: {stats.std}")
print(f"Minimum: {stats.min}")
print(f"Maximum: {stats.max}")
print(f"Number of missing values: {stats.missing_count}")
print(f"Percentage of missing values: {stats.missing_percentage}")
from llm4time.core.data import preprocessor
# Standardize into time series format
df = preprocessor.standardize(
df,
date_col='date', # Column containing dates/timestamps
value_col='OT', # Column containing time series values
duplicates='first' # How to handle duplicate rows: 'first' keeps the first occurrence
)
# Ensure all timestamps are present
df = preprocessor.normalize(df, freq='h')
from llm4time.core.data import imputation
# Replace missing values with the column mean
df = imputation.mean(df)
from llm4time.core.data import preprocessor
# Split the dataset into training and validation sets
train, y_val = preprocessor.split(
df,
start_date='2016-06-01 00:00:00', # Start of the training set
end_date='2016-12-01 00:00:00', # End of the training set
periods=24 # Number of periods to forecast
)
from llm4time.core import prompt
from llm4time.core import PromptType, TSFormat, TSType
content = prompt.generate(
train, # Training set [(date, value), ...]
periods=24, # Number of periods to forecast
prompt_type=PromptType.ZERO_SHOT, # prompt type: ZERO_SHOT (no examples)
ts_format=TSFormat.ARRAY, # time series format
ts_type=TSType.NUMERIC # Type of encoding for series values
)
from llm4time.core.models import OpenAI
model = OpenAI(
model='gpt-4o', # OpenAI model to be used.
api_key='...', # API key for authentication with the OpenAI service.
base_url='..' # Base URL of the OpenAI endpoint.
)
# Forecasting
response, prompt_tokens, response_tokens, time_sec = model.predict(
content, # Previously generated prompt
temperature=0.7, # Level of randomness in the response
max_tokens=1000 # Maximum number of tokens in the response
)
print("Model response:", response)
print("Prompt tokens:", prompt_tokens)
print("Response tokens:", response_tokens)
print("Execution time (s):", time_sec)
from llm4time.core import formatter
from llm4time.core.evaluate.metrics import Metrics
# Converts the response string into a numerical list
y_pred = formatter.parse(
response,
ts_format=TSFormat.ARRAY,
ts_type=TSType.NUMERIC
)
metrics = Metrics(y_val, y_pred)
# Error metrics
print(f"sMAPE: {metrics.smape}") # Symmetric Mean Absolute Percentage Error
print(f"MAE: {metrics.mae}") # Mean Absolute Error
print(f"RMSE: {metrics.rmse}") # Root Mean Squared Error
from llm4time.visualization import plots
# Generate a comparison plot between actual and predicted values
plots.plot_forecast("Comparison between actual and predicted values", y_val, y_pred)
# Generate a bar chart comparing descriptive statistics
plots.plot_forecast_statistics("Statistical comparison", y_val, y_pred)
@article{zairo2025prompt,
title={Prompt-Driven Time Series Forecasting with Large Language Models},
author={Zairo Bastos and João David Freitas and José Wellington Franco and Carlos Caminha},
journal={Proceedings of the 27th International Conference on Enterprise Information Systems - Volume 1: ICEIS},
year={2025}
}
Zairo Bastos Master’s student - UFC 📧 🔗 |
Wesley Barbosa Undergraduate student - UFC 📧 🔗 |
Fernanda Scarcela Undergraduate student - UFC 📧 🔗 |
Carlos Caminha Academic advisor - UFC 📧 🔗 |
José Wellington Franco Academic advisor - UFC 📧 🔗 |
This project is licensed under the MIT License.
For questions, suggestions, or feedback:
- 📧 Email: [email protected]
- 🔗 LinkedIn: Zairo Bastos