{
 "cells": [
  {
   "cell_type": "code",
   "source": [
6+ " # Replace with your actual item ID\n " ,
7+ " item_id = \"\" #datapipeline\n " ,
8+ " #item_id = \"\" #copyjob"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {
    "microsoft": {
     "language": "python",
     "language_group": "synapse_pyspark"
    },
    "tags": [
     "parameters"
    ]
   },
   "id": "196c5c86-4789-4f75-87aa-27741b6522d5"
  },
  {
   "cell_type": "code",
   "source": [
26+ " import requests\n " ,
27+ " import json\n " ,
28+ " import sempy.fabric as fabric\n " ,
29+ " \n " ,
30+ " \n " ,
31+ " # Get the current workspace ID dynamically\n " ,
32+ " workspace_id = fabric.get_notebook_workspace_id()\n " ,
33+ " \n " ,
34+ " # Get the current user's access token\n " ,
35+ " access_token = notebookutils.credentials.getToken('pbi')\n " ,
36+ " \n " ,
37+ " # Construct the API URL Jobs Instances\n " ,
38+ " url = f\" https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{item_id}/jobs/instances\"\n " ,
39+ " \n " ,
40+ " \n " ,
41+ " # Set headers\n " ,
42+ " headers = {\n " ,
43+ " \" Authorization\" : f\" Bearer {access_token}\" ,\n " ,
44+ " \" Content-Type\" : \" application/json\"\n " ,
45+ " }\n " ,
46+ " \n " ,
47+ " # Make the API request\n " ,
48+ " response = requests.get(url, headers=headers)\n " ,
49+ " \n " ,
50+ " \n " ,
51+ " # Check and process the response\n " ,
52+ " if response.status_code == 200:\n " ,
53+ " data = response.json()\n " ,
54+ " job_instances = data.get(\" value\" , []) # Extract the list of job instances\n " ,
55+ " if job_instances:\n " ,
56+ " \n " ,
57+ " json_lines = [json.dumps(job_instances) for job_instance in job_instances]\n " ,
58+ " \n " ,
59+ " df = spark.read.json(spark.sparkContext.parallelize(json_lines))\n " ,
60+ " #print('df about to run')\n " ,
61+ " #df.show(truncate=False)\n " ,
62+ " #display(df)\n " ,
63+ " else:\n " ,
64+ " print(\" No job instances found.\" )\n " ,
65+ " else:\n " ,
66+ " print(f\" Failed to fetch job status: {response.status_code} - {response.text}\" )\n "
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {
    "microsoft": {
     "language": "python",
     "language_group": "synapse_pyspark"
    },
    "collapsed": false
   },
   "id": "0da9ff9f-d002-4c9f-b117-ddc6b3b153ab"
  },
  {
   "cell_type": "code",
   "source": [
82+ " from pyspark.sql.functions import col\n " ,
83+ " from pyspark.sql.types import TimestampType\n " ,
84+ " \n " ,
85+ " # Ensure startTimeUtc is in timestamp format\n " ,
86+ " df = df.withColumn(\" startTimeUtc\" , col(\" startTimeUtc\" ).cast(TimestampType()))\n " ,
87+ " \n " ,
88+ " # Sort by startTimeUtc descending\n " ,
89+ " df_sorted = df.orderBy(col(\" startTimeUtc\" ).desc())\n " ,
90+ " \n " ,
91+ " # Filter out rows where status is 'Deduped'\n " ,
92+ " df_filtered = df_sorted.filter(col(\" status\" ) != \" Deduped\" )\n " ,
93+ " \n " ,
94+ " # Get the first non-Deduped status\n " ,
95+ " latest_status = df_filtered.select(\" status\" ).first()[\" status\" ]\n " ,
96+ " \n " ,
97+ " #print(f\" The latest non-Deduped status is: {latest_status}\" )\n "
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {
    "microsoft": {
     "language": "python",
     "language_group": "synapse_pyspark"
    }
   },
   "id": "815a3cc8-e93b-4679-971f-d4d41081ea57"
  },
  {
   "cell_type": "code",
   "source": [
112+ " notebookutils.notebook.exit(latest_status)"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {
    "microsoft": {
     "language": "python",
     "language_group": "synapse_pyspark"
    }
   },
   "id": "34903b61-df71-47d6-9e1d-cf643983ddfc"
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "synapse_pyspark"
  },
  "kernelspec": {
   "name": "synapse_pyspark",
   "display_name": "synapse_pyspark"
  },
  "language_info": {
   "name": "python"
  },
  "microsoft": {
   "language": "python",
   "language_group": "synapse_pyspark",
   "ms_spell_check": {
    "ms_spell_check_language": "en"
   }
  },
143+ "nteract" : {
144+ 145+ },
146+ "synapse_widget" : {
147+ "version" : " 0.1" ,
148+ "state" : {}
149+ },
150+ "spark_compute" : {
151+ "compute_id" : " /trident/default" ,
152+ "session_options" : {
153+ "conf" : {
154+ "spark.synapse.nbs.session.timeout" : " 1200000"
155+ }
156+ }
157+ },
158+ "dependencies" : {
159+ "lakehouse" : {
160+ "known_lakehouses" : [
161+ {
162+ "id" : " "
163+ }
164+ ],
165+ "default_lakehouse" : " " ,
166+ "default_lakehouse_name" : " " ,
167+ "default_lakehouse_workspace_id" : " "
168+ }
169+ }
170+ },
171+ "nbformat" : 4 ,
172+ "nbformat_minor" : 5
173+ }