From 7558ab069b42e8e740297dcc11e350912de22b75 Mon Sep 17 00:00:00 2001 From: leebeanbin Date: Thu, 2 Jan 2025 13:05:08 +0900 Subject: [PATCH 1/5] N-1/06-DocumentLoader/09-Json_Loader --- 06-DocumentLoader/09-JSON-Loader.ipynb | 435 +++++++++++++++++++++++++ 06-DocumentLoader/data/people.json | 189 +++++++++++ pyproject.toml | 1 + 3 files changed, 625 insertions(+) create mode 100644 06-DocumentLoader/09-JSON-Loader.ipynb create mode 100644 06-DocumentLoader/data/people.json diff --git a/06-DocumentLoader/09-JSON-Loader.ipynb b/06-DocumentLoader/09-JSON-Loader.ipynb new file mode 100644 index 000000000..3b0302334 --- /dev/null +++ b/06-DocumentLoader/09-JSON-Loader.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# JSON\n", + "\n", + "Let's look at how to load files with the `.json` extension using a loader.\n", + "\n", + "- Author: [leebeanbin](https://github.com/leebeanbin)\n", + "- Design:\n", + "- Peer Review: \n", + "- This is a part of [LangChain Open Tutorial](https://github.com/LangChain-OpenTutorial/LangChain-OpenTutorial/tree/main/06-DocumentLoader)\n", + "\n", + "## Overview\n", + "This tutorial demonstrates how to use LangChain's JSONLoader to load and process JSON files. 
We'll explore how to extract specific data from structured JSON files using jq-style queries.\n", + "\n", + "### Table of Contents\n", + "- [JSON](#json)\n", + "- [Overview](#overview)\n", + "- [Generate JSON Data](#generate-json-data)\n", + "- [JSONLoader](#jsonloader)\n", + " \n", + "When you want to extract specific values from JSON data (for example, every entry of the top-level `people` array), you can easily do this using JSONLoader as shown below.\n", + "\n", + "Reference: https://python.langchain.com/docs/how_to/document_loader_json/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate JSON Data\n", + "\n", + "---\n", + "\n", + "If you want to generate JSON data, you can use the following code.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated and saved JSON data:\n", + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + " 'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': ['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 
'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': {'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': 
['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "from langchain_openai import ChatOpenAI\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "from dotenv import load_dotenv\n", + "import json\n", + "import os\n", + "\n", + "# Load .env file\n", + "load_dotenv()\n", + "\n", + "# Initialize ChatOpenAI\n", + "# response_format=json_object makes the model reply with a single JSON object\n", + "llm = ChatOpenAI(\n", + "    model=\"gpt-4o-mini\",\n", + "    temperature=0.7,\n", + "    model_kwargs={\"response_format\": {\"type\": \"json_object\"}}\n", + ")\n", + "\n", + "# Create prompt template\n", + "# The array is requested under a \"people\" key because the JSONLoader example below reads `.people[]`\n", + "prompt = PromptTemplate(\n", + "    input_variables=[],\n", + "    template=\"\"\"Generate a JSON object with a single top-level key \"people\" whose value is an array containing detailed personal information for 5 people. 
\n", + " Include various fields like name, age, contact details, address, personal preferences, and any other interesting information you think would be relevant.\"\"\"\n", + ")\n", + "\n", + "# Create and invoke runnable sequence using the new pipe syntax\n", + "response = (prompt | llm).invoke({})\n", + "generated_data = json.loads(response.content)\n", + "\n", + "# Save to JSON file\n", + "current_dir = Path().absolute()\n", + "data_dir = current_dir / \"data\"\n", + "data_dir.mkdir(exist_ok=True)\n", + "\n", + "file_path = data_dir / \"people.json\"\n", + "with open(file_path, \"w\", encoding=\"utf-8\") as f:\n", + "    json.dump(generated_data, f, ensure_ascii=False, indent=2)\n", + "\n", + "print(\"Generated and saved JSON data:\")\n", + "pprint(generated_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you already have your own JSON file, you can load and inspect it as follows." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + " 'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': ['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 
'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': 
{'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': ['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "import json\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "\n", + "\n", + "file_path = \"data/people.json\"\n", + "# Read with the same UTF-8 encoding the file was written with\n", + "data = json.loads(Path(file_path).read_text(encoding=\"utf-8\"))\n", + "\n", + "pprint(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(data))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JSONLoader\n", + "\n", + "---\n", + "\n", + "When you want to extract specific values from JSON data (for example, every entry of the top-level `people` array), you can easily do this using JSONLoader as shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 1}, page_content='{\"name\": \"Alice Smith\", \"age\": 32, \"contact\": {\"email\": \"alice.smith@example.com\", \"phone\": \"555-123-4567\"}, \"address\": {\"street\": \"123 Main St\", \"city\": \"New York\", \"state\": \"NY\", \"zip\": \"10001\"}, \"personal_preferences\": {\"favorite_color\": \"blue\", \"hobbies\": [\"reading\", \"yoga\"], \"favorite_food\": \"sushi\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 2}, page_content='{\"name\": \"John Doe\", \"age\": 45, \"contact\": {\"email\": \"john.doe@example.com\", \"phone\": \"555-987-6543\"}, \"address\": {\"street\": \"456 Elm St\", \"city\": \"Los Angeles\", \"state\": \"CA\", \"zip\": \"90001\"}, \"personal_preferences\": {\"favorite_color\": \"green\", \"hobbies\": [\"hiking\", \"gardening\"], \"favorite_food\": \"pizza\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 3}, page_content='{\"name\": \"Emily Johnson\", \"age\": 28, \"contact\": {\"email\": \"emily.johnson@example.com\", \"phone\": \"555-456-7890\"}, \"address\": {\"street\": \"789 Oak St\", \"city\": \"Chicago\", \"state\": \"IL\", \"zip\": \"60601\"}, \"personal_preferences\": {\"favorite_color\": \"pink\", \"hobbies\": [\"painting\", \"traveling\"], \"favorite_food\": \"tacos\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 4}, page_content='{\"name\": \"Michael Brown\", \"age\": 38, \"contact\": {\"email\": \"michael.brown@example.com\", \"phone\": \"555-234-5678\"}, 
\"address\": {\"street\": \"321 Maple St\", \"city\": \"Houston\", \"state\": \"TX\", \"zip\": \"77001\"}, \"personal_preferences\": {\"favorite_color\": \"red\", \"hobbies\": [\"playing guitar\", \"cooking\"], \"favorite_food\": \"barbecue\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 5}, page_content='{\"name\": \"Sarah Wilson\", \"age\": 35, \"contact\": {\"email\": \"sarah.wilson@example.com\", \"phone\": \"555-345-6789\"}, \"address\": {\"street\": \"654 Pine St\", \"city\": \"Miami\", \"state\": \"FL\", \"zip\": \"33101\"}, \"personal_preferences\": {\"favorite_color\": \"purple\", \"hobbies\": [\"photography\", \"dancing\"], \"favorite_food\": \"sushi\"}}')]\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import JSONLoader\n", + "\n", + "# Create JSONLoader\n", + "loader = JSONLoader(\n", + "    file_path=\"data/people.json\",\n", + "    jq_schema=\".people[]\", # Access each item in the people array\n", + "    text_content=False, # Each jq result here is a JSON object, not a plain string\n", + ")\n", + "\n", + "# Example: extract only the contact object of each person\n", + "# loader = JSONLoader(\n", + "# file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].contact\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Or extract only hobbies from personal_preferences\n", + "# loader = JSONLoader(\n", + "# file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].personal_preferences.hobbies\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Load documents\n", + "docs = loader.load()\n", + "pprint(docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain-opentutorial-LGorndcz-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", 
+ "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/06-DocumentLoader/data/people.json b/06-DocumentLoader/data/people.json new file mode 100644 index 000000000..67bf8ffd5 --- /dev/null +++ b/06-DocumentLoader/data/people.json @@ -0,0 +1,189 @@ +{ + "people": [ + { + "name": { + "first": "Alice", + "last": "Johnson" + }, + "age": 28, + "contact": { + "email": "alice.johnson@example.com", + "phone": "+1-555-0123", + "social_media": { + "twitter": "@alice_j", + "linkedin": "linkedin.com/in/alicejohnson" + } + }, + "address": { + "street": "123 Maple St", + "city": "Springfield", + "state": "IL", + "zip": "62704", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Reading", + "Hiking", + "Cooking" + ], + "favorite_food": "Italian", + "music_genre": "Jazz", + "travel_destinations": [ + "Japan", + "Italy", + "Canada" + ] + }, + "interesting_fact": "Alice has traveled to over 15 countries and speaks 3 languages." + }, + { + "name": { + "first": "Bob", + "last": "Smith" + }, + "age": 34, + "contact": { + "email": "bob.smith@example.com", + "phone": "+1-555-0456", + "social_media": { + "twitter": "@bobsmith34", + "linkedin": "linkedin.com/in/bobsmith" + } + }, + "address": { + "street": "456 Oak Ave", + "city": "Metropolis", + "state": "NY", + "zip": "10001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Photography", + "Cycling", + "Video Games" + ], + "favorite_food": "Mexican", + "music_genre": "Rock", + "travel_destinations": [ + "Brazil", + "Australia", + "Germany" + ] + }, + "interesting_fact": "Bob is an avid gamer and has competed in several national tournaments." 
+ }, + { + "name": { + "first": "Charlie", + "last": "Davis" + }, + "age": 45, + "contact": { + "email": "charlie.davis@example.com", + "phone": "+1-555-0789", + "social_media": { + "twitter": "@charliedavis45", + "linkedin": "linkedin.com/in/charliedavis" + } + }, + "address": { + "street": "789 Pine Rd", + "city": "Gotham", + "state": "NJ", + "zip": "07001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Gardening", + "Fishing", + "Woodworking" + ], + "favorite_food": "Barbecue", + "music_genre": "Country", + "travel_destinations": [ + "Canada", + "New Zealand", + "Norway" + ] + }, + "interesting_fact": "Charlie has a small farm where he raises chickens and grows organic vegetables." + }, + { + "name": { + "first": "Dana", + "last": "Lee" + }, + "age": 22, + "contact": { + "email": "dana.lee@example.com", + "phone": "+1-555-0111", + "social_media": { + "twitter": "@danalee22", + "linkedin": "linkedin.com/in/danalee" + } + }, + "address": { + "street": "234 Birch Blvd", + "city": "Star City", + "state": "CA", + "zip": "90001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Dancing", + "Sketching", + "Traveling" + ], + "favorite_food": "Thai", + "music_genre": "Pop", + "travel_destinations": [ + "Thailand", + "France", + "Spain" + ] + }, + "interesting_fact": "Dana is a dance instructor and has won several local competitions." 
+ }, + { + "name": { + "first": "Ethan", + "last": "Garcia" + }, + "age": 31, + "contact": { + "email": "ethan.garcia@example.com", + "phone": "+1-555-0999", + "social_media": { + "twitter": "@ethangarcia31", + "linkedin": "linkedin.com/in/ethangarcia" + } + }, + "address": { + "street": "345 Cedar St", + "city": "Central City", + "state": "TX", + "zip": "75001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Running", + "Travel Blogging", + "Cooking" + ], + "favorite_food": "Indian", + "music_genre": "Hip-Hop", + "travel_destinations": [ + "India", + "Italy", + "Mexico" + ] + }, + "interesting_fact": "Ethan runs a popular travel blog where he shares his adventures and culinary experiences." + } + ] +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 95491bfb9..05fccedfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ ragas = "^0.2.9" # Jupyter jupyter = "^1.1.1" notebook = "^7.3.2" +jq = "^1.8.0" From 82aaed93bf0547ffb53d8d0c7801c354c350db40 Mon Sep 17 00:00:00 2001 From: leebeanbin Date: Thu, 2 Jan 2025 13:05:08 +0900 Subject: [PATCH 2/5] N-1/06-DocumentLoader/09-Json_Loader [Title] JsonLoader [Version] Initial [Language] ENG [Packages] langchain, langchain-openai, langchain-community MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - json 생성 로직 추가 --- 06-DocumentLoader/09-JSON-Loader.ipynb | 435 +++++++++++++++++++++++++ 06-DocumentLoader/data/people.json | 189 +++++++++++ pyproject.toml | 1 + 3 files changed, 625 insertions(+) create mode 100644 06-DocumentLoader/09-JSON-Loader.ipynb create mode 100644 06-DocumentLoader/data/people.json diff --git a/06-DocumentLoader/09-JSON-Loader.ipynb b/06-DocumentLoader/09-JSON-Loader.ipynb new file mode 100644 index 000000000..3b0302334 --- /dev/null +++ b/06-DocumentLoader/09-JSON-Loader.ipynb @@ -0,0 +1,435 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# JSON\n", + "\n", 
+ "Let's look at how to load files with the `.json` extension using a loader.\n", + "\n", + "- Author: [leebeanbin](https://github.com/leebeanbin)\n", + "- Design:\n", + "- Peer Review: \n", + "- This is a part of [LangChain Open Tutorial](https://github.com/LangChain-OpenTutorial/LangChain-OpenTutorial/tree/main/06-DocumentLoader)\n", + "\n", + "## Overview\n", + "This tutorial demonstrates how to use LangChain's JSONLoader to load and process JSON files. We'll explore how to extract specific data from structured JSON files using jq-style queries.\n", + "\n", + "### Table of Contents\n", + "- [JSON](#json)\n", + "- [Overview](#overview)\n", + "- [Generate JSON Data](#generate-json-data)\n", + "- [JSONLoader](#jsonloader)\n", + " \n", + "When you want to extract specific values from JSON data (for example, every entry of the top-level `people` array), you can easily do this using JSONLoader as shown below.\n", + "\n", + "Reference: https://python.langchain.com/docs/how_to/document_loader_json/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate JSON Data\n", + "\n", + "---\n", + "\n", + "If you want to generate JSON data, you can use the following code.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated and saved JSON data:\n", + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + " 'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': 
['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 
'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': {'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': ['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "from langchain_openai import ChatOpenAI\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "from dotenv import load_dotenv\n", + "import json\n", + "import os\n", + "\n", + "# Load .env file\n", + "load_dotenv()\n", + "\n", + "# Initialize ChatOpenAI\n", + "# response_format=json_object makes the model reply with a single JSON object\n", + "llm = ChatOpenAI(\n", + "    model=\"gpt-4o-mini\",\n", + "    temperature=0.7,\n", + "    model_kwargs={\"response_format\": {\"type\": \"json_object\"}}\n", + ")\n", + "\n", + "# Create prompt template\n", + "# The array is requested under a \"people\" key because the JSONLoader example below reads `.people[]`\n", + "prompt = PromptTemplate(\n", + "    input_variables=[],\n", + "    template=\"\"\"Generate a JSON object with a single top-level key \"people\" whose value is an array containing detailed personal information for 5 people. 
\n", + " Include various fields like name, age, contact details, address, personal preferences, and any other interesting information you think would be relevant.\"\"\"\n", + ")\n", + "\n", + "# Create and invoke runnable sequence using the new pipe syntax\n", + "response = (prompt | llm).invoke({})\n", + "generated_data = json.loads(response.content)\n", + "\n", + "# Save to JSON file\n", + "current_dir = Path().absolute()\n", + "data_dir = current_dir / \"data\"\n", + "data_dir.mkdir(exist_ok=True)\n", + "\n", + "file_path = data_dir / \"people.json\"\n", + "with open(file_path, \"w\", encoding=\"utf-8\") as f:\n", + "    json.dump(generated_data, f, ensure_ascii=False, indent=2)\n", + "\n", + "print(\"Generated and saved JSON data:\")\n", + "pprint(generated_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you already have your own JSON file, you can load and inspect it as follows." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + " 'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': ['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 
'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': 
{'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': ['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "import json\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "\n", + "\n", + "file_path = \"data/people.json\"\n", + "# Read with the same UTF-8 encoding the file was written with\n", + "data = json.loads(Path(file_path).read_text(encoding=\"utf-8\"))\n", + "\n", + "pprint(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(data))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JSONLoader\n", + "\n", + "---\n", + "\n", + "When you want to extract specific values from JSON data (for example, every entry of the top-level `people` array), you can easily do this using JSONLoader as shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 1}, page_content='{\"name\": \"Alice Smith\", \"age\": 32, \"contact\": {\"email\": \"alice.smith@example.com\", \"phone\": \"555-123-4567\"}, \"address\": {\"street\": \"123 Main St\", \"city\": \"New York\", \"state\": \"NY\", \"zip\": \"10001\"}, \"personal_preferences\": {\"favorite_color\": \"blue\", \"hobbies\": [\"reading\", \"yoga\"], \"favorite_food\": \"sushi\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 2}, page_content='{\"name\": \"John Doe\", \"age\": 45, \"contact\": {\"email\": \"john.doe@example.com\", \"phone\": \"555-987-6543\"}, \"address\": {\"street\": \"456 Elm St\", \"city\": \"Los Angeles\", \"state\": \"CA\", \"zip\": \"90001\"}, \"personal_preferences\": {\"favorite_color\": \"green\", \"hobbies\": [\"hiking\", \"gardening\"], \"favorite_food\": \"pizza\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 3}, page_content='{\"name\": \"Emily Johnson\", \"age\": 28, \"contact\": {\"email\": \"emily.johnson@example.com\", \"phone\": \"555-456-7890\"}, \"address\": {\"street\": \"789 Oak St\", \"city\": \"Chicago\", \"state\": \"IL\", \"zip\": \"60601\"}, \"personal_preferences\": {\"favorite_color\": \"pink\", \"hobbies\": [\"painting\", \"traveling\"], \"favorite_food\": \"tacos\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 4}, page_content='{\"name\": \"Michael Brown\", \"age\": 38, \"contact\": {\"email\": \"michael.brown@example.com\", \"phone\": \"555-234-5678\"}, 
\"address\": {\"street\": \"321 Maple St\", \"city\": \"Houston\", \"state\": \"TX\", \"zip\": \"77001\"}, \"personal_preferences\": {\"favorite_color\": \"red\", \"hobbies\": [\"playing guitar\", \"cooking\"], \"favorite_food\": \"barbecue\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 5}, page_content='{\"name\": \"Sarah Wilson\", \"age\": 35, \"contact\": {\"email\": \"sarah.wilson@example.com\", \"phone\": \"555-345-6789\"}, \"address\": {\"street\": \"654 Pine St\", \"city\": \"Miami\", \"state\": \"FL\", \"zip\": \"33101\"}, \"personal_preferences\": {\"favorite_color\": \"purple\", \"hobbies\": [\"photography\", \"dancing\"], \"favorite_food\": \"sushi\"}}')]\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import JSONLoader\n", + "\n", + "# Create JSONLoader\n", + "loader = JSONLoader(\n", + " file_path=\"data/people.json\",\n", + " jq_schema=\".people[]\", # Access each item in the people array\n", + " text_content=False,\n", + ")\n", + "\n", + "# Example: extract only contact_details\n", + "# loader = JSONLoader(\n", + "# file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].contact_details\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Or extract only hobbies from personal_preferences\n", + "# loader = JSONLoader(\n", + "# file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].personal_preferences.hobbies\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Load documents\n", + "docs = loader.load()\n", + "pprint(docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain-opentutorial-LGorndcz-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", 
+ "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/06-DocumentLoader/data/people.json b/06-DocumentLoader/data/people.json new file mode 100644 index 000000000..67bf8ffd5 --- /dev/null +++ b/06-DocumentLoader/data/people.json @@ -0,0 +1,189 @@ +{ + "people": [ + { + "name": { + "first": "Alice", + "last": "Johnson" + }, + "age": 28, + "contact": { + "email": "alice.johnson@example.com", + "phone": "+1-555-0123", + "social_media": { + "twitter": "@alice_j", + "linkedin": "linkedin.com/in/alicejohnson" + } + }, + "address": { + "street": "123 Maple St", + "city": "Springfield", + "state": "IL", + "zip": "62704", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Reading", + "Hiking", + "Cooking" + ], + "favorite_food": "Italian", + "music_genre": "Jazz", + "travel_destinations": [ + "Japan", + "Italy", + "Canada" + ] + }, + "interesting_fact": "Alice has traveled to over 15 countries and speaks 3 languages." + }, + { + "name": { + "first": "Bob", + "last": "Smith" + }, + "age": 34, + "contact": { + "email": "bob.smith@example.com", + "phone": "+1-555-0456", + "social_media": { + "twitter": "@bobsmith34", + "linkedin": "linkedin.com/in/bobsmith" + } + }, + "address": { + "street": "456 Oak Ave", + "city": "Metropolis", + "state": "NY", + "zip": "10001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Photography", + "Cycling", + "Video Games" + ], + "favorite_food": "Mexican", + "music_genre": "Rock", + "travel_destinations": [ + "Brazil", + "Australia", + "Germany" + ] + }, + "interesting_fact": "Bob is an avid gamer and has competed in several national tournaments." 
+ }, + { + "name": { + "first": "Charlie", + "last": "Davis" + }, + "age": 45, + "contact": { + "email": "charlie.davis@example.com", + "phone": "+1-555-0789", + "social_media": { + "twitter": "@charliedavis45", + "linkedin": "linkedin.com/in/charliedavis" + } + }, + "address": { + "street": "789 Pine Rd", + "city": "Gotham", + "state": "NJ", + "zip": "07001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Gardening", + "Fishing", + "Woodworking" + ], + "favorite_food": "Barbecue", + "music_genre": "Country", + "travel_destinations": [ + "Canada", + "New Zealand", + "Norway" + ] + }, + "interesting_fact": "Charlie has a small farm where he raises chickens and grows organic vegetables." + }, + { + "name": { + "first": "Dana", + "last": "Lee" + }, + "age": 22, + "contact": { + "email": "dana.lee@example.com", + "phone": "+1-555-0111", + "social_media": { + "twitter": "@danalee22", + "linkedin": "linkedin.com/in/danalee" + } + }, + "address": { + "street": "234 Birch Blvd", + "city": "Star City", + "state": "CA", + "zip": "90001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Dancing", + "Sketching", + "Traveling" + ], + "favorite_food": "Thai", + "music_genre": "Pop", + "travel_destinations": [ + "Thailand", + "France", + "Spain" + ] + }, + "interesting_fact": "Dana is a dance instructor and has won several local competitions." 
+ }, + { + "name": { + "first": "Ethan", + "last": "Garcia" + }, + "age": 31, + "contact": { + "email": "ethan.garcia@example.com", + "phone": "+1-555-0999", + "social_media": { + "twitter": "@ethangarcia31", + "linkedin": "linkedin.com/in/ethangarcia" + } + }, + "address": { + "street": "345 Cedar St", + "city": "Central City", + "state": "TX", + "zip": "75001", + "country": "USA" + }, + "personal_preferences": { + "hobbies": [ + "Running", + "Travel Blogging", + "Cooking" + ], + "favorite_food": "Indian", + "music_genre": "Hip-Hop", + "travel_destinations": [ + "India", + "Italy", + "Mexico" + ] + }, + "interesting_fact": "Ethan runs a popular travel blog where he shares his adventures and culinary experiences." + } + ] +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 95491bfb9..05fccedfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ ragas = "^0.2.9" # Jupyter jupyter = "^1.1.1" notebook = "^7.3.2" +jq = "^1.8.0" From a8cf121fa5da8efcb24cd3333f7a08cedec5a90a Mon Sep 17 00:00:00 2001 From: leebeanbin Date: Thu, 2 Jan 2025 19:55:14 +0900 Subject: [PATCH 3/5] N-1/06-DocumentLoader/10-Json_Loader [Title] JsonLoader [Version] Initial [Language] ENG [Packages] langchain, langchain-openai, langchain-community MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - json load 용 rq 의존성 추가(toml) - 뱃지, pprint 오류 반영 - 이슈 네임 & 파일명 수정 완료 --- 06-DocumentLoader/10-JSON-Loader.ipynb | 438 +++++++++++++++++++++++++ 1 file changed, 438 insertions(+) create mode 100644 06-DocumentLoader/10-JSON-Loader.ipynb diff --git a/06-DocumentLoader/10-JSON-Loader.ipynb b/06-DocumentLoader/10-JSON-Loader.ipynb new file mode 100644 index 000000000..ce7553df4 --- /dev/null +++ b/06-DocumentLoader/10-JSON-Loader.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# JSON\n", + "\n", + "Let's look at how to load files with the `.json` extension using a 
loader.\n", + "\n", + "- Author: [leebeanbin](https://github.com/leebeanbin)\n", + "- Design:\n", + "- Peer Review: \n", + "- This is a part of [LangChain Open Tutorial](https://github.com/LangChain-OpenTutorial/LangChain-OpenTutorial/tree/main/06-DocumentLoader)\n", + "- [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LangChain-OpenTutorial/LangChain-OpenTutorial/blob/main/06-DocumentLoader/10-JSON-Loader.ipynb) [![Open in LangChain Academy](https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/66e9eba12c7b7688aa3dbb5e_LCA-badge-green.svg)](https://academy.langchain.com/courses/take/intro-to-langgraph/lessons/58239937-lesson-2-sub-graphs)\n", + "\n", + "## Overview\n", + "This tutorial demonstrates how to use LangChain's JSONLoader to load and process JSON files. We'll explore how to extract specific data from structured JSON files using jq-style queries.\n", + "\n", + "### Table of Contents\n", + "- [JSON](#json)\n", + "- [Overview](#overview)\n", + "- [Generate JSON Data](#generate-json-data)\n", + "- [JSONLoader](#jsonloader)\n", + " \n", + "When you want to extract specific values from JSON data, such as each entry of the `people` array used in this tutorial, you can easily do this using JSONLoader and a jq schema as shown below.\n", + "\n", + "### Reference\n", + "- https://python.langchain.com/docs/how_to/document_loader_json/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate JSON Data\n", + "\n", + "---\n", + "\n", + "If you want to generate sample JSON data with an LLM, you can use the following code.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generated and saved JSON data:\n", + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + "
'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': ['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 
'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': {'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': ['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "from langchain_openai import ChatOpenAI\n", + "from pathlib import Path\n", + "from dotenv import load_dotenv\n", + "from pprint import pprint\n", + "import json\n", + "import os\n", + "\n", + "# Load .env file\n", + 
"load_dotenv()\n", + "\n", + "# Initialize ChatOpenAI\n", + "llm = ChatOpenAI(\n", + " model=\"gpt-4o-mini\",\n", + " temperature=0.7,\n", + " model_kwargs={\"response_format\": {\"type\": \"json_object\"}}\n", + ")\n", + "\n", + "# Create prompt template\n", + "prompt = PromptTemplate(\n", + " input_variables=[],\n", + " template=\"\"\"Generate a JSON array containing detailed personal information for 5 people. \n", + " Include various fields like name, age, contact details, address, personal preferences, and any other interesting information you think would be relevant.\"\"\"\n", + ")\n", + "\n", + "# Create and invoke runnable sequence using the new pipe syntax\n", + "response = (prompt | llm).invoke({})\n", + "generated_data = json.loads(response.content)\n", + "\n", + "# Save to JSON file\n", + "current_dir = Path().absolute()\n", + "data_dir = current_dir / \"data\"\n", + "data_dir.mkdir(exist_ok=True)\n", + "\n", + "file_path = data_dir / \"people.json\"\n", + "with open(file_path, \"w\", encoding=\"utf-8\") as f:\n", + " json.dump(generated_data, f, ensure_ascii=False, indent=2)\n", + "\n", + "print(\"Generated and saved JSON data:\")\n", + "pprint(generated_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The case of loading JSON data is as follows when you want to load your own JSON data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'people': [{'address': {'city': 'Springfield',\n", + " 'country': 'USA',\n", + " 'state': 'IL',\n", + " 'street': '123 Maple St',\n", + " 'zip': '62704'},\n", + " 'age': 28,\n", + " 'contact': {'email': 'alice.johnson@example.com',\n", + " 'phone': '+1-555-0123',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", + " 'twitter': '@alice_j'}},\n", + " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", + " 'speaks 3 languages.',\n", + " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", + " 'personal_preferences': {'favorite_food': 'Italian',\n", + " 'hobbies': ['Reading',\n", + " 'Hiking',\n", + " 'Cooking'],\n", + " 'music_genre': 'Jazz',\n", + " 'travel_destinations': ['Japan',\n", + " 'Italy',\n", + " 'Canada']}},\n", + " {'address': {'city': 'Metropolis',\n", + " 'country': 'USA',\n", + " 'state': 'NY',\n", + " 'street': '456 Oak Ave',\n", + " 'zip': '10001'},\n", + " 'age': 34,\n", + " 'contact': {'email': 'bob.smith@example.com',\n", + " 'phone': '+1-555-0456',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", + " 'twitter': '@bobsmith34'}},\n", + " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", + " 'several national tournaments.',\n", + " 'name': {'first': 'Bob', 'last': 'Smith'},\n", + " 'personal_preferences': {'favorite_food': 'Mexican',\n", + " 'hobbies': ['Photography',\n", + " 'Cycling',\n", + " 'Video Games'],\n", + " 'music_genre': 'Rock',\n", + " 'travel_destinations': ['Brazil',\n", + " 'Australia',\n", + " 'Germany']}},\n", + " {'address': {'city': 'Gotham',\n", + " 'country': 'USA',\n", + " 'state': 'NJ',\n", + " 'street': '789 Pine Rd',\n", + " 'zip': '07001'},\n", + " 'age': 45,\n", + " 'contact': {'email': 'charlie.davis@example.com',\n", + " 'phone': '+1-555-0789',\n", + " 'social_media': {'linkedin': 
'linkedin.com/in/charliedavis',\n", + " 'twitter': '@charliedavis45'}},\n", + " 'interesting_fact': 'Charlie has a small farm where he raises '\n", + " 'chickens and grows organic vegetables.',\n", + " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", + " 'personal_preferences': {'favorite_food': 'Barbecue',\n", + " 'hobbies': ['Gardening',\n", + " 'Fishing',\n", + " 'Woodworking'],\n", + " 'music_genre': 'Country',\n", + " 'travel_destinations': ['Canada',\n", + " 'New Zealand',\n", + " 'Norway']}},\n", + " {'address': {'city': 'Star City',\n", + " 'country': 'USA',\n", + " 'state': 'CA',\n", + " 'street': '234 Birch Blvd',\n", + " 'zip': '90001'},\n", + " 'age': 22,\n", + " 'contact': {'email': 'dana.lee@example.com',\n", + " 'phone': '+1-555-0111',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", + " 'twitter': '@danalee22'}},\n", + " 'interesting_fact': 'Dana is a dance instructor and has won '\n", + " 'several local competitions.',\n", + " 'name': {'first': 'Dana', 'last': 'Lee'},\n", + " 'personal_preferences': {'favorite_food': 'Thai',\n", + " 'hobbies': ['Dancing',\n", + " 'Sketching',\n", + " 'Traveling'],\n", + " 'music_genre': 'Pop',\n", + " 'travel_destinations': ['Thailand',\n", + " 'France',\n", + " 'Spain']}},\n", + " {'address': {'city': 'Central City',\n", + " 'country': 'USA',\n", + " 'state': 'TX',\n", + " 'street': '345 Cedar St',\n", + " 'zip': '75001'},\n", + " 'age': 31,\n", + " 'contact': {'email': 'ethan.garcia@example.com',\n", + " 'phone': '+1-555-0999',\n", + " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", + " 'twitter': '@ethangarcia31'}},\n", + " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", + " 'shares his adventures and culinary '\n", + " 'experiences.',\n", + " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", + " 'personal_preferences': {'favorite_food': 'Indian',\n", + " 'hobbies': ['Running',\n", + " 'Travel Blogging',\n", + " 'Cooking'],\n", + " 'music_genre': 
'Hip-Hop',\n", + " 'travel_destinations': ['India',\n", + " 'Italy',\n", + " 'Mexico']}}]}\n" + ] + } + ], + "source": [ + "import json\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "\n", + "\n", + "file_path = \"data/people.json\"\n", + "data = json.loads(Path(file_path).read_text())\n", + "\n", + "pprint(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'dict'>\n" + ] + } + ], + "source": [ + "print(type(data))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JSONLoader\n", + "\n", + "---\n", + "\n", + "When you want to extract specific values from JSON data, such as each entry of the `people` array or a nested field like `personal_preferences.hobbies`, you can easily do this by passing a jq schema to JSONLoader as shown below." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 1}, page_content='{\"name\": \"Alice Smith\", \"age\": 32, \"contact\": {\"email\": \"alice.smith@example.com\", \"phone\": \"555-123-4567\"}, \"address\": {\"street\": \"123 Main St\", \"city\": \"New York\", \"state\": \"NY\", \"zip\": \"10001\"}, \"personal_preferences\": {\"favorite_color\": \"blue\", \"hobbies\": [\"reading\", \"yoga\"], \"favorite_food\": \"sushi\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 2}, page_content='{\"name\": \"John Doe\", \"age\": 45, \"contact\": {\"email\": \"john.doe@example.com\", \"phone\": \"555-987-6543\"}, \"address\": {\"street\": \"456 Elm St\", \"city\": \"Los Angeles\", \"state\": \"CA\", \"zip\": \"90001\"}, \"personal_preferences\": {\"favorite_color\": \"green\", \"hobbies\": [\"hiking\", \"gardening\"], \"favorite_food\":
\"pizza\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 3}, page_content='{\"name\": \"Emily Johnson\", \"age\": 28, \"contact\": {\"email\": \"emily.johnson@example.com\", \"phone\": \"555-456-7890\"}, \"address\": {\"street\": \"789 Oak St\", \"city\": \"Chicago\", \"state\": \"IL\", \"zip\": \"60601\"}, \"personal_preferences\": {\"favorite_color\": \"pink\", \"hobbies\": [\"painting\", \"traveling\"], \"favorite_food\": \"tacos\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 4}, page_content='{\"name\": \"Michael Brown\", \"age\": 38, \"contact\": {\"email\": \"michael.brown@example.com\", \"phone\": \"555-234-5678\"}, \"address\": {\"street\": \"321 Maple St\", \"city\": \"Houston\", \"state\": \"TX\", \"zip\": \"77001\"}, \"personal_preferences\": {\"favorite_color\": \"red\", \"hobbies\": [\"playing guitar\", \"cooking\"], \"favorite_food\": \"barbecue\"}}'),\n", + " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 5}, page_content='{\"name\": \"Sarah Wilson\", \"age\": 35, \"contact\": {\"email\": \"sarah.wilson@example.com\", \"phone\": \"555-345-6789\"}, \"address\": {\"street\": \"654 Pine St\", \"city\": \"Miami\", \"state\": \"FL\", \"zip\": \"33101\"}, \"personal_preferences\": {\"favorite_color\": \"purple\", \"hobbies\": [\"photography\", \"dancing\"], \"favorite_food\": \"sushi\"}}')]\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import JSONLoader\n", + "\n", + "# Create JSONLoader\n", + "loader = JSONLoader(\n", + " file_path=\"data/people.json\",\n", + " jq_schema=\".people[]\", # Access each item in the people array\n", + " text_content=False,\n", + ")\n", + "\n", + "# Example: extract only contact_details\n", + "# loader = JSONLoader(\n", + "# 
file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].contact\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Or extract only hobbies from personal_preferences\n", + "# loader = JSONLoader(\n", + "# file_path=\"data/people.json\",\n", + "# jq_schema=\".people[].personal_preferences.hobbies\",\n", + "# text_content=False,\n", + "# )\n", + "\n", + "# Load documents\n", + "docs = loader.load()\n", + "pprint(docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain-opentutorial-LGorndcz-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 18aa3f93632f72a72d7737187ec165bd5acbda5d Mon Sep 17 00:00:00 2001 From: leebeanbin Date: Thu, 2 Jan 2025 19:55:34 +0900 Subject: [PATCH 4/5] N-1/06-DocumentLoader/10-Json_Loader [Title] JsonLoader [Version] Initial [Language] ENG [Packages] langchain, langchain-openai, langchain-community MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - json load 용 rq 의존성 추가(toml) - 뱃지, pprint 오류 반영 - 이슈 네임 & 파일명 수정 완료 --- 06-DocumentLoader/09-JSON-Loader.ipynb | 435 ------------------------- 1 file changed, 435 deletions(-) delete mode 100644 06-DocumentLoader/09-JSON-Loader.ipynb diff --git a/06-DocumentLoader/09-JSON-Loader.ipynb b/06-DocumentLoader/09-JSON-Loader.ipynb deleted file mode 100644 index 3b0302334..000000000 --- a/06-DocumentLoader/09-JSON-Loader.ipynb +++ /dev/null @@ -1,435 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# JSON\n", - "\n", - "Let's look at how to load files with the `.json` extension using a loader.\n", - "\n", - "- Author: [leebeanbin](https://github.com/leebeanbin)\n", -
"- Design:\n", - "- Peer Review: \n", - "- This is a part of [LangChain Open Tutorial](https://github.com/LangChain-OpenTutorial/LangChain-OpenTutorial/tree/main/06-DocumentLoader)\n", - "\n", - "## Overview\n", - "This tutorial demonstrates how to use LangChain's JSONLoader to load and process JSON files. We'll explore how to extract specific data from structured JSON files using jq-style queries.\n", - "\n", - "### Table of Contents\n", - "- [JSON](#json)\n", - "- [Overview](#overview)\n", - "- [Generate JSON Data](#generate-json-data)\n", - "- [JSONLoader](#jsonloader)\n", - " \n", - "When you want to extract values under the content field within the message key of JSON data, you can easily do this using JSONLoader as shown below.\n", - "\n", - "reference: https://python.langchain.com/docs/how_to/document_loader_json/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate JSON Data\n", - "\n", - "---\n", - "\n", - "if you want to generate JSON data, you can use the following code.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generated and saved JSON data:\n", - "{'people': [{'address': {'city': 'Springfield',\n", - " 'country': 'USA',\n", - " 'state': 'IL',\n", - " 'street': '123 Maple St',\n", - " 'zip': '62704'},\n", - " 'age': 28,\n", - " 'contact': {'email': 'alice.johnson@example.com',\n", - " 'phone': '+1-555-0123',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", - " 'twitter': '@alice_j'}},\n", - " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", - " 'speaks 3 languages.',\n", - " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", - " 'personal_preferences': {'favorite_food': 'Italian',\n", - " 'hobbies': ['Reading',\n", - " 'Hiking',\n", - " 'Cooking'],\n", - " 'music_genre': 'Jazz',\n", - " 'travel_destinations': ['Japan',\n", - " 'Italy',\n", - " 
'Canada']}},\n", - " {'address': {'city': 'Metropolis',\n", - " 'country': 'USA',\n", - " 'state': 'NY',\n", - " 'street': '456 Oak Ave',\n", - " 'zip': '10001'},\n", - " 'age': 34,\n", - " 'contact': {'email': 'bob.smith@example.com',\n", - " 'phone': '+1-555-0456',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", - " 'twitter': '@bobsmith34'}},\n", - " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", - " 'several national tournaments.',\n", - " 'name': {'first': 'Bob', 'last': 'Smith'},\n", - " 'personal_preferences': {'favorite_food': 'Mexican',\n", - " 'hobbies': ['Photography',\n", - " 'Cycling',\n", - " 'Video Games'],\n", - " 'music_genre': 'Rock',\n", - " 'travel_destinations': ['Brazil',\n", - " 'Australia',\n", - " 'Germany']}},\n", - " {'address': {'city': 'Gotham',\n", - " 'country': 'USA',\n", - " 'state': 'NJ',\n", - " 'street': '789 Pine Rd',\n", - " 'zip': '07001'},\n", - " 'age': 45,\n", - " 'contact': {'email': 'charlie.davis@example.com',\n", - " 'phone': '+1-555-0789',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", - " 'twitter': '@charliedavis45'}},\n", - " 'interesting_fact': 'Charlie has a small farm where he raises '\n", - " 'chickens and grows organic vegetables.',\n", - " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", - " 'personal_preferences': {'favorite_food': 'Barbecue',\n", - " 'hobbies': ['Gardening',\n", - " 'Fishing',\n", - " 'Woodworking'],\n", - " 'music_genre': 'Country',\n", - " 'travel_destinations': ['Canada',\n", - " 'New Zealand',\n", - " 'Norway']}},\n", - " {'address': {'city': 'Star City',\n", - " 'country': 'USA',\n", - " 'state': 'CA',\n", - " 'street': '234 Birch Blvd',\n", - " 'zip': '90001'},\n", - " 'age': 22,\n", - " 'contact': {'email': 'dana.lee@example.com',\n", - " 'phone': '+1-555-0111',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", - " 'twitter': '@danalee22'}},\n", - " 'interesting_fact': 'Dana is a dance instructor 
and has won '\n", - " 'several local competitions.',\n", - " 'name': {'first': 'Dana', 'last': 'Lee'},\n", - " 'personal_preferences': {'favorite_food': 'Thai',\n", - " 'hobbies': ['Dancing',\n", - " 'Sketching',\n", - " 'Traveling'],\n", - " 'music_genre': 'Pop',\n", - " 'travel_destinations': ['Thailand',\n", - " 'France',\n", - " 'Spain']}},\n", - " {'address': {'city': 'Central City',\n", - " 'country': 'USA',\n", - " 'state': 'TX',\n", - " 'street': '345 Cedar St',\n", - " 'zip': '75001'},\n", - " 'age': 31,\n", - " 'contact': {'email': 'ethan.garcia@example.com',\n", - " 'phone': '+1-555-0999',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", - " 'twitter': '@ethangarcia31'}},\n", - " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", - " 'shares his adventures and culinary '\n", - " 'experiences.',\n", - " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", - " 'personal_preferences': {'favorite_food': 'Indian',\n", - " 'hobbies': ['Running',\n", - " 'Travel Blogging',\n", - " 'Cooking'],\n", - " 'music_genre': 'Hip-Hop',\n", - " 'travel_destinations': ['India',\n", - " 'Italy',\n", - " 'Mexico']}}]}\n" - ] - } - ], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "from langchain_openai import ChatOpenAI\n", - "from pathlib import Path\n", - "from dotenv import load_dotenv\n", - "import json\n", - "import os\n", - "\n", - "# Load .env file\n", - "load_dotenv()\n", - "\n", - "# Initialize ChatOpenAI\n", - "llm = ChatOpenAI(\n", - " model=\"gpt-4o-mini\",\n", - " temperature=0.7,\n", - " model_kwargs={\"response_format\": {\"type\": \"json_object\"}}\n", - ")\n", - "\n", - "# Create prompt template\n", - "prompt = PromptTemplate(\n", - " input_variables=[],\n", - " template=\"\"\"Generate a JSON array containing detailed personal information for 5 people. 
\n", - " Include various fields like name, age, contact details, address, personal preferences, and any other interesting information you think would be relevant.\"\"\"\n", - ")\n", - "\n", - "# Create and invoke runnable sequence using the new pipe syntax\n", - "response = (prompt | llm).invoke({})\n", - "generated_data = json.loads(response.content)\n", - "\n", - "# Save to JSON file\n", - "current_dir = Path().absolute()\n", - "data_dir = current_dir / \"data\"\n", - "data_dir.mkdir(exist_ok=True)\n", - "\n", - "file_path = data_dir / \"people.json\"\n", - "with open(file_path, \"w\", encoding=\"utf-8\") as f:\n", - " json.dump(generated_data, f, ensure_ascii=False, indent=2)\n", - "\n", - "print(\"Generated and saved JSON data:\")\n", - "pprint(generated_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The case of loading JSON data is as follows when you want to load your own JSON data." - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'people': [{'address': {'city': 'Springfield',\n", - " 'country': 'USA',\n", - " 'state': 'IL',\n", - " 'street': '123 Maple St',\n", - " 'zip': '62704'},\n", - " 'age': 28,\n", - " 'contact': {'email': 'alice.johnson@example.com',\n", - " 'phone': '+1-555-0123',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/alicejohnson',\n", - " 'twitter': '@alice_j'}},\n", - " 'interesting_fact': 'Alice has traveled to over 15 countries and '\n", - " 'speaks 3 languages.',\n", - " 'name': {'first': 'Alice', 'last': 'Johnson'},\n", - " 'personal_preferences': {'favorite_food': 'Italian',\n", - " 'hobbies': ['Reading',\n", - " 'Hiking',\n", - " 'Cooking'],\n", - " 'music_genre': 'Jazz',\n", - " 'travel_destinations': ['Japan',\n", - " 'Italy',\n", - " 'Canada']}},\n", - " {'address': {'city': 'Metropolis',\n", - " 'country': 'USA',\n", - " 'state': 'NY',\n", - " 'street': '456 Oak Ave',\n", - " 
'zip': '10001'},\n", - " 'age': 34,\n", - " 'contact': {'email': 'bob.smith@example.com',\n", - " 'phone': '+1-555-0456',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/bobsmith',\n", - " 'twitter': '@bobsmith34'}},\n", - " 'interesting_fact': 'Bob is an avid gamer and has competed in '\n", - " 'several national tournaments.',\n", - " 'name': {'first': 'Bob', 'last': 'Smith'},\n", - " 'personal_preferences': {'favorite_food': 'Mexican',\n", - " 'hobbies': ['Photography',\n", - " 'Cycling',\n", - " 'Video Games'],\n", - " 'music_genre': 'Rock',\n", - " 'travel_destinations': ['Brazil',\n", - " 'Australia',\n", - " 'Germany']}},\n", - " {'address': {'city': 'Gotham',\n", - " 'country': 'USA',\n", - " 'state': 'NJ',\n", - " 'street': '789 Pine Rd',\n", - " 'zip': '07001'},\n", - " 'age': 45,\n", - " 'contact': {'email': 'charlie.davis@example.com',\n", - " 'phone': '+1-555-0789',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/charliedavis',\n", - " 'twitter': '@charliedavis45'}},\n", - " 'interesting_fact': 'Charlie has a small farm where he raises '\n", - " 'chickens and grows organic vegetables.',\n", - " 'name': {'first': 'Charlie', 'last': 'Davis'},\n", - " 'personal_preferences': {'favorite_food': 'Barbecue',\n", - " 'hobbies': ['Gardening',\n", - " 'Fishing',\n", - " 'Woodworking'],\n", - " 'music_genre': 'Country',\n", - " 'travel_destinations': ['Canada',\n", - " 'New Zealand',\n", - " 'Norway']}},\n", - " {'address': {'city': 'Star City',\n", - " 'country': 'USA',\n", - " 'state': 'CA',\n", - " 'street': '234 Birch Blvd',\n", - " 'zip': '90001'},\n", - " 'age': 22,\n", - " 'contact': {'email': 'dana.lee@example.com',\n", - " 'phone': '+1-555-0111',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/danalee',\n", - " 'twitter': '@danalee22'}},\n", - " 'interesting_fact': 'Dana is a dance instructor and has won '\n", - " 'several local competitions.',\n", - " 'name': {'first': 'Dana', 'last': 'Lee'},\n", - " 'personal_preferences': 
{'favorite_food': 'Thai',\n", - " 'hobbies': ['Dancing',\n", - " 'Sketching',\n", - " 'Traveling'],\n", - " 'music_genre': 'Pop',\n", - " 'travel_destinations': ['Thailand',\n", - " 'France',\n", - " 'Spain']}},\n", - " {'address': {'city': 'Central City',\n", - " 'country': 'USA',\n", - " 'state': 'TX',\n", - " 'street': '345 Cedar St',\n", - " 'zip': '75001'},\n", - " 'age': 31,\n", - " 'contact': {'email': 'ethan.garcia@example.com',\n", - " 'phone': '+1-555-0999',\n", - " 'social_media': {'linkedin': 'linkedin.com/in/ethangarcia',\n", - " 'twitter': '@ethangarcia31'}},\n", - " 'interesting_fact': 'Ethan runs a popular travel blog where he '\n", - " 'shares his adventures and culinary '\n", - " 'experiences.',\n", - " 'name': {'first': 'Ethan', 'last': 'Garcia'},\n", - " 'personal_preferences': {'favorite_food': 'Indian',\n", - " 'hobbies': ['Running',\n", - " 'Travel Blogging',\n", - " 'Cooking'],\n", - " 'music_genre': 'Hip-Hop',\n", - " 'travel_destinations': ['India',\n", - " 'Italy',\n", - " 'Mexico']}}]}\n" - ] - } - ], - "source": [ - "import json\n", - "from pathlib import Path\n", - "from pprint import pprint\n", - "\n", - "\n", - "file_path = \"data/people.json\"\n", - "data = json.loads(Path(file_path).read_text())\n", - "\n", - "pprint(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(type(data))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# JSONLoader\n", - "\n", - "---\n", - "\n", - "When you want to extract values under the content field within the message key of JSON data, you can easily do this using JSONLoader as shown below." 
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 1}, page_content='{\"name\": \"Alice Smith\", \"age\": 32, \"contact\": {\"email\": \"alice.smith@example.com\", \"phone\": \"555-123-4567\"}, \"address\": {\"street\": \"123 Main St\", \"city\": \"New York\", \"state\": \"NY\", \"zip\": \"10001\"}, \"personal_preferences\": {\"favorite_color\": \"blue\", \"hobbies\": [\"reading\", \"yoga\"], \"favorite_food\": \"sushi\"}}'),\n", - " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 2}, page_content='{\"name\": \"John Doe\", \"age\": 45, \"contact\": {\"email\": \"john.doe@example.com\", \"phone\": \"555-987-6543\"}, \"address\": {\"street\": \"456 Elm St\", \"city\": \"Los Angeles\", \"state\": \"CA\", \"zip\": \"90001\"}, \"personal_preferences\": {\"favorite_color\": \"green\", \"hobbies\": [\"hiking\", \"gardening\"], \"favorite_food\": \"pizza\"}}'),\n", - " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 3}, page_content='{\"name\": \"Emily Johnson\", \"age\": 28, \"contact\": {\"email\": \"emily.johnson@example.com\", \"phone\": \"555-456-7890\"}, \"address\": {\"street\": \"789 Oak St\", \"city\": \"Chicago\", \"state\": \"IL\", \"zip\": \"60601\"}, \"personal_preferences\": {\"favorite_color\": \"pink\", \"hobbies\": [\"painting\", \"traveling\"], \"favorite_food\": \"tacos\"}}'),\n", - " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 4}, page_content='{\"name\": \"Michael Brown\", \"age\": 38, \"contact\": {\"email\": \"michael.brown@example.com\", \"phone\": \"555-234-5678\"}, 
\"address\": {\"street\": \"321 Maple St\", \"city\": \"Houston\", \"state\": \"TX\", \"zip\": \"77001\"}, \"personal_preferences\": {\"favorite_color\": \"red\", \"hobbies\": [\"playing guitar\", \"cooking\"], \"favorite_food\": \"barbecue\"}}'),\n", - " Document(metadata={'source': '/Users/leejungbin/Downloads/LangChain-OpenTutorial/06-DocumentLoader/data/people.json', 'seq_num': 5}, page_content='{\"name\": \"Sarah Wilson\", \"age\": 35, \"contact\": {\"email\": \"sarah.wilson@example.com\", \"phone\": \"555-345-6789\"}, \"address\": {\"street\": \"654 Pine St\", \"city\": \"Miami\", \"state\": \"FL\", \"zip\": \"33101\"}, \"personal_preferences\": {\"favorite_color\": \"purple\", \"hobbies\": [\"photography\", \"dancing\"], \"favorite_food\": \"sushi\"}}')]\n" - ] - } - ], - "source": [ - "from langchain_community.document_loaders import JSONLoader\n", - "\n", - "# Create JSONLoader\n", - "loader = JSONLoader(\n", - " file_path=\"data/people.json\",\n", - " jq_schema=\".people[]\", # Access each item in the people array\n", - " text_content=False,\n", - ")\n", - "\n", - "# Example: extract only contact_details\n", - "# loader = JSONLoader(\n", - "# file_path=\"data/people.json\",\n", - "# jq_schema=\".people[].contact_details\",\n", - "# text_content=False,\n", - "# )\n", - "\n", - "# Or extract only hobbies from personal_preferences\n", - "# loader = JSONLoader(\n", - "# file_path=\"data/people.json\",\n", - "# jq_schema=\".people[].personal_preferences.hobbies\",\n", - "# text_content=False,\n", - "# )\n", - "\n", - "# Load documents\n", - "docs = loader.load()\n", - "pprint(docs)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "langchain-opentutorial-LGorndcz-py3.11", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", 
- "version": "3.11.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From e4518869ce6128bf0587c68a3761a3aa06e3d69b Mon Sep 17 00:00:00 2001 From: leebeanbin Date: Thu, 2 Jan 2025 20:21:49 +0900 Subject: [PATCH 5/5] N-1/06-DocumentLoader/10-Json_Loader [Title] JsonLoader [Version] Third [Language] ENG [Packages] langchain, langchain-openai, langchain-community - installations update - add setup fields and cell --- 06-DocumentLoader/10-JSON-Loader.ipynb | 101 +++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/06-DocumentLoader/10-JSON-Loader.ipynb b/06-DocumentLoader/10-JSON-Loader.ipynb index ce7553df4..f4b676a41 100644 --- a/06-DocumentLoader/10-JSON-Loader.ipynb +++ b/06-DocumentLoader/10-JSON-Loader.ipynb @@ -13,11 +13,20 @@ "- Peer Review: \n", "- This is a part of [LangChain Open Tutorial](https://github.com/LangChain-OpenTutorial/LangChain-OpenTutorial/tree/main/06-DocumentLoader)\n", "- [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain-academy/blob/main/module-4/sub-graph.ipynb) [![Open in LangChain Academy](https://cdn.prod.website-files.com/65b8cd72835ceeacd4449a53/66e9eba12c7b7688aa3dbb5e_LCA-badge-green.svg)](https://academy.langchain.com/courses/take/intro-to-langgraph/lessons/58239937-lesson-2-sub-graphs)\n", + "\n", + "## Environment Setup\n", + "\n", + "Setting up your environment is the first step. See the [Environment Setup](https://wikidocs.net/257836) guide for more details.\n", + "\n", + "**[Note]**\n", + "- The `langchain-opentutorial` is a bundle of easy-to-use environment setup guidance, useful functions and utilities for tutorials.\n", + "- Check out the [`langchain-opentutorial`](https://github.com/LangChain-OpenTutorial/langchain-opentutorial-pypi) for more details.\n", "\n", "## Overview\n", "This tutorial demonstrates how to use LangChain's JSONLoader to load and process JSON files. 
We'll explore how to extract specific data from structured JSON files using jq-style queries.\n", "\n", "### Table of Contents\n", + "- [Environment Setup](#environment-setup)\n", "- [JSON](#json)\n", "- [Overview](#overview)\n", "- [Generate JSON Data](#generate-json-data)\n", @@ -25,10 +34,102 @@ " \n", "When you want to extract values under the content field within the message key of JSON data, you can easily do this using JSONLoader as shown below.\n", "\n", + "\n", "### reference\n", "- https://python.langchain.com/docs/how_to/document_loader_json/" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "Set `OPENAI_API_KEY` in a `.env` file and load it if you want to generate a new JSON file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "jupyter": { + "is_executing": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langchain in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (0.3.13)\n", + "Requirement already satisfied: langchain_openai in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (0.2.14)\n", + "Requirement already satisfied: langchain_community in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (0.3.13)\n", + "Collecting rq\n", + " Downloading rq-2.1.0-py3-none-any.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: PyYAML>=5.3 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages 
(from langchain) (2.0.36)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (3.11.11)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.26 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (0.3.28)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (0.3.4)\n", + "Requirement already satisfied: langsmith<0.3,>=0.1.17 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (0.2.7)\n", + "Requirement already satisfied: numpy<2,>=1.22.4 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (1.26.4)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (2.10.4)\n", + "Requirement already satisfied: requests<3,>=2 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain) (9.0.0)\n", + "Requirement already satisfied: openai<2.0.0,>=1.58.1 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain_openai) (1.58.1)\n", + "Requirement already satisfied: 
tiktoken<1,>=0.7 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain_openai) (0.8.0)\n", + "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain_community) (0.6.7)\n", + "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain_community) (0.4.0)\n", + "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain_community) (2.7.1)\n", + "Collecting click>=5 (from rq)\n", + " Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting redis>=3.5 (from rq)\n", + " Downloading redis-5.2.1-py3-none-any.whl.metadata (9.1 kB)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", + "Requirement 
already satisfied: multidict<7.0,>=4.5 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (3.23.2)\n", + "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (0.9.0)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.26->langchain) (1.33)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.26->langchain) (24.2)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langchain-core<0.4.0,>=0.3.26->langchain) (4.12.2)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in 
/Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langsmith<0.3,>=0.1.17->langchain) (0.27.2)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langsmith<0.3,>=0.1.17->langchain) (3.10.13)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from langsmith<0.3,>=0.1.17->langchain) (1.0.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.7.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (0.8.2)\n", + "Requirement already satisfied: sniffio in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.67.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from 
pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n", + "Requirement already satisfied: python-dotenv>=0.21.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain_community) (1.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from requests<3,>=2->langchain) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from requests<3,>=2->langchain) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from requests<3,>=2->langchain) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from requests<3,>=2->langchain) (2024.12.14)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2024.11.6)\n", + "Requirement already satisfied: httpcore==1.* in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in 
/Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.26->langchain) (3.0.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/leejungbin/Library/Caches/pypoetry/virtualenvs/langchain-opentutorial-LGorndcz-py3.11/lib/python3.11/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community) (1.0.0)\n", + "Downloading rq-2.1.0-py3-none-any.whl (96 kB)\n", + "Downloading click-8.1.8-py3-none-any.whl (98 kB)\n", + "Downloading redis-5.2.1-py3-none-any.whl (261 kB)\n", + "Installing collected packages: redis, click, rq\n", + "Successfully installed click-8.1.8 redis-5.2.1 rq-2.1.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install langchain langchain_openai langchain_community jq" + ] + }, { "cell_type": "markdown", "metadata": {},