{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LangChain: An Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Install"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting langchain\n",
      "  Downloading langchain-1.2.16-py3-none-any.whl.metadata (5.8 kB)\n",
      "Collecting langchain-core<2.0.0,>=1.3.2 (from langchain)\n",
      "  Downloading langchain_core-1.3.2-py3-none-any.whl.metadata (4.4 kB)\n",
      "Collecting langgraph<1.2.0,>=1.1.10 (from langchain)\n",
      "  Downloading langgraph-1.1.10-py3-none-any.whl.metadata (8.0 kB)\n",
      "Collecting pydantic<3.0.0,>=2.7.4 (from langchain)\n",
      "  Downloading pydantic-2.13.3-py3-none-any.whl.metadata (108 kB)\n",
      "Collecting jsonpatch<2.0.0,>=1.33.0 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Using cached jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\n",
      "Collecting langchain-protocol>=0.0.10 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading langchain_protocol-0.0.14-py3-none-any.whl.metadata (2.4 kB)\n",
      "Collecting langsmith<1.0.0,>=0.3.45 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading langsmith-0.7.38-py3-none-any.whl.metadata (15 kB)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (25.0)\n",
      "Collecting pyyaml<7.0.0,>=5.3.0 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl.metadata (2.4 kB)\n",
      "Collecting tenacity!=8.4.0,<10.0.0,>=8.1.0 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading tenacity-9.1.4-py3-none-any.whl.metadata (1.2 kB)\n",
      "Collecting typing-extensions<5.0.0,>=4.7.0 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)\n",
      "Collecting uuid-utils<1.0,>=0.12.0 (from langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl.metadata (4.8 kB)\n",
      "Collecting jsonpointer>=1.9 (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading jsonpointer-3.1.1-py3-none-any.whl.metadata (2.4 kB)\n",
      "Collecting langgraph-checkpoint<5.0.0,>=2.1.0 (from langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading langgraph_checkpoint-4.0.3-py3-none-any.whl.metadata (5.2 kB)\n",
      "Collecting langgraph-prebuilt<1.1.0,>=1.0.12 (from langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading langgraph_prebuilt-1.0.13-py3-none-any.whl.metadata (5.2 kB)\n",
      "Collecting langgraph-sdk<0.4.0,>=0.3.0 (from langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading langgraph_sdk-0.3.13-py3-none-any.whl.metadata (1.6 kB)\n",
      "Collecting xxhash>=3.5.0 (from langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading xxhash-3.7.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (13 kB)\n",
      "Collecting ormsgpack>=1.12.0 (from langgraph-checkpoint<5.0.0,>=2.1.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading ormsgpack-1.12.2-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (3.2 kB)\n",
      "Collecting httpx>=0.25.2 (from langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)\n",
      "Collecting orjson>=3.11.5 (from langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading orjson-3.11.8-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl.metadata (41 kB)\n",
      "Collecting requests-toolbelt>=1.0.0 (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Using cached requests_toolbelt-1.0.0-py2.py3-none-any.whl.metadata (14 kB)\n",
      "Collecting requests>=2.0.0 (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading requests-2.33.1-py3-none-any.whl.metadata (4.8 kB)\n",
      "Collecting zstandard>=0.23.0 (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (3.3 kB)\n",
      "Collecting anyio (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading anyio-4.13.0-py3-none-any.whl.metadata (4.5 kB)\n",
      "Collecting certifi (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading certifi-2026.4.22-py3-none-any.whl.metadata (2.5 kB)\n",
      "Collecting httpcore==1.* (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)\n",
      "Collecting idna (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Downloading idna-3.13-py3-none-any.whl.metadata (8.0 kB)\n",
      "Collecting h11>=0.16 (from httpcore==1.*->httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain)\n",
      "  Using cached h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)\n",
      "Collecting annotated-types>=0.6.0 (from pydantic<3.0.0,>=2.7.4->langchain)\n",
      "  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
      "Collecting pydantic-core==2.46.3 (from pydantic<3.0.0,>=2.7.4->langchain)\n",
      "  Downloading pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl.metadata (6.6 kB)\n",
      "Collecting typing-inspection>=0.4.2 (from pydantic<3.0.0,>=2.7.4->langchain)\n",
      "  Downloading typing_inspection-0.4.2-py3-none-any.whl.metadata (2.6 kB)\n",
      "Collecting charset_normalizer<4,>=2 (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl.metadata (40 kB)\n",
      "Collecting urllib3<3,>=1.26 (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain)\n",
      "  Downloading urllib3-2.6.3-py3-none-any.whl.metadata (6.9 kB)\n",
      "Downloading langchain-1.2.16-py3-none-any.whl (112 kB)\n",
      "Downloading langchain_core-1.3.2-py3-none-any.whl (542 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.4/542.4 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hUsing cached jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
      "Downloading langgraph-1.1.10-py3-none-any.whl (173 kB)\n",
      "Downloading langgraph_checkpoint-4.0.3-py3-none-any.whl (51 kB)\n",
      "Downloading langgraph_prebuilt-1.0.13-py3-none-any.whl (37 kB)\n",
      "Downloading langgraph_sdk-0.3.13-py3-none-any.whl (96 kB)\n",
      "Downloading langsmith-0.7.38-py3-none-any.whl (392 kB)\n",
      "Using cached httpx-0.28.1-py3-none-any.whl (73 kB)\n",
      "Downloading httpcore-1.0.9-py3-none-any.whl (78 kB)\n",
      "Downloading pydantic-2.13.3-py3-none-any.whl (471 kB)\n",
      "Downloading pydantic_core-2.46.3-cp313-cp313-macosx_10_12_x86_64.whl (2.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl (181 kB)\n",
      "Downloading tenacity-9.1.4-py3-none-any.whl (28 kB)\n",
      "Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)\n",
      "Downloading uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl (309 kB)\n",
      "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
      "Using cached h11-0.16.0-py3-none-any.whl (37 kB)\n",
      "Downloading jsonpointer-3.1.1-py3-none-any.whl (7.7 kB)\n",
      "Downloading langchain_protocol-0.0.14-py3-none-any.whl (7.0 kB)\n",
      "Downloading orjson-3.11.8-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl (229 kB)\n",
      "Downloading ormsgpack-1.12.2-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl (378 kB)\n",
      "Downloading requests-2.33.1-py3-none-any.whl (64 kB)\n",
      "Downloading charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl (309 kB)\n",
      "Downloading idna-3.13-py3-none-any.whl (68 kB)\n",
      "Downloading urllib3-2.6.3-py3-none-any.whl (131 kB)\n",
      "Downloading certifi-2026.4.22-py3-none-any.whl (135 kB)\n",
      "Using cached requests_toolbelt-1.0.0-py2.py3-none-any.whl (54 kB)\n",
      "Downloading typing_inspection-0.4.2-py3-none-any.whl (14 kB)\n",
      "Downloading xxhash-3.7.0-cp313-cp313-macosx_10_13_x86_64.whl (33 kB)\n",
      "Downloading zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl (795 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m795.7/795.7 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading anyio-4.13.0-py3-none-any.whl (114 kB)\n",
      "Installing collected packages: zstandard, xxhash, uuid-utils, urllib3, typing-extensions, tenacity, pyyaml, ormsgpack, orjson, jsonpointer, idna, h11, charset_normalizer, certifi, annotated-types, typing-inspection, requests, pydantic-core, langchain-protocol, jsonpatch, httpcore, anyio, requests-toolbelt, pydantic, httpx, langsmith, langgraph-sdk, langchain-core, langgraph-checkpoint, langgraph-prebuilt, langgraph, langchain\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32/32\u001b[0m [langchain]32\u001b[0m [langchain]prebuilt]\n",
      "\u001b[1A\u001b[2KSuccessfully installed annotated-types-0.7.0 anyio-4.13.0 certifi-2026.4.22 charset_normalizer-3.4.7 h11-0.16.0 httpcore-1.0.9 httpx-0.28.1 idna-3.13 jsonpatch-1.33 jsonpointer-3.1.1 langchain-1.2.16 langchain-core-1.3.2 langchain-protocol-0.0.14 langgraph-1.1.10 langgraph-checkpoint-4.0.3 langgraph-prebuilt-1.0.13 langgraph-sdk-0.3.13 langsmith-0.7.38 orjson-3.11.8 ormsgpack-1.12.2 pydantic-2.13.3 pydantic-core-2.46.3 pyyaml-6.0.3 requests-2.33.1 requests-toolbelt-1.0.0 tenacity-9.1.4 typing-extensions-4.15.0 typing-inspection-0.4.2 urllib3-2.6.3 uuid-utils-0.14.1 xxhash-3.7.0 zstandard-0.25.0\n",
      "Collecting langchain-community\n",
      "  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.0.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (1.3.2)\n",
      "Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)\n",
      "  Downloading langchain_classic-1.0.4-py3-none-any.whl.metadata (4.8 kB)\n",
      "Collecting SQLAlchemy<3.0.0,>=1.4.0 (from langchain-community)\n",
      "  Downloading sqlalchemy-2.0.49-py3-none-any.whl.metadata (9.5 kB)\n",
      "Requirement already satisfied: requests<3.0.0,>=2.32.5 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (2.33.1)\n",
      "Requirement already satisfied: PyYAML<7.0.0,>=5.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (6.0.3)\n",
      "Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)\n",
      "  Downloading aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl.metadata (8.1 kB)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (9.1.4)\n",
      "Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)\n",
      "  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n",
      "Collecting pydantic-settings<3.0.0,>=2.10.1 (from langchain-community)\n",
      "  Downloading pydantic_settings-2.14.0-py3-none-any.whl.metadata (3.4 kB)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.1.125 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (0.7.38)\n",
      "Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)\n",
      "  Downloading httpx_sse-0.4.3-py3-none-any.whl.metadata (9.7 kB)\n",
      "Requirement already satisfied: numpy>=2.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-community) (2.3.1)\n",
      "Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)\n",
      "Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)\n",
      "Collecting attrs>=17.3.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading attrs-26.1.0-py3-none-any.whl.metadata (8.8 kB)\n",
      "Collecting frozenlist>=1.1.1 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (20 kB)\n",
      "Collecting multidict<7.0,>=4.5 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl.metadata (5.3 kB)\n",
      "Collecting propcache>=0.2.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl.metadata (13 kB)\n",
      "Collecting yarl<2.0,>=1.17.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)\n",
      "  Downloading yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (79 kB)\n",
      "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)\n",
      "  Downloading marshmallow-3.26.2-py3-none-any.whl.metadata (7.3 kB)\n",
      "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)\n",
      "  Using cached typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n",
      "Collecting langchain-text-splitters<2.0.0,>=1.1.2 (from langchain-classic<2.0.0,>=1.0.0->langchain-community)\n",
      "  Downloading langchain_text_splitters-1.1.2-py3-none-any.whl.metadata (3.3 kB)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-classic<2.0.0,>=1.0.0->langchain-community) (2.13.3)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.0.1->langchain-community) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.0.1->langchain-community) (0.0.14)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.0.1->langchain-community) (25.0)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.0.1->langchain-community) (4.15.0)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.0.1->langchain-community) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.0.1->langchain-community) (3.1.1)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.1.125->langchain-community) (0.28.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.1.125->langchain-community) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.1.125->langchain-community) (1.0.0)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.1.125->langchain-community) (3.7.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.1.125->langchain-community) (0.25.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain-community) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain-community) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain-community) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain-community) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.1.125->langchain-community) (0.16.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-classic<2.0.0,>=1.0.0->langchain-community) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-classic<2.0.0,>=1.0.0->langchain-community) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-classic<2.0.0,>=1.0.0->langchain-community) (0.4.2)\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic-settings<3.0.0,>=2.10.1->langchain-community) (1.2.2)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests<3.0.0,>=2.32.5->langchain-community) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests<3.0.0,>=2.32.5->langchain-community) (2.6.3)\n",
      "Collecting greenlet>=1 (from SQLAlchemy<3.0.0,>=1.4.0->langchain-community)\n",
      "  Downloading greenlet-3.5.0-cp313-cp313-macosx_11_0_universal2.whl.metadata (3.7 kB)\n",
      "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain-community)\n",
      "  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)\n",
      "Downloading langchain_community-0.4.1-py3-none-any.whl (2.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl (496 kB)\n",
      "Using cached dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
      "Downloading httpx_sse-0.4.3-py3-none-any.whl (9.0 kB)\n",
      "Downloading langchain_classic-1.0.4-py3-none-any.whl (1.0 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading langchain_text_splitters-1.1.2-py3-none-any.whl (35 kB)\n",
      "Downloading marshmallow-3.26.2-py3-none-any.whl (50 kB)\n",
      "Downloading multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl (45 kB)\n",
      "Downloading pydantic_settings-2.14.0-py3-none-any.whl (60 kB)\n",
      "Downloading sqlalchemy-2.0.49-py3-none-any.whl (1.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hUsing cached typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
      "Downloading yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl (86 kB)\n",
      "Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl (15 kB)\n",
      "Downloading aiosignal-1.4.0-py3-none-any.whl (7.5 kB)\n",
      "Downloading attrs-26.1.0-py3-none-any.whl (67 kB)\n",
      "Downloading frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl (49 kB)\n",
      "Downloading greenlet-3.5.0-cp313-cp313-macosx_11_0_universal2.whl (285 kB)\n",
      "Downloading mypy_extensions-1.1.0-py3-none-any.whl (5.0 kB)\n",
      "Downloading propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl (44 kB)\n",
      "Installing collected packages: propcache, mypy-extensions, multidict, marshmallow, httpx-sse, greenlet, frozenlist, attrs, aiohappyeyeballs, yarl, typing-inspect, SQLAlchemy, aiosignal, pydantic-settings, dataclasses-json, aiohttp, langchain-text-splitters, langchain-classic, langchain-community\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19/19\u001b[0m [langchain-community]ngchain-community]ters]\n",
      "\u001b[1A\u001b[2KSuccessfully installed SQLAlchemy-2.0.49 aiohappyeyeballs-2.6.1 aiohttp-3.13.5 aiosignal-1.4.0 attrs-26.1.0 dataclasses-json-0.6.7 frozenlist-1.8.0 greenlet-3.5.0 httpx-sse-0.4.3 langchain-classic-1.0.4 langchain-community-0.4.1 langchain-text-splitters-1.1.2 marshmallow-3.26.2 multidict-6.7.1 mypy-extensions-1.1.0 propcache-0.4.1 pydantic-settings-2.14.0 typing-inspect-0.9.0 yarl-1.23.0\n",
      "Requirement already satisfied: langchain-core in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (1.3.2)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (0.0.14)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (0.7.38)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (25.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (2.13.3)\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (6.0.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (9.1.4)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (4.15.0)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core) (3.1.1)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.28.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (2.33.1)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (3.7.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core) (0.25.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core) (0.16.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.4.2)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core) (2.6.3)\n",
      "Collecting langchain-openai\n",
      "  Downloading langchain_openai-1.2.1-py3-none-any.whl.metadata (3.1 kB)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.3.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-openai) (1.3.2)\n",
      "Collecting openai<3.0.0,>=2.26.0 (from langchain-openai)\n",
      "  Downloading openai-2.33.0-py3-none-any.whl.metadata (31 kB)\n",
      "Collecting tiktoken<1.0.0,>=0.7.0 (from langchain-openai)\n",
      "  Downloading tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (6.7 kB)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.0.14)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.7.38)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (25.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (2.13.3)\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (6.0.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (9.1.4)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (4.15.0)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.3.2->langchain-openai) (3.1.1)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.28.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (2.33.1)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (3.7.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.25.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.16.0)\n",
      "Collecting distro<2,>=1.7.0 (from openai<3.0.0,>=2.26.0->langchain-openai)\n",
      "  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
      "Collecting jiter<1,>=0.10.0 (from openai<3.0.0,>=2.26.0->langchain-openai)\n",
      "  Downloading jiter-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl.metadata (5.2 kB)\n",
      "Collecting sniffio (from openai<3.0.0,>=2.26.0->langchain-openai)\n",
      "  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
      "Collecting tqdm>4 (from openai<3.0.0,>=2.26.0->langchain-openai)\n",
      "  Downloading tqdm-4.67.3-py3-none-any.whl.metadata (57 kB)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.3.2->langchain-openai) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.3.2->langchain-openai) (0.4.2)\n",
      "Collecting regex>=2022.1.18 (from tiktoken<1.0.0,>=0.7.0->langchain-openai)\n",
      "  Downloading regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl.metadata (40 kB)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain-openai) (2.6.3)\n",
      "Downloading langchain_openai-1.2.1-py3-none-any.whl (98 kB)\n",
      "Downloading openai-2.33.0-py3-none-any.whl (1.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hUsing cached distro-1.9.0-py3-none-any.whl (20 kB)\n",
      "Downloading jiter-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl (315 kB)\n",
      "Downloading tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl (1.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0m\n",
      "\u001b[?25hDownloading regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl (291 kB)\n",
      "Downloading tqdm-4.67.3-py3-none-any.whl (78 kB)\n",
      "Using cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
      "Installing collected packages: tqdm, sniffio, regex, jiter, distro, tiktoken, openai, langchain-openai\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [langchain-openai][openai]]\n",
      "\u001b[1A\u001b[2KSuccessfully installed distro-1.9.0 jiter-0.14.0 langchain-openai-1.2.1 openai-2.33.0 regex-2026.4.4 sniffio-1.3.1 tiktoken-0.12.0 tqdm-4.67.3\n",
      "Requirement already satisfied: langchain in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (1.2.16)\n",
      "Requirement already satisfied: openai in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (2.33.0)\n",
      "Collecting weaviate-client\n",
      "  Downloading weaviate_client-4.21.0-py3-none-any.whl.metadata (3.5 kB)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.3.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain) (1.3.2)\n",
      "Requirement already satisfied: langgraph<1.2.0,>=1.1.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain) (1.1.10)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain) (2.13.3)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (0.0.14)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (0.7.38)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (25.0)\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (6.0.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (9.1.4)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (4.15.0)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.3.2->langchain) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.3.2->langchain) (3.1.1)\n",
      "Requirement already satisfied: langgraph-checkpoint<5.0.0,>=2.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph<1.2.0,>=1.1.10->langchain) (4.0.3)\n",
      "Requirement already satisfied: langgraph-prebuilt<1.1.0,>=1.0.12 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph<1.2.0,>=1.1.10->langchain) (1.0.13)\n",
      "Requirement already satisfied: langgraph-sdk<0.4.0,>=0.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph<1.2.0,>=1.1.10->langchain) (0.3.13)\n",
      "Requirement already satisfied: xxhash>=3.5.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph<1.2.0,>=1.1.10->langchain) (3.7.0)\n",
      "Requirement already satisfied: ormsgpack>=1.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph-checkpoint<5.0.0,>=2.1.0->langgraph<1.2.0,>=1.1.10->langchain) (1.12.2)\n",
      "Requirement already satisfied: httpx>=0.25.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (0.28.1)\n",
      "Requirement already satisfied: orjson>=3.11.5 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain) (2.33.1)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain) (0.25.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx>=0.25.2->langgraph-sdk<0.4.0,>=0.3.0->langgraph<1.2.0,>=1.1.10->langchain) (0.16.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.4.2)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from openai) (1.9.0)\n",
      "Requirement already satisfied: jiter<1,>=0.10.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from openai) (0.14.0)\n",
      "Requirement already satisfied: sniffio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from openai) (1.3.1)\n",
      "Requirement already satisfied: tqdm>4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from openai) (4.67.3)\n",
      "Collecting validators<1.0.0,>=0.34.0 (from weaviate-client)\n",
      "  Downloading validators-0.35.0-py3-none-any.whl.metadata (3.9 kB)\n",
      "Collecting authlib<2.0.0,>=1.6.7 (from weaviate-client)\n",
      "  Downloading authlib-1.7.0-py2.py3-none-any.whl.metadata (10.0 kB)\n",
      "Collecting grpcio<1.80.0,>=1.59.5 (from weaviate-client)\n",
      "  Downloading grpcio-1.78.0-cp313-cp313-macosx_11_0_universal2.whl.metadata (3.8 kB)\n",
      "Collecting protobuf<7.0.0,>=4.21.6 (from weaviate-client)\n",
      "  Downloading protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl.metadata (593 bytes)\n",
      "Collecting cryptography (from authlib<2.0.0,>=1.6.7->weaviate-client)\n",
      "  Downloading cryptography-47.0.0-cp311-abi3-macosx_10_9_universal2.whl.metadata (4.5 kB)\n",
      "Collecting joserfc>=1.6.0 (from authlib<2.0.0,>=1.6.7->weaviate-client)\n",
      "  Downloading joserfc-1.6.4-py3-none-any.whl.metadata (3.2 kB)\n",
      "Collecting cffi>=2.0.0 (from cryptography->authlib<2.0.0,>=1.6.7->weaviate-client)\n",
      "  Downloading cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (2.6 kB)\n",
      "Collecting pycparser (from cffi>=2.0.0->cryptography->authlib<2.0.0,>=1.6.7->weaviate-client)\n",
      "  Downloading pycparser-3.0-py3-none-any.whl.metadata (8.2 kB)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.3.2->langchain) (2.6.3)\n",
      "Downloading weaviate_client-4.21.0-py3-none-any.whl (639 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m639.0/639.0 kB\u001b[0m \u001b[31m1.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading authlib-1.7.0-py2.py3-none-any.whl (258 kB)\n",
      "Downloading grpcio-1.78.0-cp313-cp313-macosx_11_0_universal2.whl (11.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl (427 kB)\n",
      "Downloading validators-0.35.0-py3-none-any.whl (44 kB)\n",
      "Downloading joserfc-1.6.4-py3-none-any.whl (70 kB)\n",
      "Downloading cryptography-47.0.0-cp311-abi3-macosx_10_9_universal2.whl (7.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.9/7.9 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n",
      "\u001b[?25hDownloading cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl (185 kB)\n",
      "Downloading pycparser-3.0-py3-none-any.whl (48 kB)\n",
      "Installing collected packages: validators, pycparser, protobuf, grpcio, cffi, cryptography, joserfc, authlib, weaviate-client\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9/9\u001b[0m [weaviate-client] [weaviate-client]\n",
      "\u001b[1A\u001b[2KSuccessfully installed authlib-1.7.0 cffi-2.0.0 cryptography-47.0.0 grpcio-1.78.0 joserfc-1.6.4 protobuf-6.33.6 pycparser-3.0 validators-0.35.0 weaviate-client-4.21.0\n"
     ]
    }
   ],
   "source": [
    "# Install the LangChain stack used in this notebook.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel,\n",
    "# so the packages are importable from this notebook without guessing which\n",
    "# pip executable is first on the shell PATH.\n",
    "%pip install langchain langchain-community langchain-core\n",
    "# -U upgrades langchain-openai to the latest release if an older one is present.\n",
    "%pip install -U langchain-openai\n",
    "# langchain is already installed above, so only the remaining clients are requested here.\n",
    "%pip install openai weaviate-client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting langchain-google-genai\n",
      "  Using cached langchain_google_genai-4.2.2-py3-none-any.whl.metadata (2.7 kB)\n",
      "Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)\n",
      "  Using cached filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)\n",
      "Collecting google-genai<2.0.0,>=1.65.0 (from langchain-google-genai)\n",
      "  Using cached google_genai-1.75.0-py3-none-any.whl.metadata (52 kB)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.2.29 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-google-genai) (1.3.2)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-google-genai) (2.13.3)\n",
      "Requirement already satisfied: anyio<5.0.0,>=4.8.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (4.13.0)\n",
      "Collecting google-auth<3.0.0,>=2.48.1 (from google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai)\n",
      "  Using cached google_auth-2.52.0-py3-none-any.whl.metadata (5.5 kB)\n",
      "Requirement already satisfied: httpx<1.0.0,>=0.28.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (0.28.1)\n",
      "Requirement already satisfied: requests<3.0.0,>=2.28.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (2.33.1)\n",
      "Requirement already satisfied: tenacity<9.2.0,>=8.2.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (9.1.4)\n",
      "Collecting websockets<17.0,>=13.0.0 (from google-genai<2.0.0,>=1.65.0->langchain-google-genai)\n",
      "  Using cached websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl.metadata (6.8 kB)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.14.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (4.15.0)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (1.9.0)\n",
      "Requirement already satisfied: sniffio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-genai<2.0.0,>=1.65.0->langchain-google-genai) (1.3.1)\n",
      "Requirement already satisfied: idna>=2.8 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from anyio<5.0.0,>=4.8.0->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (3.13)\n",
      "Collecting pyasn1-modules>=0.2.1 (from google-auth<3.0.0,>=2.48.1->google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai)\n",
      "  Using cached pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB)\n",
      "Requirement already satisfied: cryptography>=38.0.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from google-auth<3.0.0,>=2.48.1->google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (47.0.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1.0.0,>=0.28.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1.0.0,>=0.28.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (1.0.9)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1.0.0,>=0.28.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (0.16.0)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (0.0.14)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (0.7.38)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (25.0)\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (6.0.3)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (3.1.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (1.0.0)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (3.7.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.29->langchain-google-genai) (0.25.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.0.0->langchain-google-genai) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.0.0->langchain-google-genai) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.0.0->langchain-google-genai) (0.4.2)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests<3.0.0,>=2.28.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests<3.0.0,>=2.28.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (2.6.3)\n",
      "Requirement already satisfied: cffi>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from cryptography>=38.0.3->google-auth<3.0.0,>=2.48.1->google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (2.0.0)\n",
      "Requirement already satisfied: pycparser in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from cffi>=2.0.0->cryptography>=38.0.3->google-auth<3.0.0,>=2.48.1->google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai) (3.0)\n",
      "Collecting pyasn1<0.7.0,>=0.6.1 (from pyasn1-modules>=0.2.1->google-auth<3.0.0,>=2.48.1->google-auth[requests]<3.0.0,>=2.48.1->google-genai<2.0.0,>=1.65.0->langchain-google-genai)\n",
      "  Using cached pyasn1-0.6.3-py3-none-any.whl.metadata (8.4 kB)\n",
      "Using cached langchain_google_genai-4.2.2-py3-none-any.whl (67 kB)\n",
      "Using cached filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n",
      "Using cached google_genai-1.75.0-py3-none-any.whl (793 kB)\n",
      "Using cached google_auth-2.52.0-py3-none-any.whl (245 kB)\n",
      "Using cached websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl (175 kB)\n",
      "Using cached pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)\n",
      "Using cached pyasn1-0.6.3-py3-none-any.whl (83 kB)\n",
      "Installing collected packages: filetype, websockets, pyasn1, pyasn1-modules, google-auth, google-genai, langchain-google-genai\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7/7\u001b[0m [langchain-google-genai]hain-google-genai]\n",
      "\u001b[1A\u001b[2KSuccessfully installed filetype-1.2.0 google-auth-2.52.0 google-genai-1.75.0 langchain-google-genai-4.2.2 pyasn1-0.6.3 pyasn1-modules-0.4.2 websockets-16.0\n"
     ]
    }
   ],
   "source": [
    "# Install (or upgrade to the latest release of) the LangChain Google GenAI integration.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -U langchain-google-genai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: huggingface_hub in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (1.13.0)\n",
      "Requirement already satisfied: filelock>=3.10.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (3.29.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (2026.4.0)\n",
      "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (1.4.3)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (0.28.1)\n",
      "Requirement already satisfied: packaging>=20.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (25.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (6.0.3)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (4.67.3)\n",
      "Requirement already satisfied: typer in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (0.25.0)\n",
      "Requirement already satisfied: typing-extensions>=4.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface_hub) (4.15.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface_hub) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface_hub) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface_hub) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface_hub) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->huggingface_hub) (0.16.0)\n",
      "Requirement already satisfied: click>=8.2.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface_hub) (8.3.3)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface_hub) (1.5.4)\n",
      "Requirement already satisfied: rich>=13.8.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface_hub) (15.0.0)\n",
      "Requirement already satisfied: annotated-doc>=0.0.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface_hub) (0.0.4)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from rich>=13.8.0->typer->huggingface_hub) (4.0.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from rich>=13.8.0->typer->huggingface_hub) (2.19.2)\n",
      "Requirement already satisfied: mdurl~=0.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich>=13.8.0->typer->huggingface_hub) (0.1.2)\n",
      "Collecting langchain_huggingface\n",
      "  Downloading langchain_huggingface-1.2.2-py3-none-any.whl.metadata (4.0 kB)\n",
      "Requirement already satisfied: huggingface-hub<2.0.0,>=0.33.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain_huggingface) (1.13.0)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.2.31 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain_huggingface) (1.3.2)\n",
      "Collecting tokenizers<1.0.0,>=0.19.1 (from langchain_huggingface)\n",
      "  Downloading tokenizers-0.23.1-cp310-abi3-macosx_10_12_x86_64.whl.metadata (9.8 kB)\n",
      "Requirement already satisfied: filelock>=3.10.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (3.29.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (2026.4.0)\n",
      "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (1.4.3)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (0.28.1)\n",
      "Requirement already satisfied: packaging>=20.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (25.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (6.0.3)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (4.67.3)\n",
      "Requirement already satisfied: typer in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (0.25.0)\n",
      "Requirement already satisfied: typing-extensions>=4.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (4.15.0)\n",
      "Requirement already satisfied: anyio in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (4.13.0)\n",
      "Requirement already satisfied: certifi in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (2026.4.22)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (1.0.9)\n",
      "Requirement already satisfied: idna in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (3.13)\n",
      "Requirement already satisfied: h11>=0.16 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (0.16.0)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.10 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.0.14)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.7.38)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (2.13.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (9.1.4)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.14.1)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (3.1.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (3.11.8)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (2.33.1)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (3.7.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.25.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.46.3 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (2.46.3)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (0.4.2)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (3.4.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.26 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain_huggingface) (2.6.3)\n",
      "Requirement already satisfied: click>=8.2.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (8.3.3)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (1.5.4)\n",
      "Requirement already satisfied: rich>=13.8.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (15.0.0)\n",
      "Requirement already satisfied: annotated-doc>=0.0.2 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (0.0.4)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from rich>=13.8.0->typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (4.0.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from rich>=13.8.0->typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (2.19.2)\n",
      "Requirement already satisfied: mdurl~=0.1 in /Users/macbook/anaconda2/envs/python313/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich>=13.8.0->typer->huggingface-hub<2.0.0,>=0.33.4->langchain_huggingface) (0.1.2)\n",
      "Downloading langchain_huggingface-1.2.2-py3-none-any.whl (31 kB)\n",
      "Downloading tokenizers-0.23.1-cp310-abi3-macosx_10_12_x86_64.whl (3.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: tokenizers, langchain_huggingface\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [langchain_huggingface]chain_huggingface]\n",
      "\u001b[1A\u001b[2KSuccessfully installed langchain_huggingface-1.2.2 tokenizers-0.23.1\n"
     ]
    }
   ],
   "source": [
    "!pip install huggingface_hub\n",
    "!pip install langchain_huggingface"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API Setup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**OPEN AI**\n",
    "\n",
    "Consulte la documentación oficial: [Inicio rápido](https://platform.openai.com/docs/quickstart) - sobre cómo obtener una API de OPEN AI.\n",
    "\n",
    "**Gemini**\n",
    "\n",
    "Para obtener la API key de **Google Gemini**:\n",
    "\n",
    "1. Ir al sitio [Google AI Studio](https://aistudio.google.com/).  \n",
    "2. Loggearte con tu cuenta Google.  \n",
    "3. Ir a la sección **Get API Key** y crear una nueva API key.  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Para guardar variables de entorno en un archivo `.env` y usar la biblioteca `dotenv` en Python para cargarlas, sigue estos pasos:\n",
    "\n",
    "### Guardar variables de entorno en un archivo `.env`:\n",
    "1. Crea un nuevo archivo en el directorio de su proyecto y llámalo `.env`. Este archivo almacenará tus variables de entorno.\n",
    "2. Agregua tus variables de entorno al archivo `.env` con el formato `VARIABLE_NAME=variable_value`. Por ejemplo:\n",
    "```\n",
    "OPENAI_API_KEY=your_api_key\n",
    "DATABASE_URL=your_database_url\n",
    "```\n",
    "\n",
    "### Usar `dotenv` en Python para cargar variables de entorno:\n",
    "3. Instala la biblioteca `dotenv` si aún no la ha instalado. Puedes instalarlo usando pip:\n",
    "```\n",
    "pip install python-dotenv\n",
    "```\n",
    "\n",
    "4. En tu script de Python, importa el módulo `dotenv`:\n",
    "```python\n",
    "from dotenv import load_dotenv\n",
    "```\n",
    "\n",
    "5. Carga las variables de entorno desde el archivo `.env` usando la función `load_dotenv()`. Coloca esta línea al principio del script:\n",
    "```python\n",
    "load_dotenv()\n",
    "```\n",
    "\n",
    "6. Accede a las variables de entorno en tu script de Python usando el diccionario `os.environ`. Por ejemplo:\n",
    "```python\n",
    "import os\n",
    "\n",
    "api_key = os.environ.get('API_KEY')\n",
    "database_url = os.environ.get('DATABASE_URL')\n",
    "\n",
    "print(\"Clave API:\", api_key)\n",
    "print(\"URL de la base de datos:\", database_url)\n",
    "```\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Nota**:\n",
    "\n",
    "- Asegúrate de agregar el archivo `.env` al archivo `.gitignore` de tu proyecto para evitar que se exponga información confidencial.\n",
    "- También puedes especificar la ruta al archivo `.env` si se encuentra en un directorio diferente:\n",
    "```python\n",
    "load_dotenv('/path/to/your/env/file/.env')\n",
    "```\n",
    "\n",
    "Siguiendo estos pasos, puedes guardar las variables de entorno en un archivo `.env` y usar la biblioteca `dotenv` de Python para cargarlas en tu script. Este enfoque ayuda a mantener la información confidencial separada de tu código fuente y facilita la gestión de las variables de entorno en tus proyectos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/macbook/Dropbox/Docencia/PLN_avanzado/Notebooks\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "print(os.getcwd())\n",
    "print(os.path.exists(\".env\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load environment variables\n",
    "\n",
    "from dotenv import load_dotenv,find_dotenv\n",
    "load_dotenv(find_dotenv())\n",
    "load_dotenv('.env')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/Users/macbook/Dropbox/Docencia/PLN_avanzado/Notebooks/.env'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "find_dotenv()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "## initialize Chat model\n",
    "import langchain\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "OPENAI_API_KEY=os.getenv(\"OPENAI_API_KEY\")\n",
    "chatOpenAI = ChatOpenAI(model_name=\"gpt-4o-mini\",temperature=0.3, openai_api_key=OPENAI_API_KEY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AIzaSyD6ieqhGNVy27l6w-iTP6PsFx-eCZqA--o\n"
     ]
    }
   ],
   "source": [
    "## initialize Chat model\n",
    "import langchain\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "\n",
    "GOOGLE_API_KEY=os.getenv(\"GOOGLE_API_KEY\")\n",
    "print(GOOGLE_API_KEY)\n",
    "chat = ChatGoogleGenerativeAI(model=\"gemini-3.1-flash-lite\",temperature=0.3, api_key=GOOGLE_API_KEY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.2.16\n"
     ]
    }
   ],
   "source": [
    "print(langchain.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Interacción inmediata con el chat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'type': 'text', 'text': 'Large language models are advanced artificial intelligence systems trained on vast amounts of text data to predict, generate, and understand human language with remarkable fluency.', 'extras': {'signature': 'EjQKMgEMOdbHiLYeLuwK0MJBaw43nn8EXxBna299u9EjTzztO05k1PKgday48hDP49gh/VTw'}}]\n"
     ]
    }
   ],
   "source": [
    "response = chat.invoke(\"explain large language models in one sentence\")\n",
    "print(response.content,end='\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prueba con modelo Huggingface"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv, find_dotenv\n",
    "from langchain_huggingface import HuggingFaceEndpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "## loading environment variables\n",
    "load_dotenv()\n",
    "HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')\n",
    "\n",
    "## Hugginface Model ID\n",
    "repo_id = \"mistralai/Mistral-7B-Instruct-v0.2\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Initalize Hugginface Model\n",
    "llm = HuggingFaceEndpoint(\n",
    "    repo_id=repo_id,  \n",
    "    provider=\"huggingface_hub\",\n",
    "    task=\"conversational\", \n",
    "    temperature=0.5, \n",
    "    max_new_tokens=500,\n",
    "    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)\n",
    "\n",
    "# llm = HuggingFaceHub(repo_id='tiiuae/falcon-7b-instruct', huggingfacehub_api_token=huggingfacehub_api_token)\n",
    "# llm = HuggingFaceEndpoint(\n",
    "#     repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
    "#     task=\"text-generation\",\n",
    "#     max_new_tokens=512,\n",
    "#     do_sample=False,\n",
    "#     repetition_penalty=1.03,\n",
    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Advertencia**\n",
    "Revisar permisos en https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Provider 'huggingface_hub' not supported. Available values: 'auto' or any provider from ['black-forest-labs', 'cerebras', 'clarifai', 'cohere', 'deepinfra', 'fal-ai', 'featherless-ai', 'fireworks-ai', 'groq', 'hf-inference', 'hyperbolic', 'nebius', 'novita', 'nscale', 'nvidia', 'openai', 'ovhcloud', 'publicai', 'replicate', 'sambanova', 'scaleway', 'together', 'wavespeed', 'zai-org'].Passing 'auto' (default value) will automatically select the first provider available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mValueError\u001b[39m                                Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[29]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m response = \u001b[43mllm\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mexplain large language models in one sentence\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m      2\u001b[39m \u001b[38;5;28mprint\u001b[39m(response.content)\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_core/language_models/llms.py:381\u001b[39m, in \u001b[36mBaseLLM.invoke\u001b[39m\u001b[34m(self, input, config, stop, **kwargs)\u001b[39m\n\u001b[32m    370\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m    371\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minvoke\u001b[39m(\n\u001b[32m    372\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m    377\u001b[39m     **kwargs: Any,\n\u001b[32m    378\u001b[39m ) -> \u001b[38;5;28mstr\u001b[39m:\n\u001b[32m    379\u001b[39m     config = ensure_config(config)\n\u001b[32m    380\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[32m--> \u001b[39m\u001b[32m381\u001b[39m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    382\u001b[39m \u001b[43m            \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_convert_input\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    383\u001b[39m \u001b[43m            \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    384\u001b[39m \u001b[43m            \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcallbacks\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    385\u001b[39m \u001b[43m            
\u001b[49m\u001b[43mtags\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtags\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    386\u001b[39m \u001b[43m            \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmetadata\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    387\u001b[39m \u001b[43m            \u001b[49m\u001b[43mrun_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_name\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    388\u001b[39m \u001b[43m            \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_id\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    389\u001b[39m \u001b[43m            \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    390\u001b[39m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    391\u001b[39m         .generations[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m]\n\u001b[32m    392\u001b[39m         .text\n\u001b[32m    393\u001b[39m     )\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_core/language_models/llms.py:798\u001b[39m, in \u001b[36mBaseLLM.generate_prompt\u001b[39m\u001b[34m(self, prompts, stop, callbacks, **kwargs)\u001b[39m\n\u001b[32m    789\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m    790\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_prompt\u001b[39m(\n\u001b[32m    791\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m    795\u001b[39m     **kwargs: Any,\n\u001b[32m    796\u001b[39m ) -> LLMResult:\n\u001b[32m    797\u001b[39m     prompt_strings = [p.to_string() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[32m--> \u001b[39m\u001b[32m798\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt_strings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_core/language_models/llms.py:1028\u001b[39m, in \u001b[36mBaseLLM.generate\u001b[39m\u001b[34m(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[39m\n\u001b[32m   1009\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m.cache \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m get_llm_cache() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m.cache \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[32m   1010\u001b[39m     run_managers = [\n\u001b[32m   1011\u001b[39m         callback_manager.on_llm_start(\n\u001b[32m   1012\u001b[39m             \u001b[38;5;28mself\u001b[39m._serialized,\n\u001b[32m   (...)\u001b[39m\u001b[32m   1026\u001b[39m         )\n\u001b[32m   1027\u001b[39m     ]\n\u001b[32m-> \u001b[39m\u001b[32m1028\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   1029\u001b[39m \u001b[43m        \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1030\u001b[39m \u001b[43m        \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1031\u001b[39m \u001b[43m        \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1032\u001b[39m \u001b[43m        \u001b[49m\u001b[43mnew_arg_supported\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mbool\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mnew_arg_supported\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1033\u001b[39m \u001b[43m        \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1034\u001b[39m \u001b[43m    
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1035\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(missing_prompts) > \u001b[32m0\u001b[39m:\n\u001b[32m   1036\u001b[39m     run_managers = [\n\u001b[32m   1037\u001b[39m         callback_managers[idx].on_llm_start(\n\u001b[32m   1038\u001b[39m             \u001b[38;5;28mself\u001b[39m._serialized,\n\u001b[32m   (...)\u001b[39m\u001b[32m   1045\u001b[39m         \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m missing_prompt_idxs\n\u001b[32m   1046\u001b[39m     ]\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_core/language_models/llms.py:824\u001b[39m, in \u001b[36mBaseLLM._generate_helper\u001b[39m\u001b[34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[39m\n\u001b[32m    813\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_generate_helper\u001b[39m(\n\u001b[32m    814\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m    815\u001b[39m     prompts: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m],\n\u001b[32m   (...)\u001b[39m\u001b[32m    820\u001b[39m     **kwargs: Any,\n\u001b[32m    821\u001b[39m ) -> LLMResult:\n\u001b[32m    822\u001b[39m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m    823\u001b[39m         output = (\n\u001b[32m--> \u001b[39m\u001b[32m824\u001b[39m             \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    825\u001b[39m \u001b[43m                \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    826\u001b[39m \u001b[43m                \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    827\u001b[39m \u001b[43m                \u001b[49m\u001b[38;5;66;43;03m# TODO: support multiple run managers\u001b[39;49;00m\n\u001b[32m    828\u001b[39m \u001b[43m                \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m[\u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m    829\u001b[39m \u001b[43m                
\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    830\u001b[39m \u001b[43m            \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    831\u001b[39m             \u001b[38;5;28;01mif\u001b[39;00m new_arg_supported\n\u001b[32m    832\u001b[39m             \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m._generate(prompts, stop=stop)\n\u001b[32m    833\u001b[39m         )\n\u001b[32m    834\u001b[39m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m    835\u001b[39m         \u001b[38;5;28;01mfor\u001b[39;00m run_manager \u001b[38;5;129;01min\u001b[39;00m run_managers:\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_core/language_models/llms.py:1530\u001b[39m, in \u001b[36mLLM._generate\u001b[39m\u001b[34m(self, prompts, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m   1527\u001b[39m new_arg_supported = inspect.signature(\u001b[38;5;28mself\u001b[39m._call).parameters.get(\u001b[33m\"\u001b[39m\u001b[33mrun_manager\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m   1528\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m prompt \u001b[38;5;129;01min\u001b[39;00m prompts:\n\u001b[32m   1529\u001b[39m     text = (\n\u001b[32m-> \u001b[39m\u001b[32m1530\u001b[39m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1531\u001b[39m         \u001b[38;5;28;01mif\u001b[39;00m new_arg_supported\n\u001b[32m   1532\u001b[39m         \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m._call(prompt, stop=stop, **kwargs)\n\u001b[32m   1533\u001b[39m     )\n\u001b[32m   1534\u001b[39m     generations.append([Generation(text=text)])\n\u001b[32m   1535\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m LLMResult(generations=generations)\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/langchain_huggingface/llms/huggingface_endpoint.py:367\u001b[39m, in \u001b[36mHuggingFaceEndpoint._call\u001b[39m\u001b[34m(self, prompt, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m    364\u001b[39m         completion += chunk.text\n\u001b[32m    365\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m completion\n\u001b[32m--> \u001b[39m\u001b[32m367\u001b[39m response_text = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtext_generation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    368\u001b[39m \u001b[43m    \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m=\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    369\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    370\u001b[39m \u001b[43m    \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43minvocation_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    371\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    373\u001b[39m \u001b[38;5;66;03m# Maybe the generation has stopped at one of the stop sequences:\u001b[39;00m\n\u001b[32m    374\u001b[39m \u001b[38;5;66;03m# then we remove this stop sequence from the end of the generated text\u001b[39;00m\n\u001b[32m    375\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stop_seq \u001b[38;5;129;01min\u001b[39;00m invocation_params[\u001b[33m\"\u001b[39m\u001b[33mstop\u001b[39m\u001b[33m\"\u001b[39m]:\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/huggingface_hub/inference/_client.py:2383\u001b[39m, in \u001b[36mInferenceClient.text_generation\u001b[39m\u001b[34m(self, prompt, details, stream, model, adapter_id, best_of, decoder_input_details, do_sample, frequency_penalty, grammar, max_new_tokens, repetition_penalty, return_full_text, seed, stop, stop_sequences, temperature, top_k, top_n_tokens, top_p, truncate, typical_p, watermark)\u001b[39m\n\u001b[32m   2377\u001b[39m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m   2378\u001b[39m             \u001b[33m\"\u001b[39m\u001b[33mAPI endpoint/model for text-generation is not served via TGI. Cannot return output as a stream.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m   2379\u001b[39m             \u001b[33m\"\u001b[39m\u001b[33m Please pass `stream=False` as input.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m   2380\u001b[39m         )\n\u001b[32m   2382\u001b[39m model_id = model \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m.model\n\u001b[32m-> \u001b[39m\u001b[32m2383\u001b[39m provider_helper = \u001b[43mget_provider_helper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mprovider\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtask\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtext-generation\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   2384\u001b[39m request_parameters = provider_helper.prepare_request(\n\u001b[32m   2385\u001b[39m     inputs=prompt,\n\u001b[32m   2386\u001b[39m     parameters=parameters,\n\u001b[32m   (...)\u001b[39m\u001b[32m   2390\u001b[39m     api_key=\u001b[38;5;28mself\u001b[39m.token,\n\u001b[32m   2391\u001b[39m )\n\u001b[32m   
2393\u001b[39m \u001b[38;5;66;03m# Handle errors separately for more precise error messages\u001b[39;00m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/anaconda2/envs/python313/lib/python3.13/site-packages/huggingface_hub/inference/_providers/__init__.py:266\u001b[39m, in \u001b[36mget_provider_helper\u001b[39m\u001b[34m(provider, task, model)\u001b[39m\n\u001b[32m    264\u001b[39m provider_tasks = PROVIDERS.get(provider)  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m    265\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m provider_tasks \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m266\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m    267\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mProvider \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprovider\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m not supported. Available values: \u001b[39m\u001b[33m'\u001b[39m\u001b[33mauto\u001b[39m\u001b[33m'\u001b[39m\u001b[33m or any provider from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(PROVIDERS.keys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    268\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mPassing \u001b[39m\u001b[33m'\u001b[39m\u001b[33mauto\u001b[39m\u001b[33m'\u001b[39m\u001b[33m (default value) will automatically select the first provider available for the model, sorted \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    269\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mby the user\u001b[39m\u001b[33m'\u001b[39m\u001b[33ms order in https://hf.co/settings/inference-providers.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    270\u001b[39m     )\n\u001b[32m    272\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m task \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m provider_tasks:\n\u001b[32m    273\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m    274\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mTask \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtask\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m not supported for provider \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprovider\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Available tasks: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(provider_tasks.keys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m    275\u001b[39m     )\n",
      "\u001b[31mValueError\u001b[39m: Provider 'huggingface_hub' not supported. Available values: 'auto' or any provider from ['black-forest-labs', 'cerebras', 'clarifai', 'cohere', 'deepinfra', 'fal-ai', 'featherless-ai', 'fireworks-ai', 'groq', 'hf-inference', 'hyperbolic', 'nebius', 'novita', 'nscale', 'nvidia', 'openai', 'ovhcloud', 'publicai', 'replicate', 'sambanova', 'scaleway', 'together', 'wavespeed', 'zai-org'].Passing 'auto' (default value) will automatically select the first provider available for the model, sorted by the user's order in https://hf.co/settings/inference-providers."
     ]
    }
   ],
   "source": [
    "response = llm.invoke(\"explain large language models in one sentence\")\n",
    "print(response.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Messages"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "En LangChain, `SystemMessage`, `HumanMessage` y `AIMessage` son clases que representan diferentes tipos de mensajes que pueden intercambiarse durante las interacciones con un modelo de lenguaje. Estas distinciones ayudan a estructurar y contextualizar las conversaciones o flujos de trabajo que involucran LLM.\n",
    "\n",
    "- `SystemMessage`: Se utiliza para proporcionar contexto o instrucciones al modelo de lenguaje. Estos mensajes suelen definir cómo debe comportarse el modelo o qué rol debe asumir.\n",
    "- `HumanMessage`: Representa la entrada o las consultas de un usuario. Estos mensajes simulan la interacción de un participante humano en la conversación.\n",
    "- `AIMessage`: Representa las respuestas generadas por el modelo de lenguaje. Estos mensajes encapsulan la salida proporcionada por la IA en respuesta a mensajes humanos o del sistema."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the message classes (system, human and AI) used to build chat conversations\n",
    "from langchain_core.messages import HumanMessage, SystemMessage, AIMessage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Certainly! Below is a Python script that demonstrates how to create and train a simple neural network using TensorFlow and Keras on simulated data. In this example, we'll generate some synthetic data for a regression task.\n",
      "\n",
      "### Prerequisites\n",
      "Make sure you have the following libraries installed:\n",
      "```bash\n",
      "pip install numpy tensorflow matplotlib\n",
      "```\n",
      "\n",
      "### Python Script\n",
      "\n",
      "```python\n",
      "import numpy as np\n",
      "import matplotlib.pyplot as plt\n",
      "from sklearn.model_selection import train_test_split\n",
      "from tensorflow import keras\n",
      "from tensorflow.keras import layers\n",
      "\n",
      "# Generate synthetic data\n",
      "def generate_data(num_samples=1000):\n",
      "    # Generate random input data\n",
      "    X = np.random.rand(num_samples, 1) * 10  # Features in the range [0, 10]\n",
      "    # Generate output data with some noise\n",
      "    y = 2 * X + 1 + np.random.normal(0, 1, (num_samples, 1))  # Linear relationship with noise\n",
      "    return X, y\n",
      "\n",
      "# Create the neural network model\n",
      "def create_model():\n",
      "    model = keras.Sequential()\n",
      "    model.add(layers.Dense(64, activation='relu', input_shape=(1,)))  # Input layer\n",
      "    model.add(layers.Dense(64, activation='relu'))  # Hidden layer\n",
      "    model.add(layers.Dense(1))  # Output layer\n",
      "    model.compile(optimizer='adam', loss='mean_squared_error')\n",
      "    return model\n",
      "\n",
      "# Main function to run the training\n",
      "def main():\n",
      "    # Generate data\n",
      "    X, y = generate_data(1000)\n",
      "\n",
      "    # Split the data into training and testing sets\n",
      "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
      "\n",
      "    # Create the model\n",
      "    model = create_model()\n",
      "\n",
      "    # Train the model\n",
      "    history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)\n",
      "\n",
      "    # Evaluate the model\n",
      "    loss = model.evaluate(X_test, y_test)\n",
      "    print(f'Test Loss: {loss}')\n",
      "\n",
      "    # Plot training history\n",
      "    plt.plot(history.history['loss'], label='train loss')\n",
      "    plt.plot(history.history['val_loss'], label='validation loss')\n",
      "    plt.title('Training and Validation Loss')\n",
      "    plt.xlabel('Epoch')\n",
      "    plt.ylabel('Loss')\n",
      "    plt.legend()\n",
      "    plt.show()\n",
      "\n",
      "    # Make predictions\n",
      "    y_pred = model.predict(X_test)\n",
      "\n",
      "    # Plot predictions vs true values\n",
      "    plt.scatter(X_test, y_test, label='True Values', alpha=0.5)\n",
      "    plt.scatter(X_test, y_pred, label='Predictions', alpha=0.5)\n",
      "    plt.title('True Values vs Predictions')\n",
      "    plt.xlabel('Input Feature')\n",
      "    plt.ylabel('Output Value')\n",
      "    plt.legend()\n",
      "    plt.show()\n",
      "\n",
      "if __name__ == '__main__':\n",
      "    main()\n",
      "```\n",
      "\n",
      "### Explanation\n",
      "1. **Data Generation**: The `generate_data` function creates synthetic data based on a linear relationship with some added Gaussian noise.\n",
      "2. **Model Creation**: The `create_model` function defines a simple feedforward neural network with two hidden layers.\n",
      "3. **Training**: The model is trained on the generated data for 100 epochs, with a batch size of 32.\n",
      "4. **Evaluation**: After training, the model is evaluated on a test set, and the loss is printed.\n",
      "5. **Visualization**: The script plots the training and validation loss over epochs and compares the model's predictions against the true values.\n",
      "\n",
      "### Running the Script\n",
      "You can run the script in a Python environment where TensorFlow and other dependencies are installed. It will generate synthetic data, train the model, and display the results visually.\n"
     ]
    }
   ],
   "source": [
    "## LLM\n",
    "chat = ChatOpenAI(model_name=\"gpt-4o-mini\",temperature=0.3)\n",
    "\n",
    "## Messages\n",
    "messages = [\n",
    "    SystemMessage(content=\"You are an expert data scientist\"),\n",
    "    HumanMessage(content=\"Write a Python script that trains a neural network on simulated data\")\n",
    "]\n",
    "\n",
    "## Response\n",
    "response=chat.invoke(messages)\n",
    "\n",
    "## Print out\n",
    "print(response.content,end='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'langchain_core.messages.ai.AIMessage'>\n"
     ]
    }
   ],
   "source": [
    "## Check the class type of `response`\n",
    "print(type(response))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prompt Template"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "En LangChain, un `PromptTemplate` es una forma estructurada de crear y gestionar las instrucciones que se enviarán a un modelo de lenguaje. Permite definir una plantilla con marcadores de posición y luego rellenarlos con valores reales al generar una indicación. Este enfoque es útil para garantizar la coherencia y la reutilización de las instrucciones que se utilizan para diferentes tareas o interacciones con el modelo de lenguaje."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Importante**\n",
    "- Cabe destacar que la calidad de las instrucciones es crucial. Un buen punto de partida serían algunos [ejemplos de instrucciones proporcionados por OpenAI](https://platform.openai.com/docs/examples).\n",
    "- Consulta la [Guía de Ingeniería de Instrucciones](https://www.promptingguide.ai/) para obtener técnicas de indicación más avanzadas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import prompt and define PromptTemplate\n",
    "from langchain_core.prompts import PromptTemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Create raw template\n",
    "template = \"\"\"\n",
    "You are a college professor with an expertise in building deep learning models. \n",
    "Please provide the answer of {question} like I am five years old child.\n",
    "\"\"\"\n",
    "\n",
    "## Initialize Prompt Template\n",
    "prompt = PromptTemplate.from_template(\n",
    "    template=template,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run LLM with PromptTemplate\n",
    "response = chat.invoke(prompt.format(question=\"What is gradient descent?\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Okay! Imagine you are on a big, bumpy hill, and you want to get to the bottom where it's flat and nice. But it's dark, and you can't see very well. \n",
      "\n",
      "Gradient descent is like feeling the ground with your feet to find the steepest way down. You take a small step in that direction, then stop and feel the ground again. You keep doing this—taking small steps down the hill—until you reach the bottom.\n",
      "\n",
      "In deep learning, we use gradient descent to help our computer learn. The computer is trying to find the best answer, like finding the bottom of the hill, and it does this by making small changes and checking if it’s getting closer to the right answer. Just like you, it keeps stepping down until it finds the best spot!\n"
     ]
    }
   ],
   "source": [
    "print(response.content,end='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Nota**\n",
    "\n",
    "**Few-shot learning** es un enfoque de aprendizaje automático que permite a los modelos aprender y generalizar a partir de un pequeño número de ejemplos de entrenamiento. A diferencia de los métodos tradicionales, que requieren grandes cantidades de datos etiquetados, el aprendizaje de pocos ejemplos busca realizar tareas y hacer predicciones con precisión con solo unos pocos ejemplos por clase. Esto es especialmente valioso en escenarios donde la recopilación de datos es costosa, requiere mucho tiempo o resulta poco práctica.\n",
    "\n",
    "En el contexto de los LLM, proporcionar al LLM algunos ejemplos de entradas y salidas se denomina aprendizaje de pocos ejemplos (few-shotting) y es una forma sencilla pero eficaz de guiar la generación y, en algunos casos, mejorar drásticamente el rendimiento del modelo. Gracias a la conveniencia de las plantillas de indicaciones, podemos inyectar ejemplos semilla al LLM de forma sistemática.\n",
    "\n",
    "Consulte [Cómo usar ejemplos few-shot en modelos de chat](https://python.langchain.com/v0.2/docs/how_to/few_shot_examples_chat/) y [Cómo usar ejemplos few-shot](https://python.langchain.com/v0.2/docs/how_to/few_shot_examples/) para obtener más detalles."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Nota**\n",
    "\n",
    "En lugar de pedirle directamente al LLM una respuesta, la técnica **Chain-of-Thought** anima al modelo a pensar en una serie de pasos lógicos o pensamientos intermedios. Esto ayuda a abordar problemas complejos que requieren razonamiento y soluciones de varios pasos."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chain"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "En LangChain, una **Cadena** es una secuencia estructurada de operaciones o pasos que se ejecutan para lograr una tarea específica. Las cadenas pueden considerarse flujos de trabajo que combinan diversos componentes, como indicaciones, modelos y pasos de procesamiento de datos, para automatizar tareas complejas o interacciones con modelos de lenguaje.\n",
    "\n",
    "Puntos clave sobre las cadenas:\n",
    "\n",
    "- **Pasos secuenciales**: Las cadenas consisten en una serie de pasos que se ejecutan uno tras otro. Cada paso puede implicar diferentes operaciones, como generar texto, procesar datos o interactuar con otros servicios.\n",
    "\n",
    "- **Modularidad**: Las cadenas permiten descomponer tareas complejas en componentes más pequeños y manejables. Cada paso de una cadena puede ser un módulo reutilizable, lo que facilita la creación y el mantenimiento de flujos de trabajo sofisticados.\n",
    "\n",
    "- **Flexibilidad**: Las cadenas se pueden personalizar para adaptarse a casos de uso específicos. Se puede definir la secuencia de operaciones y especificar las entradas y salidas de cada paso.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "## create chain of prompt and chat\n",
    "chain = prompt | chat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "## interact with chain\n",
    "response = chain.invoke({\"question\": \"What is gradient descent?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Okay! Imagine you are on a big, bumpy hill, and you want to find the way down to the bottom where it's flat. You can't see the whole hill because it's really big, but you can feel the ground under your feet.\n",
      "\n",
      "Now, every time you take a step, you look around to see which way is downhill. You take a small step in that direction. Then, you stop and look again to see if you can go down even more. You keep doing this—taking little steps down the hill—until you finally reach the bottom.\n",
      "\n",
      "Gradient descent is like that! In the world of computers and math, we want to find the best answer to a problem, like finding the lowest point on that hill. The computer takes small steps, checking which way is down, until it finds the best answer.\n"
     ]
    }
   ],
   "source": [
    "print(response.content,end='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "\n",
    "## create another chain\n",
    "chain2 = prompt | chat | StrOutputParser()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Okay! Imagine you are on a big, bumpy hill, and you want to find the way down to the bottom where it's flat. But it's really foggy, and you can't see very far. \n",
      "\n",
      "So, what do you do? You take small steps! You look around to see which way is the steepest down, and then you take a little step in that direction. After that, you look around again and take another little step down. You keep doing this until you reach the bottom of the hill.\n",
      "\n",
      "In the world of computers and deep learning, gradient descent is like that! It helps the computer learn by taking small steps to find the best answer, just like you find your way down the hill. Each step helps it get closer to the right answer!\n"
     ]
    }
   ],
   "source": [
    "## invoke the chain with a dictionary input\n",
    "explanation=chain2.invoke({\"question\": \"What is gradient descent?\"})\n",
    "print(explanation,end='\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text Splitting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import utility for splitting up texts and split up the explanation given above into document chunks\n",
    "\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size = 50,\n",
    "    chunk_overlap  = 20,\n",
    ")\n",
    "\n",
    "texts = text_splitter.split_text(explanation)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22\n",
      "Chunk 1: Okay! Imagine you are on a big, bumpy hill, and\n",
      "\n",
      "Chunk 2: bumpy hill, and you want to find the way down to\n",
      "\n",
      "Chunk 3: the way down to the bottom where it's flat. But\n",
      "\n",
      "Chunk 4: it's flat. But it's really foggy, and you can't\n",
      "\n",
      "Chunk 5: and you can't see very far.\n",
      "\n",
      "Chunk 6: So, what do you do? You take small steps! You\n",
      "\n",
      "Chunk 7: small steps! You look around to see which way is\n",
      "\n",
      "Chunk 8: to see which way is the steepest down, and then\n",
      "\n",
      "Chunk 9: down, and then you take a little step in that\n",
      "\n",
      "Chunk 10: little step in that direction. After that, you\n",
      "\n",
      "Chunk 11: After that, you look around again and take\n",
      "\n",
      "Chunk 12: again and take another little step down. You keep\n",
      "\n",
      "Chunk 13: step down. You keep doing this until you reach\n",
      "\n",
      "Chunk 14: until you reach the bottom of the hill.\n",
      "\n",
      "Chunk 15: In the world of computers and deep learning,\n",
      "\n",
      "Chunk 16: and deep learning, gradient descent is like that!\n",
      "\n",
      "Chunk 17: is like that! It helps the computer learn by\n",
      "\n",
      "Chunk 18: computer learn by taking small steps to find the\n",
      "\n",
      "Chunk 19: steps to find the best answer, just like you find\n",
      "\n",
      "Chunk 20: just like you find your way down the hill. Each\n",
      "\n",
      "Chunk 21: down the hill. Each step helps it get closer to\n",
      "\n",
      "Chunk 22: it get closer to the right answer!\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Individual text chunks \n",
    "\n",
    "print(len(texts))\n",
    "for i, text in enumerate(texts):\n",
    "    print(f\"Chunk {i+1}: {text}\\n\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retrieval-Augmented Generation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Cinco componentes importantes para RAG:\n",
    "    - Cargar: `DocumentLoaders`\n",
    "    - Dividir: `TextSplitters`\n",
    "    - Almacenar: Bases de datos vectoriales y modelo de incrustaciones\n",
    "    - Recuperar: `Retriever`\n",
    "    - Generar: Un ChatModel/LLM genera una respuesta mediante una solicitud que incluye la pregunta y los datos recuperados"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Nota**\n",
    "\n",
    "Las bases de datos de vectores (almacenes de vectores) son bases de datos especializadas diseñadas para almacenar y recuperar vectores de alta dimensión. Se utilizan comúnmente en aprendizaje automático y ciencia de datos para tareas como la búsqueda de vecinos más cercanos, la búsqueda por similitud y la agrupación en clústeres. Los diferentes almacenes de vectores ofrecen distintas características, optimizaciones y capacidades."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ## Load documents from PDF\n",
    "# from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "# loader = PyPDFLoader(\"Syllabus_Asignatura_Tópicos_Especiales_II.pdf\")\n",
    "# pages = loader.load()\n",
    "# pages\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Load documents from webpages \n",
    "import bs4\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "\n",
    "loader = WebBaseLoader(\"https://labelyourdata.com/articles/llm-model-size\")\n",
    "pages = loader.load()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content=\"\\n\\n\\n\\nLLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPlatform\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tServices\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nLabeling Data\\n\\n\\n\\n\\n\\n\\nText & Documents\\n\\n\\n\\n\\n\\nMedical Imaging\\n\\n\\n\\n\\n\\n3D Point Cloud\\n\\n\\n\\n\\n\\nImage\\n\\n\\n\\n\\n\\nVideo\\n\\n\\n\\n\\n\\nAudio\\n\\n\\n\\n\\n\\nGIS\\n\\n\\n\\n\\n\\nProcessing Data\\n\\n\\n\\n\\n\\n\\nContent Moderation\\n\\n\\n\\n\\n\\nData Generation\\n\\n\\n\\n\\n\\nData Collection\\n\\n\\n\\n\\n\\nData Entry \\n\\n\\n\\n\\n\\nImproving ML Models\\n\\n\\n\\n\\n\\n\\nLLM Fine Tuning\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tWho We Serve\\t\\t\\t\\t\\t\\t\\n\\n\\nFor:\\n\\n\\n\\n\\n\\nML Engineers\\n\\n•\\n\\n\\n\\n\\nAI Business Executives\\n\\n•\\n\\n\\n\\n\\nProduct Managers\\n\\n•\\n\\n\\n\\n\\nAcademic Researchers\\n\\n•\\n\\n\\n\\n\\n\\nTechnology & Innovation\\n\\n\\n\\n\\n\\n\\nDrones\\n\\n\\n\\n\\n\\nFinTech\\n\\n\\n\\n\\n\\nRobotics\\n\\n\\n\\n\\n\\nGeospatial\\n\\n\\n\\n\\n\\nAutonomous Vehicles\\n\\n\\n\\n\\n\\nIndustry & Commerce\\n\\n\\n\\n\\n\\n\\nAgriculture\\n\\n\\n\\n\\n\\nManufacturing\\n\\n\\n\\n\\n\\nAviation\\n\\n\\n\\n\\n\\nRetail\\n\\n\\n\\n\\n\\nE-commerce\\n\\n\\n\\n\\n\\nAcademia & 
Services\\n\\n\\n\\n\\n\\n\\nAcademia\\n\\n\\n\\n\\n\\nHealthcare\\n\\n\\n\\n\\n\\nInsurance\\n\\n\\n\\n\\n\\nSoftware Agencies\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tAbout\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tCareers\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tResources\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nCase Studies\\n\\n\\n\\n\\n\\n\\nFormula Student\\n\\n\\n\\n\\n\\nLandfill Detection Model Validation\\n\\n\\n\\n\\n\\nImage Annotation for Defense\\n\\n\\n\\n\\n\\nDrone Data Annotation\\n\\n\\n\\n\\n\\nAcoustic Target Detection\\n\\n\\n\\n\\n\\nVideo Annotation of Military UAVs\\n\\n\\n\\n\\n\\nTechnological University Dublin\\n\\n\\n\\n\\n\\nAnnotation for Skylum\\n\\n\\n\\n\\n\\nLatest from blog\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView all\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n3D Computer Vision: How It Works and Where It’s Used\\n\\n\\n\\n\\n\\nGround Truth Data: What It Is and How to Build It Right\\n\\n\\n\\nWeekly\\nML DIGEST\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\nGuides\\n\\n\\n\\n\\n\\n\\nThe Guide to In-House Dataset Labeling\\n\\n\\n\\n\\n\\nThe Buyer’s Guide to Data Labeling Vendors\\n\\n\\n\\n\\n\\nThe Guide to Geospatial Annotation\\n\\n\\n\\n\\n\\nadditional help\\n\\n\\n\\n\\n\\n\\nFAQs\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPricing\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\ttalk to sales\\n\\t\\t\\t\\t\\n\\n\\n\\n\\n\\ntalk to sales\\n\\n\\n\\n\\n\\t\\t\\t\\t\\tlog in\\n\\t\\t\\t\\t\\n\\n\\t\\t\\t\\t\\t\\trun free pilot\\n\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nStart Free Pilot\\nfill up this form to send your pilot request\\n\\n\\n\\nEmail is not valid.\\n\\n\\n\\nEmail is not valid\\n\\n\\n\\nPhone is not valid\\n\\n\\n\\nSome error text\\n\\n\\n\\t\\t\\t\\t\\t\\tSubmit\\n\\t\\t\\t\\t\\t\\t\\n\\nReferrer domain is wrong\\n\\n\\n\\n\\n\\n\\nThank you for 
contacting us!\\nWe'll get back to you shortly\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLabel Your Data were genuinely interested in the success of my project, asked good questions, and were flexible in working in my proprietary software environment.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKyle Hamilton\\nPhD Researcher at TU Dublin\\n\\n\\n\\n\\n\\nTrusted by ML Professionals\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBack to blog\\n\\n\\nPublished December 19, 2024\\nLLM Model Size: Comparison Chart & Performance Guide\\n\\n\\n\\n\\n\\n\\n\\n\\nKaryna Naminas\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCEO of Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nSummarize:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTable of Contents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTL;DR\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat LLM Model Size Parameters Tell You About Its Performance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow to Calculate LLM Model Size in GB\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Model Size Comparison Chart (2025)\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM 
Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 7B LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhy are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTL;DR\\n\\n\\nLLM model size represents the number of parameters that determine a model’s capacity and memory footprint.\\n\\n\\nModel size in GB depends 
on both parameter count and precision: a 7B model is about 14 GB (FP16) or 7 GB (INT8).\\n\\n\\nLarger models offer higher reasoning accuracy but slower speed and higher cost.\\n\\n\\n7B-13B models provide the best balance for most use cases.\\n\\n\\n\\nLLM fine-tuning services\\nFirst fine-tuning is FREE\\nLEARN MORE\\nWhat LLM Model Size Parameters Tell You About Its PerformanceLLM model size is the total number of parameters a model uses to process and generate language.LLM model size is defined by the number of parameters a model uses to process and generate language. Each parameter is a numerical weight adjusted during LLM training to represent learned linguistic patterns. The total number of parameters determines how much information the model can store and how effectively it can understand context or perform reasoning tasks.LLM model size: Comparing top modelsModel size has expanded at an exponential rate. The original Transformer architecture in 2017 contained about 65 million parameters. GPT-3 introduced 175 billion, and models such as PaLM and GPT-4 now reach into the hundreds of billions or even over one trillion parameters. This rapid scaling is directly tied to improvements in reasoning and generalization.However, larger parameter counts increase computational and memory demands. A 1-billion-parameter model needs around 1.86 GB at 16-bit precision, while a 70-billion-parameter model can exceed 140 GB.\\xa0For most applications, models in the 7B-13B range balance reasoning quality with practical performance, especially when quantized or fine-tuned for specific tasks. Here, continuous LLM model comparison across sizes supports better planning for cost, performance, and hardware constraints.How to Calculate LLM Model Size in GBGPT4 LLM model size estimateLLM model size in gigabytes (GB) shows how much memory a model needs to store its parameters. 
One gigabyte equals roughly one billion bytes of data.Since each parameter is a number represented in bytes, the total model size depends on how many parameters it has and the precision used to store them.You can use a simple LLM model size calculator to estimate this value:Model size (bytes) = Number of parameters × Bytes per parameterAt 32-bit precision (FP32), each parameter takes 4 bytes. At 16-bit (FP16), it takes 2 bytes. Lower-precision or quantized formats such as INT8 (1 byte) or INT4 (0.5 byte) reduce storage significantly with minimal accuracy loss.For example, a 7-billion-parameter model occupies about 28 GB at FP32, 14 GB at FP16, 7 GB at INT8, and 3.5 GB at INT4. Similarly, a 70-billion-parameter model requires roughly 280 GB at FP32 or 70 GB at INT8. These values show how precision settings directly affect deployability and hardware cost.Quantization allows engineers to fit larger, more capable models within limited memory. Running a 9B model at 4-bit precision can often outperform a smaller 2B model at full precision, offering greater reasoning capacity while keeping memory use within typical GPU limits.\\n\\n\\n\\n\\nMeasuring latency requirements and average query complexity is crucial. For example, our e-commerce chatbot worked fine with a 7B model, but our content generation system needed at least 13B for acceptable quality.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nJoshua Odmark\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCIO and Founder,\\t\\t\\t\\tLocal Data Exchange\\n\\n\\nLLM Model Size Comparison Chart (2025)Comparing LLM model sizes highlights how parameter count and memory footprint scale across architectures. The following table shows approximate sizes for popular models in both FP16 and INT8 precision formats.\\xa0These estimates are based on publicly available data and common quantization levels used for deployment.Comparing LLM sizes shows how parameter count and memory footprint scale across model architectures. 
The LLM model size chart below summarizes commonly used models and their approximate storage needs under FP16 and INT8 precision. These values are based on public data and standard quantization methods used in deployment.ModelParameters (B)Size (FP16, GB)Size (INT8, GB)Typical Use CaseBERT Base0.110.40.2Text classification, embeddingsMistral 7B7.3147General NLP tasks, chat, summarizationGemma 2 9B9189Balanced reasoning and efficiencyLLaMA 3 70B7014070Complex reasoning, multilingual tasksMixtral 8×7B46 (active 12.9)2613Mixture-of-experts performance with lower costGPT-3175350175Broad NLP, text generationClaude 2~100200100Document analysis, enterprise tasksPaLM 25401080540Multimodal and multilingual applicationsGPT-4 (est.)>1,000>2,000>1,000Advanced reasoning, multimodal AIAn updated LLM size chart helps engineers quickly estimate which model can fit into their GPU memory before training or deployment.\\xa0In practice, models between 7B and 13B parameters represent the most efficient middle ground for local and enterprise-level fine-tuning. Larger architectures such as 70B and above are mainly used in cloud-based inference or research environments where high-capacity GPUs are available.LLM Size vs Performance: Finding the BalanceLLM performance and timeline comparison (Source: information is beautiful)LLM performance scales with size, but the relationship is not linear. Increasing parameter count improves reasoning, comprehension, and generalization up to a point, after which performance gains diminish while computational and energy costs rise sharply.Smaller models, such as those under 3 billion LLM parameters, handle basic text classification and sentiment analysis but often fail on multistep reasoning tasks. Models between 7B and 13B parameters deliver a strong balance of speed, accuracy, and cost efficiency. 
Beyond 70B parameters, performance improvements become incremental compared to the steep rise in compute and latency.Model Size RangeTypical TasksPerformanceTrade-Offs1–3BSimple NLP, embeddings, mobile inferenceFast, limited reasoningShallow context understanding7–13BGeneral chat, summarization, QAStrong balanceModerate compute cost30–70BAdvanced reasoning, multilingual, code generationHigh accuracyRequires enterprise GPUs100B+Multimodal, research-scale modelsPeak performanceVery high cost and latencyPerformance also depends on architecture and quantization. A quantized 9B model running at 4-bit precision can outperform a smaller 2B model at full precision while remaining within desktop GPU limits.\\xa0For practical deployments, evaluating both model size and precision provides a clearer indicator of real-world efficiency than parameter count alone.\\n\\n\\n\\n\\nStart with a smaller model and gradually scale up based on performance metrics. Find that sweet spot between performance and resource usage.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nChristian Marin\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCEO,\\t\\t\\t\\tFreezenova\\n\\n\\nChoosing the Right LLM Model Size for Your Use CaseUnderstanding LLM model sizeLarger models provide higher reasoning accuracy and language fluency but require significantly more memory, power, and processing time. 
Smaller or quantized models are faster, more affordable, and easier to deploy.Model TypeParametersTypical HardwareBest ForKey AdvantageSmall<3BLaptops, edge devicesClassification, sentiment, embeddingsLow latency and power useMedium7–13BConsumer GPUs (8–24 GB)Chatbots, summarization, RAG pipelinesBalanced accuracy and costLarge30–70BMulti-GPU or cloudComplex reasoning, multilingual QAHigher contextual accuracyVery Large100B+Enterprise clustersMultimodal or research modelsAdvanced reasoning and creativityFine-tuning allows teams to improve task-specific accuracy without moving to a larger model.\\xa0Many organizations use fine-tuned 7B or 13B models to achieve domain-level precision comparable to general-purpose 70B models. Label Your Data supports this approach by preparing domain-relevant annotated datasets optimized for fine-tuning and evaluation workflows.\\n\\n\\n\\n\\nUse cases matter: FAQs work with smaller models, while real-time chatbots need larger LLMs. Edge devices require lighter models, but the cloud can handle bigger ones. Start small, analyze, and scale as needed.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nGursharan Singh\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCo-Founder,\\t\\t\\t\\tWebSpero Solutions\\n\\n\\nOnce the right model scale is selected, LLM fine tuning becomes a key step to adapt the model to domain-specific tasks. Label Your Data supports this process through LLM fine-tuning services built on verified datasets and scalable QA frameworks for enterprise models and comparative research such as Gemini vs ChatGPT evaluation.What Else Affects LLM Performance Beyond Model SizeMoore’s Law in NLP: LLM model size is improving over timeModel size is a major factor in language model capability, but it is not the only one. 
Performance also depends on the quality of training data, architecture design, and available computational resources.\\xa0These factors determine how efficiently a model learns and how well it performs on real-world tasks.Training data qualityHigh-quality, diverse, and well-annotated datasets are essential for reliable performance. Even a large model will underperform if trained on noisy or biased ML datasets. Working with a specialized data annotation company like Label Your Data ensures that the training corpus is accurate, diverse, and task-specific. A trusted data annotation platform can also support model fine-tuning, QA, and benchmarking for different types of LLMs.Models designed for multimodal input require even more extensive annotation pipelines and higher data annotation pricing due to task complexity.Model architectureArchitectural choices define how effectively a model uses its parameters. Transformer-based designs, sparse attention mechanisms, and mixture-of-experts (MoE) frameworks allow larger models to scale efficiently without linear growth in computation. For instance, Mixtral 8×7B activates only a subset of experts during inference, achieving performance similar to 70B-parameter models at lower cost.The choice of architecture interacts closely with the quality of the input data. Even advanced transformer or mixture-of-experts architectures rely on professional data annotation workflows.Computational resourcesHardware capability affects both training and inference speed. GPUs, TPUs, and dedicated AI accelerators improve efficiency, while limited VRAM restricts model size and context window length. Quantization and distributed inference help reduce these hardware constraints, enabling larger models to run on mid-range systems.Overall, model performance reflects the interaction between size, data quality, architecture, and compute resources. 
Balancing these elements is key to achieving consistent accuracy and efficiency across different deployment environments.About Label Your Data\\nIf you choose to delegate LLM fine-tuning, run a free data pilot with Label Your Data. Our outsourcing strategy has helped many companies scale their ML projects. Here’s why: \\n\\n\\n\\n\\n\\nNo Commitment\\nCheck our performance based on a free trial\\n\\n\\n\\n\\n\\nFlexible Pricing\\nPay per labeled object or per annotation hour\\n\\n\\n\\n\\n\\nTool-Agnostic\\nWorking with every annotation tool, even your custom tools\\n\\n\\n\\n\\n\\nData Compliance\\nWork with a data-certified vendor: PCI DSS Level 1, ISO:2700, GDPR, CCPA\\n\\n\\nLLM fine-tuning services\\nFirst fine-tuning is FREE\\nLEARN MORE\\n\\n\\nFAQ\\n\\n\\n\\nWhat is the size of an LLM model?\\n\\n\\n\\n\\n\\n\\nThe size of an LLM model is determined by the total number of parameters it contains. Each parameter is a learned weight that helps the model understand and generate language. Together, these parameters define both the model’s capability and the amount of memory required to store it. \\n\\n\\n\\n\\nWhat is LLM parameter size?\\n\\n\\n\\n\\n\\n\\nParameter size refers to the total number of tunable weights in the model. It's a core measure of model capacity. Common LLM model sizes include 7B (e.g., Mistral), 70B (e.g., LLaMA 3), and 175B+ (e.g., GPT-3/4). Parameter count influences accuracy, memory footprint, and training duration. The LLM parameter size comparison helps identify which model scale suits your hardware and performance needs. \\n\\n\\n\\n\\nHow many GB is an LLM model?\\n\\n\\n\\n\\n\\n\\nThe size in GB depends on the number of parameters and the level of precision. For example, a 70B parameter model takes about 280 GB at 32-bit precision. With 8-bit quantization, that drops to around 70–90 GB. Very large models like GPT-4 or Claude 3.5 may exceed 1 TB if uncompressed. 
\\n\\n\\n\\n\\nHow big is a 7B LLM model?\\n\\n\\n\\n\\n\\n\\nA 7-billion-parameter (7B) model requires about 28 GB at 32-bit precision (FP32), 14 GB at 16-bit (FP16), and roughly 7 GB at 8-bit (INT8). When quantised to 4-bit precision (INT4), it can run in about 3.5 GB of memory, making it suitable for consumer-grade GPUs. \\n\\n\\n\\n\\nWhy are LLM models so large?\\n\\n\\n\\n\\n\\n\\nLLMs are large because they contain billions of parameters trained on extensive text datasets. Each parameter increases the model’s capacity to capture linguistic structure, context, and reasoning. As a result, performance scales with size; but so do compute, energy, and storage requirements.Large models require vast annotated datasets prepared through systematic data annotation services. These datasets feed billions of text or image recognition examples into the machine learning algorithm during training. \\n\\n\\n\\n\\nWhat does 32B mean in LLM?\\n\\n\\n\\n\\n\\n\\n“32B” stands for 32 billion parameters. The “B” indicates billions, which is a standard way of describing model scale. A 32B model sits between mid-size (7B–13B) and large-scale (70B+) architectures, requiring roughly 64 GB at FP16 or 32 GB at INT8 precision. \\n\\n\\n\\n\\n\\nWritten by\\n\\n\\n\\n\\n\\n\\n\\nKaryna Naminas\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCEO of Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\nKaryna is the CEO of Label Your Data, a company specializing in data labeling solutions for machine learning projects. 
With a strong background in machine learning, she frequently collaborates with editors to share her expertise through articles, whitepapers, and presentations.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTable of Contents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTL;DR\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat LLM Model Size Parameters Tell You About Its Performance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow to Calculate LLM Model Size in GB\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Model Size Comparison Chart (2025)\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is 
the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 7B LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhy are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubscribe to ML Digest\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\tSUBSCRIBE NOW\\n\\t\\t\\n\\n\\nBlog articles, academic ML research, news\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\tRead Next\\n\\t\\t\\t\\t\\n\\n\\n\\n\\nLLM Orchestration: Strategies, Frameworks, and Best Practices\\n13 min read\\nLearn more\\n\\n\\n\\nLLM Inference: Techniques for Optimized Deployment\\n13 min read\\nWhat’s LLM inference?\\n\\n\\n\\n\\n\\n\\n\\n\\nHigh-quality\\n\\n\\t\\t\\t\\t\\tdata annotation\\n\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\nfor ML\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBest data labeling  service according  to G2 score\\n\\n\\n\\n\\nOur Services\\n\\nData Labeling\\nLLM fine tuning\\n\\n\\n\\nIndustries we work with\\n\\n\\nDrones\\nFinTech\\nRobotics\\nGeospatial\\nAviation\\nRetail\\n\\n\\nAgriculture\\nManufacturing\\nAcademia\\nInsurance\\nE-commerce\\nHealthcare\\n\\n\\n\\n\\nCompany\\n\\nAbout\\nPricing\\nPrivacy & Cookies Policy\\nData Privacy Notice\\nTerms of Use\\nAffiliate Program – Terms of Use\\n\\n\\n\\nLocation\\n\\n\\t\\t\\t\\tUS Office: \\n\\t\\t\\t\\t1007 North Orange Street, 
\\n\\t\\t\\t\\t4th Floor, Suite 122, \\n\\t\\t\\t\\tWilmington, DE 19801, USA. \\n\\n\\n\\t\\t\\t\\tEU Office: \\n\\t\\t\\t\\t2A Cheilonos street \\n\\t\\t\\t\\tThe Riverside Forum \\n\\t\\t\\t\\t2nd floor \\n\\t\\t\\t\\tNicosia, 1101, Cyprus \\n\\n\\n\\n\\n\\n© 2026 Label Your Data Team\\n\\n\\nSales:\\n\\n\\n\\n\\n\\n\\nJob Openings:\\n\\nworkable.com/labelyourdata \\n\\n\\n\\nFor other queries:\\n\\n\\n\\n\\n\\nFollow us. Stay tuned:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\")]"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pages"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install [FAISS](https://github.com/facebookresearch/faiss), a library for efficient similarity search and clustering of dense vectors.\n",
    "\n",
    "`pip install faiss-cpu`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Split and Store\n",
    "\n",
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "# Splitter configured for chunks of size 1000 with an overlap of 50\n",
    "# between neighbouring chunks.\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)\n",
    "\n",
    "# Embedding model used to turn each chunk into a vector.\n",
    "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")\n",
    "\n",
    "# Break the previously loaded `pages` into chunk-sized documents.\n",
    "documents = text_splitter.split_documents(pages)\n",
    "\n",
    "# Embed every chunk and index the resulting vectors in a FAISS store.\n",
    "vector = FAISS.from_documents(documents=documents, embedding=embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPlatform\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tServices\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nLabeling Data\\n\\n\\n\\n\\n\\n\\nText & Documents\\n\\n\\n\\n\\n\\nMedical Imaging\\n\\n\\n\\n\\n\\n3D Point Cloud\\n\\n\\n\\n\\n\\nImage\\n\\n\\n\\n\\n\\nVideo\\n\\n\\n\\n\\n\\nAudio\\n\\n\\n\\n\\n\\nGIS\\n\\n\\n\\n\\n\\nProcessing Data\\n\\n\\n\\n\\n\\n\\nContent Moderation\\n\\n\\n\\n\\n\\nData Generation\\n\\n\\n\\n\\n\\nData Collection\\n\\n\\n\\n\\n\\nData Entry \\n\\n\\n\\n\\n\\nImproving ML Models\\n\\n\\n\\n\\n\\n\\nLLM Fine Tuning\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tWho We Serve\\t\\t\\t\\t\\t\\t\\n\\n\\nFor:\\n\\n\\n\\n\\n\\nML Engineers\\n\\n•\\n\\n\\n\\n\\nAI Business Executives\\n\\n•\\n\\n\\n\\n\\nProduct Managers\\n\\n•\\n\\n\\n\\n\\nAcademic Researchers\\n\\n•\\n\\n\\n\\n\\n\\nTechnology & Innovation\\n\\n\\n\\n\\n\\n\\nDrones\\n\\n\\n\\n\\n\\nFinTech\\n\\n\\n\\n\\n\\nRobotics\\n\\n\\n\\n\\n\\nGeospatial\\n\\n\\n\\n\\n\\nAutonomous Vehicles\\n\\n\\n\\n\\n\\nIndustry & Commerce\\n\\n\\n\\n\\n\\n\\nAgriculture\\n\\n\\n\\n\\n\\nManufacturing\\n\\n\\n\\n\\n\\nAviation\\n\\n\\n\\n\\n\\nRetail\\n\\n\\n\\n\\n\\nE-commerce\\n\\n\\n\\n\\n\\nAcademia & Services\\n\\n\\n\\n\\n\\n\\nAcademia\\n\\n\\n\\n\\n\\nHealthcare\\n\\n\\n\\n\\n\\nInsurance\\n\\n\\n\\n\\n\\nSoftware 
Agencies\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tAbout\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tCareers'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Careers\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tResources\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nCase Studies\\n\\n\\n\\n\\n\\n\\nFormula Student\\n\\n\\n\\n\\n\\nLandfill Detection Model Validation\\n\\n\\n\\n\\n\\nImage Annotation for Defense\\n\\n\\n\\n\\n\\nDrone Data Annotation\\n\\n\\n\\n\\n\\nAcoustic Target Detection\\n\\n\\n\\n\\n\\nVideo Annotation of Military UAVs\\n\\n\\n\\n\\n\\nTechnological University Dublin\\n\\n\\n\\n\\n\\nAnnotation for Skylum\\n\\n\\n\\n\\n\\nLatest from blog\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView all\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n3D Computer Vision: How It Works and Where It’s Used\\n\\n\\n\\n\\n\\nGround Truth Data: What It Is and How to Build It Right\\n\\n\\n\\nWeekly\\nML DIGEST\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\nGuides\\n\\n\\n\\n\\n\\n\\nThe Guide to In-House Dataset Labeling\\n\\n\\n\\n\\n\\nThe Buyer’s Guide to Data Labeling Vendors\\n\\n\\n\\n\\n\\nThe Guide to Geospatial Annotation\\n\\n\\n\\n\\n\\nadditional help\\n\\n\\n\\n\\n\\n\\nFAQs\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPricing\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\ttalk to sales\\n\\t\\t\\t\\t\\n\\n\\n\\n\\n\\ntalk to sales\\n\\n\\n\\n\\n\\t\\t\\t\\t\\tlog in\\n\\t\\t\\t\\t\\n\\n\\t\\t\\t\\t\\t\\trun free pilot\\n\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nStart Free Pilot\\nfill up this form to send your pilot request\\n\\n\\n\\nEmail is not valid.\\n\\n\\n\\nEmail is not valid'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content=\"Email is not valid.\\n\\n\\n\\nEmail is not valid\\n\\n\\n\\nPhone is not valid\\n\\n\\n\\nSome error text\\n\\n\\n\\t\\t\\t\\t\\t\\tSubmit\\n\\t\\t\\t\\t\\t\\t\\n\\nReferrer domain is wrong\\n\\n\\n\\n\\n\\n\\nThank you for contacting us!\\nWe'll get back to you shortly\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLabel Your Data were genuinely interested in the success of my project, asked good questions, and were flexible in working in my proprietary software environment.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKyle Hamilton\\nPhD Researcher at TU Dublin\\n\\n\\n\\n\\n\\nTrusted by ML Professionals\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBack to blog\\n\\n\\nPublished December 19, 2024\\nLLM Model Size: Comparison Chart & Performance Guide\\n\\n\\n\\n\\n\\n\\n\\n\\nKaryna Naminas\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCEO of Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nSummarize:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTable of Contents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTL;DR\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat LLM Model Size Parameters Tell You About Its Performance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow to Calculate LLM Model Size in GB\"),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Model Size Comparison Chart (2025)\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter 
size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 7B LLM model?'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Why are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTL;DR\\n\\n\\nLLM model size represents the number of parameters that determine a model’s capacity and memory footprint.\\n\\n\\nModel size in GB depends on both parameter count and precision: a 7B model is about 14 GB (FP16) or 7 GB (INT8).\\n\\n\\nLarger models offer higher reasoning accuracy but slower speed and higher cost.\\n\\n\\n7B-13B models provide the best balance for most use cases.'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM fine-tuning services\\nFirst fine-tuning is FREE\\nLEARN MORE'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='What LLM Model Size Parameters Tell You About Its PerformanceLLM model size is the total number of parameters a model uses to process and generate language.LLM model size is defined by the number of parameters a model uses to process and generate language. Each parameter is a numerical weight adjusted during LLM training to represent learned linguistic patterns. The total number of parameters determines how much information the model can store and how effectively it can understand context or perform reasoning tasks.LLM model size: Comparing top modelsModel size has expanded at an exponential rate. The original Transformer architecture in 2017 contained about 65 million parameters. GPT-3 introduced 175 billion, and models such as PaLM and GPT-4 now reach into the hundreds of billions or even over one trillion parameters. This rapid scaling is directly tied to improvements in reasoning and generalization.However, larger parameter counts increase computational and memory demands. A'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='increase computational and memory demands. A 1-billion-parameter model needs around 1.86 GB at 16-bit precision, while a 70-billion-parameter model can exceed 140 GB.\\xa0For most applications, models in the 7B-13B range balance reasoning quality with practical performance, especially when quantized or fine-tuned for specific tasks. Here, continuous LLM model comparison across sizes supports better planning for cost, performance, and hardware constraints.How to Calculate LLM Model Size in GBGPT4 LLM model size estimateLLM model size in gigabytes (GB) shows how much memory a model needs to store its parameters. One gigabyte equals roughly one billion bytes of data.Since each parameter is a number represented in bytes, the total model size depends on how many parameters it has and the precision used to store them.You can use a simple LLM model size calculator to estimate this value:Model size (bytes) = Number of parameters × Bytes per parameterAt 32-bit precision (FP32), each parameter'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='32-bit precision (FP32), each parameter takes 4 bytes. At 16-bit (FP16), it takes 2 bytes. Lower-precision or quantized formats such as INT8 (1 byte) or INT4 (0.5 byte) reduce storage significantly with minimal accuracy loss.For example, a 7-billion-parameter model occupies about 28 GB at FP32, 14 GB at FP16, 7 GB at INT8, and 3.5 GB at INT4. Similarly, a 70-billion-parameter model requires roughly 280 GB at FP32 or 70 GB at INT8. These values show how precision settings directly affect deployability and hardware cost.Quantization allows engineers to fit larger, more capable models within limited memory. Running a 9B model at 4-bit precision can often outperform a smaller 2B model at full precision, offering greater reasoning capacity while keeping memory use within typical GPU limits.'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Measuring latency requirements and average query complexity is crucial. For example, our e-commerce chatbot worked fine with a 7B model, but our content generation system needed at least 13B for acceptable quality.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nJoshua Odmark\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCIO and Founder,\\t\\t\\t\\tLocal Data Exchange'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Model Size Comparison Chart (2025)Comparing LLM model sizes highlights how parameter count and memory footprint scale across architectures. The following table shows approximate sizes for popular models in both FP16 and INT8 precision formats.\\xa0These estimates are based on publicly available data and common quantization levels used for deployment.Comparing LLM sizes shows how parameter count and memory footprint scale across model architectures. The LLM model size chart below summarizes commonly used models and their approximate storage needs under FP16 and INT8 precision. These values are based on public data and standard quantization methods used in deployment.ModelParameters (B)Size (FP16, GB)Size (INT8, GB)Typical Use CaseBERT Base0.110.40.2Text classification, embeddingsMistral 7B7.3147General NLP tasks, chat, summarizationGemma 2 9B9189Balanced reasoning and efficiencyLLaMA 3 70B7014070Complex reasoning, multilingual tasksMixtral 8×7B46 (active 12.9)2613Mixture-of-experts'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='8×7B46 (active 12.9)2613Mixture-of-experts performance with lower costGPT-3175350175Broad NLP, text generationClaude 2~100200100Document analysis, enterprise tasksPaLM 25401080540Multimodal and multilingual applicationsGPT-4 (est.)>1,000>2,000>1,000Advanced reasoning, multimodal AIAn updated LLM size chart helps engineers quickly estimate which model can fit into their GPU memory before training or deployment.\\xa0In practice, models between 7B and 13B parameters represent the most efficient middle ground for local and enterprise-level fine-tuning. Larger architectures such as 70B and above are mainly used in cloud-based inference or research environments where high-capacity GPUs are available.LLM Size vs Performance: Finding the BalanceLLM performance and timeline comparison (Source: information is beautiful)LLM performance scales with size, but the relationship is not linear. Increasing parameter count improves reasoning, comprehension, and generalization up to a point, after which'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='and generalization up to a point, after which performance gains diminish while computational and energy costs rise sharply.Smaller models, such as those under 3 billion LLM parameters, handle basic text classification and sentiment analysis but often fail on multistep reasoning tasks. Models between 7B and 13B parameters deliver a strong balance of speed, accuracy, and cost efficiency. Beyond 70B parameters, performance improvements become incremental compared to the steep rise in compute and latency.Model Size RangeTypical TasksPerformanceTrade-Offs1–3BSimple NLP, embeddings, mobile inferenceFast, limited reasoningShallow context understanding7–13BGeneral chat, summarization, QAStrong balanceModerate compute cost30–70BAdvanced reasoning, multilingual, code generationHigh accuracyRequires enterprise GPUs100B+Multimodal, research-scale modelsPeak performanceVery high cost and latencyPerformance also depends on architecture and quantization. A quantized 9B model running at 4-bit'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='A quantized 9B model running at 4-bit precision can outperform a smaller 2B model at full precision while remaining within desktop GPU limits.\\xa0For practical deployments, evaluating both model size and precision provides a clearer indicator of real-world efficiency than parameter count alone.'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Start with a smaller model and gradually scale up based on performance metrics. Find that sweet spot between performance and resource usage.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nChristian Marin\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCEO,\\t\\t\\t\\tFreezenova'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Choosing the Right LLM Model Size for Your Use CaseUnderstanding LLM model sizeLarger models provide higher reasoning accuracy and language fluency but require significantly more memory, power, and processing time. Smaller or quantized models are faster, more affordable, and easier to deploy.Model TypeParametersTypical HardwareBest ForKey AdvantageSmall<3BLaptops, edge devicesClassification, sentiment, embeddingsLow latency and power useMedium7–13BConsumer GPUs (8–24 GB)Chatbots, summarization, RAG pipelinesBalanced accuracy and costLarge30–70BMulti-GPU or cloudComplex reasoning, multilingual QAHigher contextual accuracyVery Large100B+Enterprise clustersMultimodal or research modelsAdvanced reasoning and creativityFine-tuning allows teams to improve task-specific accuracy without moving to a larger model.\\xa0Many organizations use fine-tuned 7B or 13B models to achieve domain-level precision comparable to general-purpose 70B models. Label Your Data supports this approach by preparing'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Your Data supports this approach by preparing domain-relevant annotated datasets optimized for fine-tuning and evaluation workflows.'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Use cases matter: FAQs work with smaller models, while real-time chatbots need larger LLMs. Edge devices require lighter models, but the cloud can handle bigger ones. Start small, analyze, and scale as needed.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nGursharan Singh\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\tCo-Founder,\\t\\t\\t\\tWebSpero Solutions'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Once the right model scale is selected, LLM fine tuning becomes a key step to adapt the model to domain-specific tasks. Label Your Data supports this process through LLM fine-tuning services built on verified datasets and scalable QA frameworks for enterprise models and comparative research such as Gemini vs ChatGPT evaluation.What Else Affects LLM Performance Beyond Model SizeMoore’s Law in NLP: LLM model size is improving over timeModel size is a major factor in language model capability, but it is not the only one. Performance also depends on the quality of training data, architecture design, and available computational resources.\\xa0These factors determine how efficiently a model learns and how well it performs on real-world tasks.Training data qualityHigh-quality, diverse, and well-annotated datasets are essential for reliable performance. Even a large model will underperform if trained on noisy or biased ML datasets. Working with a specialized data annotation company like Label'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='a specialized data annotation company like Label Your Data ensures that the training corpus is accurate, diverse, and task-specific. A trusted data annotation platform can also support model fine-tuning, QA, and benchmarking for different types of LLMs.Models designed for multimodal input require even more extensive annotation pipelines and higher data annotation pricing due to task complexity.Model architectureArchitectural choices define how effectively a model uses its parameters. Transformer-based designs, sparse attention mechanisms, and mixture-of-experts (MoE) frameworks allow larger models to scale efficiently without linear growth in computation. For instance, Mixtral 8×7B activates only a subset of experts during inference, achieving performance similar to 70B-parameter models at lower cost.The choice of architecture interacts closely with the quality of the input data. Even advanced transformer or mixture-of-experts architectures rely on professional data annotation'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='rely on professional data annotation workflows.Computational resourcesHardware capability affects both training and inference speed. GPUs, TPUs, and dedicated AI accelerators improve efficiency, while limited VRAM restricts model size and context window length. Quantization and distributed inference help reduce these hardware constraints, enabling larger models to run on mid-range systems.Overall, model performance reflects the interaction between size, data quality, architecture, and compute resources. Balancing these elements is key to achieving consistent accuracy and efficiency across different deployment environments.About Label Your Data'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='If you choose to delegate LLM fine-tuning, run a free data pilot with Label Your Data. Our outsourcing strategy has helped many companies scale their ML projects. Here’s why:'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='No Commitment\\nCheck our performance based on a free trial\\n\\n\\n\\n\\n\\nFlexible Pricing\\nPay per labeled object or per annotation hour\\n\\n\\n\\n\\n\\nTool-Agnostic\\nWorking with every annotation tool, even your custom tools\\n\\n\\n\\n\\n\\nData Compliance\\nWork with a data-certified vendor: PCI DSS Level 1, ISO:2700, GDPR, CCPA\\n\\n\\nLLM fine-tuning services\\nFirst fine-tuning is FREE\\nLEARN MORE\\n\\n\\nFAQ\\n\\n\\n\\nWhat is the size of an LLM model?\\n\\n\\n\\n\\n\\n\\nThe size of an LLM model is determined by the total number of parameters it contains. Each parameter is a learned weight that helps the model understand and generate language. Together, these parameters define both the model’s capability and the amount of memory required to store it. \\n\\n\\n\\n\\nWhat is LLM parameter size?'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content=\"What is LLM parameter size?\\n\\n\\n\\n\\n\\n\\nParameter size refers to the total number of tunable weights in the model. It's a core measure of model capacity. Common LLM model sizes include 7B (e.g., Mistral), 70B (e.g., LLaMA 3), and 175B+ (e.g., GPT-3/4). Parameter count influences accuracy, memory footprint, and training duration. The LLM parameter size comparison helps identify which model scale suits your hardware and performance needs. \\n\\n\\n\\n\\nHow many GB is an LLM model?\\n\\n\\n\\n\\n\\n\\nThe size in GB depends on the number of parameters and the level of precision. For example, a 70B parameter model takes about 280 GB at 32-bit precision. With 8-bit quantization, that drops to around 70–90 GB. Very large models like GPT-4 or Claude 3.5 may exceed 1 TB if uncompressed. \\n\\n\\n\\n\\nHow big is a 7B LLM model?\"),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='How big is a 7B LLM model?\\n\\n\\n\\n\\n\\n\\nA 7-billion-parameter (7B) model requires about 28 GB at 32-bit precision (FP32), 14 GB at 16-bit (FP16), and roughly 7 GB at 8-bit (INT8). When quantised to 4-bit precision (INT4), it can run in about 3.5 GB of memory, making it suitable for consumer-grade GPUs. \\n\\n\\n\\n\\nWhy are LLM models so large?\\n\\n\\n\\n\\n\\n\\nLLMs are large because they contain billions of parameters trained on extensive text datasets. Each parameter increases the model’s capacity to capture linguistic structure, context, and reasoning. As a result, performance scales with size; but so do compute, energy, and storage requirements.Large models require vast annotated datasets prepared through systematic data annotation services. These datasets feed billions of text or image recognition examples into the machine learning algorithm during training. \\n\\n\\n\\n\\nWhat does 32B mean in LLM?'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='What does 32B mean in LLM?\\n\\n\\n\\n\\n\\n\\n“32B” stands for 32 billion parameters. The “B” indicates billions, which is a standard way of describing model scale. A 32B model sits between mid-size (7B–13B) and large-scale (70B+) architectures, requiring roughly 64 GB at FP16 or 32 GB at INT8 precision. \\n\\n\\n\\n\\n\\nWritten by\\n\\n\\n\\n\\n\\n\\n\\nKaryna Naminas\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCEO of Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\nKaryna is the CEO of Label Your Data, a company specializing in data labeling solutions for machine learning projects. With a strong background in machine learning, she frequently collaborates with editors to share her expertise through articles, whitepapers, and presentations.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTable of Contents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTL;DR\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat LLM Model Size Parameters Tell You About Its Performance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow to Calculate LLM Model Size in GB\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Model Size Comparison Chart (2025)'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 
7B LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhy are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubscribe to ML Digest'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Subscribe to ML Digest\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\tSUBSCRIBE NOW\\n\\t\\t\\n\\n\\nBlog articles, academic ML research, news\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\tRead Next\\n\\t\\t\\t\\t\\n\\n\\n\\n\\nLLM Orchestration: Strategies, Frameworks, and Best Practices\\n13 min read\\nLearn more\\n\\n\\n\\nLLM Inference: Techniques for Optimized Deployment\\n13 min read\\nWhat’s LLM inference?\\n\\n\\n\\n\\n\\n\\n\\n\\nHigh-quality\\n\\n\\t\\t\\t\\t\\tdata annotation\\n\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\nfor ML\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBest data labeling  service according  to G2 score\\n\\n\\n\\n\\nOur Services\\n\\nData Labeling\\nLLM fine tuning\\n\\n\\n\\nIndustries we work with\\n\\n\\nDrones\\nFinTech\\nRobotics\\nGeospatial\\nAviation\\nRetail\\n\\n\\nAgriculture\\nManufacturing\\nAcademia\\nInsurance\\nE-commerce\\nHealthcare\\n\\n\\n\\n\\nCompany\\n\\nAbout\\nPricing\\nPrivacy & Cookies Policy\\nData Privacy Notice\\nTerms of Use\\nAffiliate Program – Terms of Use\\n\\n\\n\\nLocation\\n\\n\\t\\t\\t\\tUS Office: \\n\\t\\t\\t\\t1007 North Orange Street, \\n\\t\\t\\t\\t4th Floor, Suite 122, \\n\\t\\t\\t\\tWilmington, DE 19801, USA. \\n\\n\\n\\t\\t\\t\\tEU Office: \\n\\t\\t\\t\\t2A Cheilonos street \\n\\t\\t\\t\\tThe Riverside Forum \\n\\t\\t\\t\\t2nd floor \\n\\t\\t\\t\\tNicosia, 1101, Cyprus'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='© 2026 Label Your Data Team\\n\\n\\nSales:\\n\\n\\n\\n\\n\\n\\nJob Openings:\\n\\nworkable.com/labelyourdata \\n\\n\\n\\nFor other queries:\\n\\n\\n\\n\\n\\nFollow us. Stay tuned:')]"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ask the vector store for the 3 documents most similar to the question.\n",
    "docs = vector.similarity_search(\n",
    "    \"Are larger models always better?\",\n",
    "    k=3,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data: and generalization up to a point, after which performance gains diminish while computational and energy costs rise sharply.Smaller models, such as those under 3 billion LLM parameters, handle basic text classification and sentiment analysis but often fail on multistep reasoning tasks. Models between 7B and 13B parameters deliver a strong balance of speed, accuracy, and cost efficiency. Beyond 70B parameters, performance improvements become incremental compared to the steep rise in compute and la\n",
      "LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data: Choosing the Right LLM Model Size for Your Use CaseUnderstanding LLM model sizeLarger models provide higher reasoning accuracy and language fluency but require significantly more memory, power, and processing time. Smaller or quantized models are faster, more affordable, and easier to deploy.Model TypeParametersTypical HardwareBest ForKey AdvantageSmall<3BLaptops, edge devicesClassification, sentiment, embeddingsLow latency and power useMedium7–13BConsumer GPUs (8–24 GB)Chatbots, summarization, \n",
      "LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data: Why are LLM models so large?\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
      "\n",
      "\n",
      "\n",
      "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tWhat does 32B mean in LLM?\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "TL;DR\n",
      "\n",
      "\n",
      "LLM model size represents the number of parameters that determine a model’s capacity and memory footprint.\n",
      "\n",
      "\n",
      "Model size in GB depends on both parameter count and precision: a 7B model is about 14 GB (FP16) or 7 GB (INT8).\n",
      "\n",
      "\n",
      "Larger models offer higher reasoning accuracy but slower speed and higher cost.\n",
      "\n",
      "\n",
      "7B-13B models provide the best balance for most \n"
     ]
    }
   ],
   "source": [
    "## print out\n",
    "for doc in docs:\n",
    "    print(str(doc.metadata[\"title\"]) + \":\", doc.page_content[:500])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### RAG Using Chain"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- `create_stuff_documents_chain()`: Esta cadena toma una lista de documentos y los formatea en un único prompt, que luego pasa a un LLM. Pasa todos los documentos a la vez, por lo que debe asegurarse de que quepan en la ventana de contexto del LLM que está utilizando.\n",
    "\n",
    "- `create_retrieval_chain()`: Esta cadena recibe una consulta del usuario y la pasa al recuperador para obtener los documentos relevantes. Esos documentos, junto con las entradas originales, se pasan después a la cadena creada con `create_stuff_documents_chain()`, que invoca al LLM para generar una respuesta."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPlatform\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tServices\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nLabeling Data\\n\\n\\n\\n\\n\\n\\nText & Documents\\n\\n\\n\\n\\n\\nMedical Imaging\\n\\n\\n\\n\\n\\n3D Point Cloud\\n\\n\\n\\n\\n\\nImage\\n\\n\\n\\n\\n\\nVideo\\n\\n\\n\\n\\n\\nAudio\\n\\n\\n\\n\\n\\nGIS\\n\\n\\n\\n\\n\\nProcessing Data\\n\\n\\n\\n\\n\\n\\nContent Moderation\\n\\n\\n\\n\\n\\nData Generation\\n\\n\\n\\n\\n\\nData Collection\\n\\n\\n\\n\\n\\nData Entry \\n\\n\\n\\n\\n\\nImproving ML Models\\n\\n\\n\\n\\n\\n\\nLLM Fine Tuning\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tWho We Serve\\t\\t\\t\\t\\t\\t\\n\\n\\nFor:\\n\\n\\n\\n\\n\\nML Engineers\\n\\n•\\n\\n\\n\\n\\nAI Business Executives\\n\\n•\\n\\n\\n\\n\\nProduct Managers\\n\\n•\\n\\n\\n\\n\\nAcademic Researchers\\n\\n•\\n\\n\\n\\n\\n\\nTechnology & Innovation\\n\\n\\n\\n\\n\\n\\nDrones\\n\\n\\n\\n\\n\\nFinTech\\n\\n\\n\\n\\n\\nRobotics\\n\\n\\n\\n\\n\\nGeospatial\\n\\n\\n\\n\\n\\nAutonomous Vehicles\\n\\n\\n\\n\\n\\nIndustry & Commerce\\n\\n\\n\\n\\n\\n\\nAgriculture\\n\\n\\n\\n\\n\\nManufacturing\\n\\n\\n\\n\\n\\nAviation\\n\\n\\n\\n\\n\\nRetail\\n\\n\\n\\n\\n\\nE-commerce\\n\\n\\n\\n\\n\\nAcademia & Services\\n\\n\\n\\n\\n\\n\\nAcademia\\n\\n\\n\\n\\n\\nHealthcare\\n\\n\\n\\n\\n\\nInsurance\\n\\n\\n\\n\\n\\nSoftware 
Agencies\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tAbout\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tCareers'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Careers\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tResources\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nCase Studies\\n\\n\\n\\n\\n\\n\\nFormula Student\\n\\n\\n\\n\\n\\nLandfill Detection Model Validation\\n\\n\\n\\n\\n\\nImage Annotation for Defense\\n\\n\\n\\n\\n\\nDrone Data Annotation\\n\\n\\n\\n\\n\\nAcoustic Target Detection\\n\\n\\n\\n\\n\\nVideo Annotation of Military UAVs\\n\\n\\n\\n\\n\\nTechnological University Dublin\\n\\n\\n\\n\\n\\nAnnotation for Skylum\\n\\n\\n\\n\\n\\nLatest from blog\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView all\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n3D Computer Vision: How It Works and Where It’s Used\\n\\n\\n\\n\\n\\nGround Truth Data: What It Is and How to Build It Right\\n\\n\\n\\nWeekly\\nML DIGEST\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tView\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\nGuides\\n\\n\\n\\n\\n\\n\\nThe Guide to In-House Dataset Labeling\\n\\n\\n\\n\\n\\nThe Buyer’s Guide to Data Labeling Vendors\\n\\n\\n\\n\\n\\nThe Guide to Geospatial Annotation\\n\\n\\n\\n\\n\\nadditional help\\n\\n\\n\\n\\n\\n\\nFAQs\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\tPricing\\t\\n\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\ttalk to sales\\n\\t\\t\\t\\t\\n\\n\\n\\n\\n\\ntalk to sales\\n\\n\\n\\n\\n\\t\\t\\t\\t\\tlog in\\n\\t\\t\\t\\t\\n\\n\\t\\t\\t\\t\\t\\trun free pilot\\n\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nStart Free Pilot\\nfill up this form to send your pilot request\\n\\n\\n\\nEmail is not valid.\\n\\n\\n\\nEmail is not valid'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content=\"Email is not valid.\\n\\n\\n\\nEmail is not valid\\n\\n\\n\\nPhone is not valid\\n\\n\\n\\nSome error text\\n\\n\\n\\t\\t\\t\\t\\t\\tSubmit\\n\\t\\t\\t\\t\\t\\t\\n\\nReferrer domain is wrong\\n\\n\\n\\n\\n\\n\\nThank you for contacting us!\\nWe'll get back to you shortly\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLabel Your Data were genuinely interested in the success of my project, asked good questions, and were flexible in working in my proprietary software environment.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nKyle Hamilton\\nPhD Researcher at TU Dublin\\n\\n\\n\\n\\n\\nTrusted by ML Professionals\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBack to blog\\n\\n\\nPublished December 19, 2024\\nLLM Model Size: Comparison Chart & Performance Guide\\n\\n\\n\\n\\n\\n\\n\\n\\nKaryna Naminas\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCEO of Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\nSummarize:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare:\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTable of Contents\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTL;DR\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat LLM Model Size Parameters Tell You About Its Performance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow to Calculate LLM Model Size in GB\"),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Model Size Comparison Chart (2025)\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter 
size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 7B LLM model?'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Why are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTL;DR\\n\\n\\nLLM model size represents the number of parameters that determine a model’s capacity and memory footprint.\\n\\n\\nModel size in GB depends on both parameter count and precision: a 7B model is about 14 GB (FP16) or 7 GB (INT8).\\n\\n\\nLarger models offer higher reasoning accuracy but slower speed and higher cost.\\n\\n\\n7B-13B models provide the best balance for most use cases.'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM fine-tuning services\\nFirst fine-tuning is FREE\\nLEARN MORE'),\n",
       " Document(metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='What LLM Model Size Parameters Tell You About Its PerformanceLLM model size is the total number of parameters a model uses to process and generate language.LLM model size is defined by the number of parameters a model uses to process and generate language. Each parameter is a numerical weight adjusted during LLM training to represent learned linguistic patterns. The total number of parameters determines how much information the model can store and how effectively it can understand context or perform reasoning tasks.LLM model size: Comparing top modelsModel size has expanded at an exponential rate. The original Transformer architecture in 2017 contained about 65 million parameters. GPT-3 introduced 175 billion, and models such as PaLM and GPT-4 now reach into the hundreds of billions or even over one trillion parameters. This rapid scaling is directly tied to improvements in reasoning and generalization.However, larger parameter counts increase computational and memory demands. A')]"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents[:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'No, larger models are not always better. While they can offer higher reasoning accuracy and language fluency, they also require significantly more memory, power, and processing time. There is a trade-off between model size, performance, speed, and cost, and for many applications, models in the 7B-13B range provide a strong balance of these factors. Beyond a certain point, performance improvements may diminish while computational and energy costs rise sharply.'"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_classic.chains.combine_documents import (\n",
    "    create_stuff_documents_chain,\n",
    ")\n",
    "\n",
    "# Prompt that restricts the answer to the text supplied under `context`.\n",
    "prompt = PromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
    "\n",
    "<context>\n",
    "{context}\n",
    "</context>\n",
    "\n",
    "Question: {input}\"\"\")\n",
    "\n",
    "# A plain `prompt | chat | StrOutputParser()` pipeline would interpolate the\n",
    "# Python repr of the Document list (metadata included) into the prompt.\n",
    "# The stuff-documents chain joins each document's `page_content` before\n",
    "# formatting the prompt and still returns the answer as a plain string.\n",
    "document_chain = create_stuff_documents_chain(chat, prompt)\n",
    "\n",
    "# Invoke the chain, passing the documents under the `context` key.\n",
    "document_chain.invoke({\n",
    "    \"input\": \"Are larger models always better?\",\n",
    "    \"context\": documents,\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_classic.chains import create_retrieval_chain\n",
    "from langchain_classic.chains.combine_documents import create_stuff_documents_chain\n",
    "\n",
    "# Retriever built from `vector` with its default search settings.\n",
    "# A stricter alternative, kept here for reference:\n",
    "#   retriever = vector.as_retriever(\n",
    "#       search_type=\"similarity_score_threshold\",\n",
    "#       search_kwargs={\"score_threshold\": 0.3, \"k\": 3},\n",
    "#   )\n",
    "retriever = vector.as_retriever()\n",
    "\n",
    "# Retrieve first, then pass the retrieved documents to `document_chain` as `context`.\n",
    "retrieval_chain = create_retrieval_chain(\n",
    "    retriever=retriever,\n",
    "    combine_docs_chain=document_chain,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The factors that impact model performance beyond size include:\n",
      "\n",
      "1. **Quality of Training Data**: High-quality, diverse, and well-annotated datasets are essential for reliable performance. Poor-quality data can lead to underperformance, even in large models.\n",
      "\n",
      "2. **Architecture Design**: The design of the model's architecture affects how efficiently it learns and performs tasks.\n",
      "\n",
      "3. **Available Computational Resources**: The computational resources available for training and running the model can influence its performance.\n"
     ]
    }
   ],
   "source": [
    "# Ask a question; the chain returns a dict whose generated text is under \"answer\".\n",
    "beyond_size_query = \"what are the factors that impact model performance beyond size?\"\n",
    "response = retrieval_chain.invoke({\"input\": beyond_size_query})\n",
    "print(response[\"answer\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chat History Management"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Además de recuperar documentos externos como información de contexto, el LLM también debe considerar el historial de la conversación para obtener respuestas más precisas.\n",
    "- `create_history_aware_retriever()`: esta cadena toma el historial de la conversación y lo utiliza para generar una consulta de búsqueda que se pasa al recuperador subyacente."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_classic.chains import create_history_aware_retriever\n",
    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
    "\n",
    "# Prompt that asks the LLM to turn the conversation so far into a search query.\n",
    "query_rewrite_messages = [\n",
    "    MessagesPlaceholder(variable_name=\"chat_history\"),\n",
    "    (\"user\", \"{input}\"),\n",
    "    (\"user\", \"Given the above conversation, generate a search query to look up in order to get information relevant to the conversation\"),\n",
    "]\n",
    "prompt = ChatPromptTemplate.from_messages(query_rewrite_messages)\n",
    "\n",
    "# Retriever wrapper: `chat` produces the search query that is sent to `retriever`.\n",
    "history_chain = create_history_aware_retriever(\n",
    "    llm=chat,\n",
    "    retriever=retriever,\n",
    "    prompt=prompt,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(id='f4a5a8d8-d801-4dce-b4cf-73c38db86250', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='rely on professional data annotation workflows.Computational resourcesHardware capability affects both training and inference speed. GPUs, TPUs, and dedicated AI accelerators improve efficiency, while limited VRAM restricts model size and context window length. Quantization and distributed inference help reduce these hardware constraints, enabling larger models to run on mid-range systems.Overall, model performance reflects the interaction between size, data quality, architecture, and compute resources. Balancing these elements is key to achieving consistent accuracy and efficiency across different deployment environments.About Label Your Data'),\n",
       " Document(id='a0d37eaf-9b58-48ff-8b13-8d75fdb5d027', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Once the right model scale is selected, LLM fine tuning becomes a key step to adapt the model to domain-specific tasks. Label Your Data supports this process through LLM fine-tuning services built on verified datasets and scalable QA frameworks for enterprise models and comparative research such as Gemini vs ChatGPT evaluation.What Else Affects LLM Performance Beyond Model SizeMoore’s Law in NLP: LLM model size is improving over timeModel size is a major factor in language model capability, but it is not the only one. Performance also depends on the quality of training data, architecture design, and available computational resources.\\xa0These factors determine how efficiently a model learns and how well it performs on real-world tasks.Training data qualityHigh-quality, diverse, and well-annotated datasets are essential for reliable performance. Even a large model will underperform if trained on noisy or biased ML datasets. Working with a specialized data annotation company like Label'),\n",
       " Document(id='042bf685-898c-4d76-bae2-0db29a50e585', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Choosing the Right LLM Model Size for Your Use CaseUnderstanding LLM model sizeLarger models provide higher reasoning accuracy and language fluency but require significantly more memory, power, and processing time. Smaller or quantized models are faster, more affordable, and easier to deploy.Model TypeParametersTypical HardwareBest ForKey AdvantageSmall<3BLaptops, edge devicesClassification, sentiment, embeddingsLow latency and power useMedium7–13BConsumer GPUs (8–24 GB)Chatbots, summarization, RAG pipelinesBalanced accuracy and costLarge30–70BMulti-GPU or cloudComplex reasoning, multilingual QAHigher contextual accuracyVery Large100B+Enterprise clustersMultimodal or research modelsAdvanced reasoning and creativityFine-tuning allows teams to improve task-specific accuracy without moving to a larger model.\\xa0Many organizations use fine-tuned 7B or 13B models to achieve domain-level precision comparable to general-purpose 70B models. Label Your Data supports this approach by preparing'),\n",
       " Document(id='dcc59da6-bddb-4057-829e-356ae52bdbe9', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='and generalization up to a point, after which performance gains diminish while computational and energy costs rise sharply.Smaller models, such as those under 3 billion LLM parameters, handle basic text classification and sentiment analysis but often fail on multistep reasoning tasks. Models between 7B and 13B parameters deliver a strong balance of speed, accuracy, and cost efficiency. Beyond 70B parameters, performance improvements become incremental compared to the steep rise in compute and latency.Model Size RangeTypical TasksPerformanceTrade-Offs1–3BSimple NLP, embeddings, mobile inferenceFast, limited reasoningShallow context understanding7–13BGeneral chat, summarization, QAStrong balanceModerate compute cost30–70BAdvanced reasoning, multilingual, code generationHigh accuracyRequires enterprise GPUs100B+Multimodal, research-scale modelsPeak performanceVery high cost and latencyPerformance also depends on architecture and quantization. A quantized 9B model running at 4-bit')]"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed a short conversation so the follow-up question depends on history.\n",
    "chat_history = [\n",
    "    HumanMessage(content=\"How many factors impact the model's context?\"),\n",
    "    AIMessage(content=\"Three.\"),\n",
    "]\n",
    "\n",
    "# Returns the documents retrieved for the follow-up question.\n",
    "history_chain.invoke({\"chat_history\": chat_history, \"input\": \"What are the three factors?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Answering prompt: retrieved context in the system message, then the\n",
    "# conversation so far, then the new user message.\n",
    "answer_messages = [\n",
    "    (\"system\", \"Answer the user's questions based on the below context:\\n\\n{context}\"),\n",
    "    MessagesPlaceholder(variable_name=\"chat_history\"),\n",
    "    (\"user\", \"{input}\"),\n",
    "]\n",
    "prompt = ChatPromptTemplate.from_messages(answer_messages)\n",
    "\n",
    "# Chain that fills `{context}` with the documents and asks `chat` for the answer.\n",
    "document_chain = create_stuff_documents_chain(llm=chat, prompt=prompt)\n",
    "\n",
    "# History-aware retrieval feeding the answering chain.\n",
    "retrieval_chain = create_retrieval_chain(\n",
    "    retriever=history_chain,\n",
    "    combine_docs_chain=document_chain,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content=\"How many factors impact the model's context?\", additional_kwargs={}, response_metadata={}),\n",
       "  AIMessage(content='Three.', additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[])],\n",
       " 'input': 'Are you saying three?',\n",
       " 'context': [Document(id='a0d37eaf-9b58-48ff-8b13-8d75fdb5d027', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Once the right model scale is selected, LLM fine tuning becomes a key step to adapt the model to domain-specific tasks. Label Your Data supports this process through LLM fine-tuning services built on verified datasets and scalable QA frameworks for enterprise models and comparative research such as Gemini vs ChatGPT evaluation.What Else Affects LLM Performance Beyond Model SizeMoore’s Law in NLP: LLM model size is improving over timeModel size is a major factor in language model capability, but it is not the only one. Performance also depends on the quality of training data, architecture design, and available computational resources.\\xa0These factors determine how efficiently a model learns and how well it performs on real-world tasks.Training data qualityHigh-quality, diverse, and well-annotated datasets are essential for reliable performance. Even a large model will underperform if trained on noisy or biased ML datasets. Working with a specialized data annotation company like Label'),\n",
       "  Document(id='042bf685-898c-4d76-bae2-0db29a50e585', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Choosing the Right LLM Model Size for Your Use CaseUnderstanding LLM model sizeLarger models provide higher reasoning accuracy and language fluency but require significantly more memory, power, and processing time. Smaller or quantized models are faster, more affordable, and easier to deploy.Model TypeParametersTypical HardwareBest ForKey AdvantageSmall<3BLaptops, edge devicesClassification, sentiment, embeddingsLow latency and power useMedium7–13BConsumer GPUs (8–24 GB)Chatbots, summarization, RAG pipelinesBalanced accuracy and costLarge30–70BMulti-GPU or cloudComplex reasoning, multilingual QAHigher contextual accuracyVery Large100B+Enterprise clustersMultimodal or research modelsAdvanced reasoning and creativityFine-tuning allows teams to improve task-specific accuracy without moving to a larger model.\\xa0Many organizations use fine-tuned 7B or 13B models to achieve domain-level precision comparable to general-purpose 70B models. Label Your Data supports this approach by preparing'),\n",
       "  Document(id='f4a5a8d8-d801-4dce-b4cf-73c38db86250', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='rely on professional data annotation workflows.Computational resourcesHardware capability affects both training and inference speed. GPUs, TPUs, and dedicated AI accelerators improve efficiency, while limited VRAM restricts model size and context window length. Quantization and distributed inference help reduce these hardware constraints, enabling larger models to run on mid-range systems.Overall, model performance reflects the interaction between size, data quality, architecture, and compute resources. Balancing these elements is key to achieving consistent accuracy and efficiency across different deployment environments.About Label Your Data'),\n",
       "  Document(id='dcc59da6-bddb-4057-829e-356ae52bdbe9', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='and generalization up to a point, after which performance gains diminish while computational and energy costs rise sharply.Smaller models, such as those under 3 billion LLM parameters, handle basic text classification and sentiment analysis but often fail on multistep reasoning tasks. Models between 7B and 13B parameters deliver a strong balance of speed, accuracy, and cost efficiency. Beyond 70B parameters, performance improvements become incremental compared to the steep rise in compute and latency.Model Size RangeTypical TasksPerformanceTrade-Offs1–3BSimple NLP, embeddings, mobile inferenceFast, limited reasoningShallow context understanding7–13BGeneral chat, summarization, QAStrong balanceModerate compute cost30–70BAdvanced reasoning, multilingual, code generationHigh accuracyRequires enterprise GPUs100B+Multimodal, research-scale modelsPeak performanceVery high cost and latencyPerformance also depends on architecture and quantization. A quantized 9B model running at 4-bit')],\n",
       " 'answer': \"I apologize for the confusion. There are multiple factors that impact a model's context, including model size, training data quality, architecture design, and computational resources. These elements interact to determine how well a model performs in real-world tasks.\"}"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Follow-up that only makes sense together with `chat_history`.\n",
    "retrieval_chain.invoke({\"chat_history\": chat_history, \"input\": \"Are you saying three?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Longer conversation: the human turn now spells out the three factors.\n",
    "chat_history = [\n",
    "    HumanMessage(content=\"How many factors impact the model's context?\"),\n",
    "    AIMessage(content=\"Three.\"),\n",
    "    HumanMessage(content='The context mentions several factors that impact model performance, including data quality, architecture design, and computational resources.'),\n",
    "    AIMessage(content='Thank you for the information.'),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': [HumanMessage(content=\"How many factors impact the model's context?\", additional_kwargs={}, response_metadata={}),\n",
       "  AIMessage(content='Three.', additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[]),\n",
       "  HumanMessage(content='The context mentions several factors that impact model performance, including data quality, architecture design, and computational resources.', additional_kwargs={}, response_metadata={}),\n",
       "  AIMessage(content='Thank you for the information.', additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[])],\n",
       " 'input': 'Can you repeat the three factors?',\n",
       " 'context': [Document(id='f4a5a8d8-d801-4dce-b4cf-73c38db86250', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='rely on professional data annotation workflows.Computational resourcesHardware capability affects both training and inference speed. GPUs, TPUs, and dedicated AI accelerators improve efficiency, while limited VRAM restricts model size and context window length. Quantization and distributed inference help reduce these hardware constraints, enabling larger models to run on mid-range systems.Overall, model performance reflects the interaction between size, data quality, architecture, and compute resources. Balancing these elements is key to achieving consistent accuracy and efficiency across different deployment environments.About Label Your Data'),\n",
       "  Document(id='a0d37eaf-9b58-48ff-8b13-8d75fdb5d027', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='Once the right model scale is selected, LLM fine tuning becomes a key step to adapt the model to domain-specific tasks. Label Your Data supports this process through LLM fine-tuning services built on verified datasets and scalable QA frameworks for enterprise models and comparative research such as Gemini vs ChatGPT evaluation.What Else Affects LLM Performance Beyond Model SizeMoore’s Law in NLP: LLM model size is improving over timeModel size is a major factor in language model capability, but it is not the only one. Performance also depends on the quality of training data, architecture design, and available computational resources.\\xa0These factors determine how efficiently a model learns and how well it performs on real-world tasks.Training data qualityHigh-quality, diverse, and well-annotated datasets are essential for reliable performance. Even a large model will underperform if trained on noisy or biased ML datasets. Working with a specialized data annotation company like Label'),\n",
       "  Document(id='d3f66ade-6da6-473e-8aae-d3ed8d293a3d', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='a specialized data annotation company like Label Your Data ensures that the training corpus is accurate, diverse, and task-specific. A trusted data annotation platform can also support model fine-tuning, QA, and benchmarking for different types of LLMs.Models designed for multimodal input require even more extensive annotation pipelines and higher data annotation pricing due to task complexity.Model architectureArchitectural choices define how effectively a model uses its parameters. Transformer-based designs, sparse attention mechanisms, and mixture-of-experts (MoE) frameworks allow larger models to scale efficiently without linear growth in computation. For instance, Mixtral 8×7B activates only a subset of experts during inference, achieving performance similar to 70B-parameter models at lower cost.The choice of architecture interacts closely with the quality of the input data. Even advanced transformer or mixture-of-experts architectures rely on professional data annotation'),\n",
       "  Document(id='406cf0c2-78cd-4a23-b9dd-853ebb02c9cd', metadata={'source': 'https://labelyourdata.com/articles/llm-model-size', 'title': 'LLM Model Size: 2026 Comparison Chart & Performance Guide | Label Your Data', 'description': '▶️ Learn what LLM model size means, how it’s measured, and how it affects speed, cost, and accuracy. Updated 2025 size charts and LLM model comparisons in GB.', 'language': 'en'}, page_content='LLM Size vs Performance: Finding the Balance\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tChoosing the Right LLM Model Size for Your Use Case\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat Else Affects LLM Performance Beyond Model Size\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTraining data quality\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tModel architecture\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tComputational resources\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAbout Label Your Data\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFAQ\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is the size of an LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat is LLM parameter size?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow many GB is an LLM 
model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHow big is a 7B LLM model?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhy are LLM models so large?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tWhat does 32B mean in LLM?\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubscribe to ML Digest')],\n",
       " 'answer': 'The three factors that impact model performance are:\\n\\n1. Training data quality\\n2. Model architecture\\n3. Computational resources'}"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same chain, now with the extended history.\n",
    "retrieval_chain.invoke({\"chat_history\": chat_history, \"input\": \"Can you repeat the three factors?\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Memory"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Actualmente, las clases clásicas de «Memory» están obsoletas; véase la [guía de migración](https://python.langchain.com/docs/versions/migrating_memory/).\n",
    "\n",
    "Para trabajar con Memory, utilizaremos la cadena heredada `langchain_classic.chains.LLMChain`, también obsoleta en favor de las secuencias [LCEL](https://python.langchain.com/docs/expression_language) (`prompt | llm`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/bt/y86856n55ll0r7r6qg83x4w80000gq/T/ipykernel_81569/3574223766.py:18: LangChainDeprecationWarning: Please see the migration guide at: https://python.langchain.com/docs/versions/migrating_memory/\n",
      "  memory = ConversationBufferMemory(memory_key=\"chat_history\", k = 1)\n",
      "/var/folders/bt/y86856n55ll0r7r6qg83x4w80000gq/T/ipykernel_81569/3574223766.py:19: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use `RunnableSequence, e.g., `prompt | llm`` instead.\n",
      "  conversation = LLMChain(\n"
     ]
    }
   ],
   "source": [
    "from langchain_openai import OpenAI\n",
    "from langchain_classic.prompts import PromptTemplate\n",
    "from langchain_classic.chains import LLMChain\n",
    "from langchain_classic.memory import ConversationBufferMemory\n",
    "\n",
    "\n",
    "# The template must expose a `chat_history` slot for the memory to fill in.\n",
    "template = \"\"\"You are a university professor having a conversation with a student.\n",
    "\n",
    "Previous conversation:\n",
    "{chat_history}\n",
    "\n",
    "New student's question: {question}\n",
    "Response:\"\"\"\n",
    "\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "\n",
    "# `memory_key` must match the template variable above.\n",
    "# ConversationBufferMemory keeps the whole transcript and has no window-size\n",
    "# option, so no `k` argument is passed. For a sliding window over the last\n",
    "# k exchanges, use ConversationBufferWindowMemory(k=...) instead.\n",
    "memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
    "conversation = LLMChain(\n",
    "    llm=chat,\n",
    "    prompt=prompt,\n",
    "    verbose=True,  # print the fully formatted prompt on every call\n",
    "    memory=memory,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
      "Prompt after formatting:\n",
      "\u001b[32;1m\u001b[1;3mYou are a university professor having a conversation with a student.\n",
      "\n",
      "Previous conversation:\n",
      "\n",
      "\n",
      "New student's question: what is your name?\n",
      "Response:\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'question': 'what is your name?',\n",
       " 'chat_history': '',\n",
       " 'text': \"My name is Professor Smith. It's nice to meet you! What can I help you with today?\"}"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conversation.invoke({\"question\": \"what is your name?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append a hand-written user/AI exchange directly to the memory's message store.\n",
    "chat_log = memory.chat_memory\n",
    "chat_log.add_user_message(\"I think your name is Helena Gómez, right?\")\n",
    "chat_log.add_ai_message(\"Yes. My name is Helena Gómez.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
      "Prompt after formatting:\n",
      "\u001b[32;1m\u001b[1;3mYou are a university professor having a conversation with a student.\n",
      "\n",
      "Previous conversation:\n",
      "Human: what is your name?\n",
      "AI: My name is Professor Smith. It's nice to meet you! What can I help you with today?\n",
      "Human: I think your name is Helena Gómez, right?\n",
      "AI: Yes. My name is Helena Gómez.\n",
      "\n",
      "New student's question: So what is your name really?\n",
      "Response:\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'question': 'So what is your name really?',\n",
       " 'chat_history': \"Human: what is your name?\\nAI: My name is Professor Smith. It's nice to meet you! What can I help you with today?\\nHuman: I think your name is Helena Gómez, right?\\nAI: Yes. My name is Helena Gómez.\",\n",
       " 'text': \"My name is Helena Gómez, as I mentioned earlier. It's important to me that you feel comfortable addressing me by my name. How can I assist you further?\"}"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conversation.invoke({\"question\": \"So what is your name really?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'chat_history': \"Human: what is your name?\\nAI: My name is Professor Smith. It's nice to meet you! What can I help you with today?\\nHuman: I think your name is Helena Gómez, right?\\nAI: Yes. My name is Helena Gómez.\\nHuman: So what is your name really?\\nAI: My name is Helena Gómez, as I mentioned earlier. It's important to me that you feel comfortable addressing me by my name. How can I assist you further?\"}"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
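    "# Why the `k` window size has no effect: ConversationBufferMemory has no\n",
    "# window-size option and silently ignores a `k` argument, so the full\n",
    "# history is returned here. Use ConversationBufferWindowMemory(k=...) to\n",
    "# keep only the last k exchanges.\n",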
    "memory.load_memory_variables({})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## References\n",
    "\n",
    "- [Langchain Crash Course for Beginners](https://youtu.be/lG7Uxts9SXs?si=07gr6zeB9tDkHjGm)\n",
    "- [Langchain Documentation](https://python.langchain.com/docs/get_started/introduction)\n",
    "- [Langchain Quickstart](https://python.langchain.com/docs/get_started/quickstart)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python313",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}