{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "A100"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "23a2c2f641d34f47ba763ddc4c3c30b6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_bdede25d1d99431b813997ca27255b07",
              "IPY_MODEL_57cfa3f6bf1b4f099f0faee8550c9e9a",
              "IPY_MODEL_7bbe65479ecc4c8abe6f66ebe0fcd439"
            ],
            "layout": "IPY_MODEL_2c705c96605847428f68baa2a2831e5b"
          }
        },
        "bdede25d1d99431b813997ca27255b07": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_caf004f12763403aa8147a6ba7b9110f",
            "placeholder": "​",
            "style": "IPY_MODEL_17c046a5112e4eddb8812a98a4ff513d",
            "value": "Loading weights: 100%"
          }
        },
        "57cfa3f6bf1b4f099f0faee8550c9e9a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7da5a233d937487bb71ec8e6cb5233de",
            "max": 434,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2faa85f196e5414591b6bfe069559965",
            "value": 434
          }
        },
        "7bbe65479ecc4c8abe6f66ebe0fcd439": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e37db1c042df41a3bc04b7c99a7b992f",
            "placeholder": "​",
            "style": "IPY_MODEL_781d845c7035474eabf743c52ac4509a",
            "value": " 434/434 [00:02&lt;00:00, 206.50it/s, Materializing param=model.norm.weight]"
          }
        },
        "2c705c96605847428f68baa2a2831e5b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "caf004f12763403aa8147a6ba7b9110f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "17c046a5112e4eddb8812a98a4ff513d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7da5a233d937487bb71ec8e6cb5233de": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2faa85f196e5414591b6bfe069559965": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e37db1c042df41a3bc04b7c99a7b992f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "781d845c7035474eabf743c52ac4509a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "92dd7ad48cfe4a25abfb7ed7ce43e596": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_1abedc7d4a91404283cc04cdf02f11f2",
              "IPY_MODEL_b5c11deca28f46e4a41c9238986bbcc3",
              "IPY_MODEL_8f7522d9081947979d34d3386d09689e"
            ],
            "layout": "IPY_MODEL_6990c87de91a4ad9802e2f2207f116e0"
          }
        },
        "1abedc7d4a91404283cc04cdf02f11f2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a49f51090ece4f588cdd4d80c801388a",
            "placeholder": "​",
            "style": "IPY_MODEL_cc35ecd21f4245e08bee545cd8363f0f",
            "value": "generation_config.json: 100%"
          }
        },
        "b5c11deca28f46e4a41c9238986bbcc3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_07c0d6837604437f94d1f4004f7266c2",
            "max": 242,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_dd6da5b8a5bf4814be339ef588faba1a",
            "value": 242
          }
        },
        "8f7522d9081947979d34d3386d09689e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_a6921e563ded4f21b765cfad02b75996",
            "placeholder": "​",
            "style": "IPY_MODEL_a4846bf223c04b68b3013474bd80d57e",
            "value": " 242/242 [00:00&lt;00:00, 32.8kB/s]"
          }
        },
        "6990c87de91a4ad9802e2f2207f116e0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a49f51090ece4f588cdd4d80c801388a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "cc35ecd21f4245e08bee545cd8363f0f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "07c0d6837604437f94d1f4004f7266c2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "dd6da5b8a5bf4814be339ef588faba1a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "a6921e563ded4f21b765cfad02b75996": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a4846bf223c04b68b3013474bd80d57e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "a025bd57d6eb4763ac35a61465bb8f11": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_8c6b2179c0d2475f8482c09c642d530c",
              "IPY_MODEL_fc8f7fe550fc475d86cf7e82daddd4d9",
              "IPY_MODEL_6b8e4dab33734f92869a4e24bdb4e41a"
            ],
            "layout": "IPY_MODEL_6c65ffc393e14d96a1bce8451ebc1704"
          }
        },
        "8c6b2179c0d2475f8482c09c642d530c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c4b975aa14614cd6ab0ca4b047f59e7f",
            "placeholder": "​",
            "style": "IPY_MODEL_3cf86c0829c9439ebb20f56fd0f3c178",
            "value": "Loading weights: 100%"
          }
        },
        "fc8f7fe550fc475d86cf7e82daddd4d9": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_141baed02735407f897cea750084d0c2",
            "max": 434,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_43a102b292ed403eb604446ffafbe28e",
            "value": 434
          }
        },
        "6b8e4dab33734f92869a4e24bdb4e41a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_61289affeccd4c3ba7b4ff84d29033d6",
            "placeholder": "​",
            "style": "IPY_MODEL_5158f4614d1c4411831b1ee44a7609dc",
            "value": " 434/434 [00:02&lt;00:00, 209.93it/s, Materializing param=model.norm.weight]"
          }
        },
        "6c65ffc393e14d96a1bce8451ebc1704": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c4b975aa14614cd6ab0ca4b047f59e7f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3cf86c0829c9439ebb20f56fd0f3c178": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "141baed02735407f897cea750084d0c2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "43a102b292ed403eb604446ffafbe28e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "61289affeccd4c3ba7b4ff84d29033d6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5158f4614d1c4411831b1ee44a7609dc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IsRwJMOZglA4"
      },
      "outputs": [],
      "source": [
        "# ============================================================\n",
        "# EXPERIMENT OVERVIEW\n",
        "# ============================================================\n",
        "# Experiment 1: Label classification with prompt variants\n",
        "#   Task: classify texts into Hope / Hopelessness / None\n",
        "#   Compare:\n",
        "#       - Direct instruction\n",
        "#       - Zero-shot CoT\n",
        "#       - Few-shot CoT\n",
        "#\n",
        "# Experiment 2: Self-consistency on reasoning-style classification\n",
        "#   Task: same classification, but sample multiple outputs\n",
        "#   Compare:\n",
        "#       - Single generation\n",
        "#       - 5-sample majority vote\n",
        "#\n",
        "# Experiment 3: Simple Tree-of-Thought-style search\n",
        "#   Task: solve small arithmetic / logic problems\n",
        "#   Compare:\n",
        "#       - Direct answer\n",
        "#       - CoT\n",
        "#       - Branch-then-evaluate simple ToT-style method\n",
        "# ============================================================"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 0. INSTALLATION\n",
        "# ============================================================\n",
        "# In Google Colab, uncomment and run the next line once.\n",
        "#\n",
        "# !pip install -q transformers accelerate bitsandbytes datasets scikit-learn pandas\n",
        "#\n",
        "\n",
        "# - transformers: load tokenizer and model from Hugging Face\n",
        "# - accelerate: helps model execution on GPU\n",
        "# - bitsandbytes: enables 4-bit loading for lower memory use\n",
        "# - datasets: convenient dataset utilities (optional here)\n",
        "# - scikit-learn: metrics like accuracy and F1\n",
        "# - pandas: result tables\n",
        "# ============================================================"
      ],
      "metadata": {
        "id": "qKFN6X-6j6YL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -q transformers accelerate bitsandbytes datasets scikit-learn pandas"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WWgUpzWHj_M7",
        "outputId": "731e35da-05dc-4dad-a207-64a8560a8417"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 MB\u001b[0m \u001b[31m34.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 1. IMPORTS\n",
        "# ============================================================\n",
        "import re\n",
        "import json\n",
        "import math\n",
        "import random\n",
        "from collections import Counter\n",
        "\n",
        "import pandas as pd\n",
        "from sklearn.metrics import classification_report, accuracy_score, f1_score\n",
        "\n",
        "import torch\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM"
      ],
      "metadata": {
        "id": "WBF575WOj_vw"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ------------------------------------------------------------\n",
        "# Reproducibility seed\n",
        "# ------------------------------------------------------------\n",
        "# We set seeds so that experiments are more repeatable.\n",
        "# Important notes:\n",
        "# Even with seeds fixed, LLM generation can still show some variation,\n",
        "# especially if sampling is enabled and some backend operations differ.\n",
        "SEED = 42\n",
        "random.seed(SEED)\n",
        "torch.manual_seed(SEED)\n",
        "if torch.cuda.is_available():\n",
        "    torch.cuda.manual_seed_all(SEED)"
      ],
      "metadata": {
        "id": "KfXqbRCokCfJ"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 2. MODEL CHOICE\n",
        "# ============================================================\n",
        "# Choose ONE open-source instruct model that is realistic for Colab.\n",
        "#\n",
        "# Good options:\n",
        "# - Qwen/Qwen2.5-3B-Instruct       -> usually the safest balance\n",
        "# - Qwen/Qwen2.5-1.5B-Instruct     -> lighter and faster, lower quality\n",
        "# - google/gemma-2-2b-it           -> also possible, but access/setup may vary\n",
        "# - mistralai/Mistral-7B-Instruct-v0.2 -> stronger, but heavier for Colab\n",
        "#\n",
        "# For simplicity, this notebook uses Qwen2.5-3B-Instruct.\n",
        "MODEL_NAME = \"Qwen/Qwen2.5-3B-Instruct\"\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "# ------------------------------------------------------------\n",
        "# Device map\n",
        "# ------------------------------------------------------------\n",
        "# device_map=\"auto\" lets Transformers place the model automatically.\n",
        "# In Colab with one GPU, this usually means the model goes to GPU.\n",
        "DEVICE_MAP = \"auto\"\n",
        "\n",
        "# ============================================================\n",
        "# 3. LOAD TOKENIZER AND MODEL\n",
        "# ============================================================\n",
        "print(f\"Loading model: {MODEL_NAME}\")\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
        "\n",
        "# Some tokenizers do not define a padding token by default.\n",
        "# For generation, we often safely reuse eos_token as pad_token.\n",
        "if tokenizer.pad_token is None:\n",
        "    tokenizer.pad_token = tokenizer.eos_token\n",
        "\n",
        "model = AutoModelForCausalLM.from_pretrained(\n",
        "    MODEL_NAME,\n",
        "    device_map=DEVICE_MAP,\n",
        "\n",
        "    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
        ")\n",
        "\n",
        "print(\"Model loaded successfully.\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 116,
          "referenced_widgets": [
            "23a2c2f641d34f47ba763ddc4c3c30b6",
            "bdede25d1d99431b813997ca27255b07",
            "57cfa3f6bf1b4f099f0faee8550c9e9a",
            "7bbe65479ecc4c8abe6f66ebe0fcd439",
            "2c705c96605847428f68baa2a2831e5b",
            "caf004f12763403aa8147a6ba7b9110f",
            "17c046a5112e4eddb8812a98a4ff513d",
            "7da5a233d937487bb71ec8e6cb5233de",
            "2faa85f196e5414591b6bfe069559965",
            "e37db1c042df41a3bc04b7c99a7b992f",
            "781d845c7035474eabf743c52ac4509a",
            "92dd7ad48cfe4a25abfb7ed7ce43e596",
            "1abedc7d4a91404283cc04cdf02f11f2",
            "b5c11deca28f46e4a41c9238986bbcc3",
            "8f7522d9081947979d34d3386d09689e",
            "6990c87de91a4ad9802e2f2207f116e0",
            "a49f51090ece4f588cdd4d80c801388a",
            "cc35ecd21f4245e08bee545cd8363f0f",
            "07c0d6837604437f94d1f4004f7266c2",
            "dd6da5b8a5bf4814be339ef588faba1a",
            "a6921e563ded4f21b765cfad02b75996",
            "a4846bf223c04b68b3013474bd80d57e"
          ]
        },
        "id": "qjkvY6CekE2h",
        "outputId": "f96af983-092a-455c-ee3a-093cf44b064d"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loading model: Qwen/Qwen2.5-3B-Instruct\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading weights:   0%|          | 0/434 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "23a2c2f641d34f47ba763ddc4c3c30b6"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "92dd7ad48cfe4a25abfb7ed7ce43e596"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model loaded successfully.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "T8LmQIp3leRn"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# ============================================================\n",
        "# 4. GENERATION FUNCTION\n",
        "# ============================================================\n",
        "# This is the central helper that sends a prompt to the model and\n",
        "# returns generated text.\n",
        "\n",
        "def generate_text(\n",
        "    prompt,\n",
        "    max_new_tokens=128,\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    top_p=1.0,\n",
        "    repetition_penalty=1.0,\n",
        "):\n",
        "    \"\"\"\n",
        "    Generate a continuation of `prompt` and return only the new text.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    prompt : str\n",
        "        The full input prompt given to the model.\n",
        "\n",
        "    max_new_tokens : int\n",
        "        Maximum number of NEW tokens the model is allowed to generate.\n",
        "        - Larger values allow longer answers.\n",
        "        - Too large can make generation slower and less controlled.\n",
        "        - For classification, 32-128 is usually enough.\n",
        "        - For reasoning traces, 128-256 may be useful.\n",
        "\n",
        "    temperature : float\n",
        "        Controls randomness of token sampling. Only used when sampling.\n",
        "        - 0.0 (or lower) = greedy decoding, even if do_sample=True\n",
        "        - 0.3 = low randomness\n",
        "        - 0.7 = more creative/diverse\n",
        "        For evaluation experiments, lower temperature is better.\n",
        "        For self-consistency, we intentionally use > 0 so outputs vary.\n",
        "\n",
        "    do_sample : bool\n",
        "        Whether the model samples from the token distribution.\n",
        "        - False = greedy / deterministic decoding behavior\n",
        "        - True  = stochastic decoding (needs temperature > 0)\n",
        "        Use False for direct fair comparisons.\n",
        "        Use True for self-consistency because we WANT multiple paths.\n",
        "\n",
        "    top_p : float\n",
        "        Nucleus sampling parameter. Only used when sampling.\n",
        "        The model samples only from the smallest set of tokens whose\n",
        "        cumulative probability reaches top_p.\n",
        "        - 1.0 means effectively no truncation\n",
        "        - 0.9 keeps only high-probability mass\n",
        "        Typical values: 0.8 to 0.95 when sampling.\n",
        "\n",
        "    repetition_penalty : float\n",
        "        Penalizes repeated token generation.\n",
        "        - 1.0 = no penalty\n",
        "        - >1.0 discourages repetition\n",
        "        Here we keep it simple, usually 1.0 or 1.05.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    str\n",
        "        The newly generated text, without the prompt, without special\n",
        "        tokens, and with surrounding whitespace stripped.\n",
        "    \"\"\"\n",
        "\n",
        "    # NOTE(review): MODEL_NAME is an instruct model but the prompt is sent\n",
        "    # as raw text; consider tokenizer.apply_chat_template - TODO confirm.\n",
        "\n",
        "    # Convert text prompt into token ids and move them to the model device.\n",
        "    inputs = tokenizer(prompt, return_tensors=\"pt\", padding=True, truncation=True)\n",
        "    inputs = {k: v.to(model.device) for k, v in inputs.items()}\n",
        "\n",
        "    # Number of prompt tokens; everything after this index is new output.\n",
        "    prompt_length = inputs[\"input_ids\"].shape[1]\n",
        "\n",
        "    generation_kwargs = {\n",
        "        \"max_new_tokens\": max_new_tokens,\n",
        "        \"repetition_penalty\": repetition_penalty,\n",
        "        \"pad_token_id\": tokenizer.pad_token_id,\n",
        "        \"eos_token_id\": tokenizer.eos_token_id,\n",
        "    }\n",
        "\n",
        "    # Sampling needs a strictly positive temperature. A non-positive value\n",
        "    # is treated as a request for greedy decoding instead of being passed\n",
        "    # on (Transformers rejects temperature=0.0 when sampling).\n",
        "    use_sampling = bool(do_sample) and temperature is not None and temperature > 0\n",
        "    generation_kwargs[\"do_sample\"] = use_sampling\n",
        "\n",
        "    # Sampling-only flags are forwarded only when sampling, so greedy calls\n",
        "    # do not pass flags that generate() reports as invalid.\n",
        "    if use_sampling:\n",
        "        generation_kwargs[\"temperature\"] = temperature\n",
        "        generation_kwargs[\"top_p\"] = top_p\n",
        "\n",
        "    with torch.no_grad():\n",
        "        outputs = model.generate(**inputs, **generation_kwargs)\n",
        "\n",
        "    # Drop the prompt by token count. Comparing decoded strings is fragile:\n",
        "    # if decoding does not reproduce the prompt character for character,\n",
        "    # the prompt (which names every label) would leak into the result.\n",
        "    new_tokens = outputs[0][prompt_length:]\n",
        "    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()\n"
      ],
      "metadata": {
        "id": "2PX8xpm9kNPf"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 5. SMALL TOY DATASET FOR CLASSIFICATION\n",
        "# ============================================================\n",
        "# We keep the dataset tiny on purpose for easy use.\n",
        "# Students can extend it later.\n",
        "\n",
        "classification_data = [\n",
        "    {\"text\": \"I believe things can improve and I want to keep trying.\", \"label\": \"Hope\"},\n",
        "    {\"text\": \"Next year will be better for my family, I can feel it.\", \"label\": \"Hope\"},\n",
        "    {\"text\": \"I still have a chance to fix my life if I work hard.\", \"label\": \"Hope\"},\n",
        "    {\"text\": \"Nothing will change, everything is ruined already.\", \"label\": \"Hopelessness\"},\n",
        "    {\"text\": \"I do not see any future for myself anymore.\", \"label\": \"Hopelessness\"},\n",
        "    {\"text\": \"No matter what I do, things always get worse.\", \"label\": \"Hopelessness\"},\n",
        "    {\"text\": \"I bought bread, milk, and eggs from the store.\", \"label\": \"None\"},\n",
        "    {\"text\": \"The weather is warm today and the bus arrived late.\", \"label\": \"None\"},\n",
        "    {\"text\": \"She sent me the report before lunch.\", \"label\": \"None\"},\n",
        "]\n",
        "\n",
        "df_cls = pd.DataFrame(classification_data)\n",
        "print(df_cls)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zoJSxsXwlvhr",
        "outputId": "7707aab4-00dc-49aa-fe31-e5cc90a327fc"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                                text         label\n",
            "0  I believe things can improve and I want to kee...          Hope\n",
            "1  Next year will be better for my family, I can ...          Hope\n",
            "2  I still have a chance to fix my life if I work...          Hope\n",
            "3  Nothing will change, everything is ruined alre...  Hopelessness\n",
            "4        I do not see any future for myself anymore.  Hopelessness\n",
            "5      No matter what I do, things always get worse.  Hopelessness\n",
            "6     I bought bread, milk, and eggs from the store.          None\n",
            "7  The weather is warm today and the bus arrived ...          None\n",
            "8               She sent me the report before lunch.          None\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 6. LABEL EXTRACTION HELPER\n",
        "# ============================================================\n",
        "# LLMs do not always follow instructions perfectly.\n",
        "# This helper tries to extract one label from the output.\n",
        "\n",
        "def extract_label(output_text):\n",
        "    \"\"\"\n",
        "    Extract a label from model output.\n",
        "\n",
        "    Allowed labels:\n",
        "    - Hope\n",
        "    - Hopelessness\n",
        "    - None\n",
        "\n",
        "    The parser is rule-based and checks, in order:\n",
        "    1. a first line that consists of nothing but a label\n",
        "       (the direct-prompt format),\n",
        "    2. the first explicit \"Label: <label>\" tag (the CoT format),\n",
        "    3. a keyword search over the whole output as a last resort.\n",
        "\n",
        "    Steps 1 and 2 keep label names that appear in the reasoning, or in\n",
        "    extra text the model writes after its answer, from overriding the\n",
        "    actual answer.\n",
        "\n",
        "    Returns \"UNKNOWN\" when no label can be found (including empty input).\n",
        "    \"\"\"\n",
        "    canonical = {\"hope\": \"Hope\", \"hopelessness\": \"Hopelessness\", \"none\": \"None\"}\n",
        "\n",
        "    text = (output_text or \"\").strip().lower()\n",
        "    if not text:\n",
        "        return \"UNKNOWN\"\n",
        "\n",
        "    # 1) Direct answer: the first line is only the label, possibly wrapped\n",
        "    #    in punctuation or markdown such as **Hope** or \"None.\".\n",
        "    first_line = text.splitlines()[0]\n",
        "    bare = re.fullmatch(r\"\\W*(hopelessness|hope|none)\\W*\", first_line)\n",
        "    if bare:\n",
        "        return canonical[bare.group(1)]\n",
        "\n",
        "    # 2) Tagged answer: take the FIRST \"Label:\" tag; later tags belong to\n",
        "    #    whatever the model generated after its real answer.\n",
        "    tagged = re.search(r\"\\blabel\\s*:\\W*(hopelessness|hope|none)\\b\", text)\n",
        "    if tagged:\n",
        "        return canonical[tagged.group(1)]\n",
        "\n",
        "    # 3) Fallback heuristic when the model does not obey format.\n",
        "    #    \"hopelessness\" is tested first; \\b keeps \"hopeful\" from counting\n",
        "    #    as \"hope\".\n",
        "    if \"hopelessness\" in text:\n",
        "        return \"Hopelessness\"\n",
        "    if re.search(r\"\\bhope\\b\", text):\n",
        "        return \"Hope\"\n",
        "    if re.search(r\"\\bnone\\b\", text):\n",
        "        return \"None\"\n",
        "\n",
        "    return \"UNKNOWN\"\n"
      ],
      "metadata": {
        "id": "9q5yKEM3l0bg"
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 7. PROMPT TEMPLATES\n",
        "# ============================================================\n",
        "# Here we define several prompting styles aligned with the topics\n",
        "#  direct prompting, CoT, few-shot CoT.\n",
        "\n",
        "DIRECT_PROMPT_TEMPLATE = \"\"\"\n",
        "You are a careful NLP classifier.\n",
        "Classify the following text into exactly one label:\n",
        "Hope, Hopelessness, None.\n",
        "\n",
        "Return only the label.\n",
        "\n",
        "Text: {text}\n",
        "Label:\n",
        "\"\"\".strip()\n",
        "\n",
        "# Zero-shot CoT: the reasoning trigger comes AFTER the input text so the\n",
        "# prompt ends on the cue the model is expected to continue from.\n",
        "ZERO_SHOT_COT_TEMPLATE = \"\"\"\n",
        "You are a careful NLP classifier.\n",
        "Classify the following text into exactly one label:\n",
        "Hope, Hopelessness, None.\n",
        "\n",
        "Reason briefly and then give the final label in the format:\n",
        "Reasoning: ...\n",
        "Label: <one label>\n",
        "\n",
        "Text: {text}\n",
        "\n",
        "Let's think step by step.\n",
        "\"\"\".strip()\n",
        "\n",
        "FEW_SHOT_COT_TEMPLATE = \"\"\"\n",
        "You are a careful NLP classifier.\n",
        "Classify the following text into exactly one label:\n",
        "Hope, Hopelessness, None.\n",
        "\n",
        "Use the examples to understand how to reason.\n",
        "Then answer in the format:\n",
        "Reasoning: ...\n",
        "Label: <one label>\n",
        "\n",
        "Example 1:\n",
        "Text: I know recovery will be slow, but I still think life can improve.\n",
        "Reasoning: The speaker expresses a positive future-oriented belief that improvement is possible. That is hope.\n",
        "Label: Hope\n",
        "\n",
        "Example 2:\n",
        "Text: There is no point in trying anymore because nothing will ever get better.\n",
        "Reasoning: The speaker expresses despair and no expectation of improvement. That is hopelessness.\n",
        "Label: Hopelessness\n",
        "\n",
        "Example 3:\n",
        "Text: I cleaned the kitchen and answered two emails.\n",
        "Reasoning: This is a factual statement without hope or despair.\n",
        "Label: None\n",
        "\n",
        "Now classify the following text.\n",
        "\n",
        "Text: {text}\n",
        "\"\"\".strip()\n",
        "\n"
      ],
      "metadata": {
        "id": "Swq2P2Dml66O"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 8. SINGLE-EXPERIMENT RUNNER\n",
        "# ============================================================\n",
        "\n",
        "def run_classification_experiment(\n",
        "    prompt_template,\n",
        "    experiment_name,\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=80,\n",
        "    top_p=0.9,\n",
        "    data=None,\n",
        "):\n",
        "    \"\"\"\n",
        "    Run one classification experiment and print its metrics.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    prompt_template : str\n",
        "        Template with a `{text}` placeholder, filled in for every example.\n",
        "    experiment_name : str\n",
        "        Title printed in the banner above the results.\n",
        "    temperature, do_sample, max_new_tokens, top_p\n",
        "        Decoding settings forwarded unchanged to `generate_text`.\n",
        "    data : list of dict, optional\n",
        "        Examples with \"text\" and \"label\" keys. Defaults to the toy\n",
        "        dataset `classification_data`.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    pandas.DataFrame\n",
        "        One row per example with the columns text, gold, raw_output, pred.\n",
        "    \"\"\"\n",
        "    labels = [\"Hope\", \"Hopelessness\", \"None\"]\n",
        "    examples = classification_data if data is None else data\n",
        "\n",
        "    print(\"=\" * 70)\n",
        "    print(f\"RUNNING: {experiment_name}\")\n",
        "    print(\"=\" * 70)\n",
        "\n",
        "    results = []\n",
        "    for row in examples:\n",
        "        prompt = prompt_template.format(text=row[\"text\"])\n",
        "        output = generate_text(\n",
        "            prompt=prompt,\n",
        "            max_new_tokens=max_new_tokens,\n",
        "            temperature=temperature,\n",
        "            do_sample=do_sample,\n",
        "            top_p=top_p,\n",
        "            repetition_penalty=1.0,\n",
        "        )\n",
        "        results.append(\n",
        "            {\n",
        "                \"text\": row[\"text\"],\n",
        "                \"gold\": row[\"label\"],\n",
        "                \"raw_output\": output,\n",
        "                \"pred\": extract_label(output),\n",
        "            }\n",
        "        )\n",
        "\n",
        "    # Explicit columns keep the frame well-formed even with no examples.\n",
        "    df_res = pd.DataFrame(results, columns=[\"text\", \"gold\", \"raw_output\", \"pred\"])\n",
        "    print(df_res[[\"text\", \"gold\", \"pred\"]])\n",
        "\n",
        "    y_true = df_res[\"gold\"].tolist()\n",
        "    y_pred = df_res[\"pred\"].tolist()\n",
        "\n",
        "    # F1 and the report are restricted to the three real labels, so an\n",
        "    # \"UNKNOWN\" prediction counts as an error, not as a class of its own.\n",
        "    # zero_division=0 matches the report and avoids a warning when a class\n",
        "    # is never predicted.\n",
        "    print(\"\\nAccuracy:\", accuracy_score(y_true, y_pred))\n",
        "    print(\"Macro F1:\", f1_score(y_true, y_pred, average=\"macro\", labels=labels, zero_division=0))\n",
        "    print(\"\\nClassification report:\")\n",
        "    print(classification_report(y_true, y_pred, labels=labels, zero_division=0))\n",
        "\n",
        "    return df_res\n"
      ],
      "metadata": {
        "id": "rZ1kZkF4l_B4"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 9. EXPERIMENT 1A — DIRECT PROMPTING BASELINE\n",
        "# ============================================================\n",
        "# This is the simplest baseline.\n",
        "# No explicit reasoning, no examples, no branching.\n",
        "\n",
        "df_direct = run_classification_experiment(\n",
        "    prompt_template=DIRECT_PROMPT_TEMPLATE,\n",
        "    experiment_name=\"Experiment 1A - Direct Prompting\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        ")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sjBCT4wml0Ua",
        "outputId": "2cbd0548-bf91-4750-c1bc-66035e172335"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "======================================================================\n",
            "RUNNING: Experiment 1A - Direct Prompting\n",
            "======================================================================\n",
            "                                                text          gold  \\\n",
            "0  I believe things can improve and I want to kee...          Hope   \n",
            "1  Next year will be better for my family, I can ...          Hope   \n",
            "2  I still have a chance to fix my life if I work...          Hope   \n",
            "3  Nothing will change, everything is ruined alre...  Hopelessness   \n",
            "4        I do not see any future for myself anymore.  Hopelessness   \n",
            "5      No matter what I do, things always get worse.  Hopelessness   \n",
            "6     I bought bread, milk, and eggs from the store.          None   \n",
            "7  The weather is warm today and the bus arrived ...          None   \n",
            "8               She sent me the report before lunch.          None   \n",
            "\n",
            "           pred  \n",
            "0  Hopelessness  \n",
            "1          Hope  \n",
            "2  Hopelessness  \n",
            "3  Hopelessness  \n",
            "4  Hopelessness  \n",
            "5  Hopelessness  \n",
            "6  Hopelessness  \n",
            "7  Hopelessness  \n",
            "8  Hopelessness  \n",
            "\n",
            "Accuracy: 0.4444444444444444\n",
            "Macro F1: 0.34848484848484845\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "        Hope       1.00      0.33      0.50         3\n",
            "Hopelessness       0.38      1.00      0.55         3\n",
            "        None       0.00      0.00      0.00         3\n",
            "\n",
            "    accuracy                           0.44         9\n",
            "   macro avg       0.46      0.44      0.35         9\n",
            "weighted avg       0.46      0.44      0.35         9\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 10. EXPERIMENT 1B — ZERO-SHOT CHAIN-OF-THOUGHT\n",
        "# ============================================================\n",
        "# Here we add the classic trigger \"Let's think step by step.\"\n",
        "# This checks whether reasoning language helps even without examples.\n",
        "\n",
        "df_zs_cot = run_classification_experiment(\n",
        "    prompt_template=ZERO_SHOT_COT_TEMPLATE,\n",
        "    experiment_name=\"Experiment 1B - Zero-shot CoT\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        ")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XBQrt-vxmGYc",
        "outputId": "793ecd48-e1d8-4a1e-99bb-acb3886d49cc"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "======================================================================\n",
            "RUNNING: Experiment 1B - Zero-shot CoT\n",
            "======================================================================\n",
            "                                                text          gold  \\\n",
            "0  I believe things can improve and I want to kee...          Hope   \n",
            "1  Next year will be better for my family, I can ...          Hope   \n",
            "2  I still have a chance to fix my life if I work...          Hope   \n",
            "3  Nothing will change, everything is ruined alre...  Hopelessness   \n",
            "4        I do not see any future for myself anymore.  Hopelessness   \n",
            "5      No matter what I do, things always get worse.  Hopelessness   \n",
            "6     I bought bread, milk, and eggs from the store.          None   \n",
            "7  The weather is warm today and the bus arrived ...          None   \n",
            "8               She sent me the report before lunch.          None   \n",
            "\n",
            "           pred  \n",
            "0  Hopelessness  \n",
            "1  Hopelessness  \n",
            "2  Hopelessness  \n",
            "3  Hopelessness  \n",
            "4  Hopelessness  \n",
            "5  Hopelessness  \n",
            "6  Hopelessness  \n",
            "7  Hopelessness  \n",
            "8  Hopelessness  \n",
            "\n",
            "Accuracy: 0.3333333333333333\n",
            "Macro F1: 0.16666666666666666\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "        Hope       0.00      0.00      0.00         3\n",
            "Hopelessness       0.33      1.00      0.50         3\n",
            "        None       0.00      0.00      0.00         3\n",
            "\n",
            "    accuracy                           0.33         9\n",
            "   macro avg       0.11      0.33      0.17         9\n",
            "weighted avg       0.11      0.33      0.17         9\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 11. EXPERIMENT 1C — FEW-SHOT CHAIN-OF-THOUGHT\n",
        "# ============================================================\n",
        "# Here we provide demonstration examples with reasoning.\n",
        "# This teaches the model HOW to answer, not just WHAT labels exist.\n",
        "\n",
        "df_fs_cot = run_classification_experiment(\n",
        "    prompt_template=FEW_SHOT_COT_TEMPLATE,\n",
        "    experiment_name=\"Experiment 1C - Few-shot CoT\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        ")\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "G1Jp8kadmKNc",
        "outputId": "cd8efd02-d766-40f9-a6c2-5b52f3f8eb2e"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "======================================================================\n",
            "RUNNING: Experiment 1C - Few-shot CoT\n",
            "======================================================================\n",
            "                                                text          gold  \\\n",
            "0  I believe things can improve and I want to kee...          Hope   \n",
            "1  Next year will be better for my family, I can ...          Hope   \n",
            "2  I still have a chance to fix my life if I work...          Hope   \n",
            "3  Nothing will change, everything is ruined alre...  Hopelessness   \n",
            "4        I do not see any future for myself anymore.  Hopelessness   \n",
            "5      No matter what I do, things always get worse.  Hopelessness   \n",
            "6     I bought bread, milk, and eggs from the store.          None   \n",
            "7  The weather is warm today and the bus arrived ...          None   \n",
            "8               She sent me the report before lunch.          None   \n",
            "\n",
            "           pred  \n",
            "0  Hopelessness  \n",
            "1          Hope  \n",
            "2          Hope  \n",
            "3  Hopelessness  \n",
            "4  Hopelessness  \n",
            "5  Hopelessness  \n",
            "6          Hope  \n",
            "7          Hope  \n",
            "8          Hope  \n",
            "\n",
            "Accuracy: 0.5555555555555556\n",
            "Macro F1: 0.4523809523809524\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "        Hope       0.40      0.67      0.50         3\n",
            "Hopelessness       0.75      1.00      0.86         3\n",
            "        None       0.00      0.00      0.00         3\n",
            "\n",
            "    accuracy                           0.56         9\n",
            "   macro avg       0.38      0.56      0.45         9\n",
            "weighted avg       0.38      0.56      0.45         9\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 12. COMPARE EXPERIMENT 1 RESULTS\n",
        "# ============================================================\n",
        "\n",
        "def summarize_result(df_res, name):\n",
        "    y_true = df_res[\"gold\"].tolist()\n",
        "    y_pred = df_res[\"pred\"].tolist()\n",
        "    return {\n",
        "        \"experiment\": name,\n",
        "        \"accuracy\": accuracy_score(y_true, y_pred),\n",
        "        \"macro_f1\": f1_score(y_true, y_pred, average=\"macro\", labels=[\"Hope\", \"Hopelessness\", \"None\"]),\n",
        "    }\n",
        "\n",
        "comparison_1 = pd.DataFrame([\n",
        "    summarize_result(df_direct, \"Direct\"),\n",
        "    summarize_result(df_zs_cot, \"Zero-shot CoT\"),\n",
        "    summarize_result(df_fs_cot, \"Few-shot CoT\"),\n",
        "])\n",
        "\n",
        "print(\"\\nComparison of Experiment 1 variants:\")\n",
        "print(comparison_1)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Hvh8bnClmNq-",
        "outputId": "12cda39e-e975-4b9d-ab42-853c3050e19a"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "Comparison of Experiment 1 variants:\n",
            "      experiment  accuracy  macro_f1\n",
            "0         Direct  0.444444  0.348485\n",
            "1  Zero-shot CoT  0.333333  0.166667\n",
            "2   Few-shot CoT  0.555556  0.452381\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 13. EXPERIMENT 2 — SELF-CONSISTENCY\n",
        "# ============================================================\n",
        "# Idea:\n",
        "# Instead of trusting one sampled generation, ask the model multiple times\n",
        "# with sampling enabled, then choose the majority answer.\n",
        "#\n",
        "# Why sampling is necessary here:\n",
        "# If do_sample=False and temperature=0, the model will usually generate the\n",
        "# same answer every time. Then there is nothing to vote over.\n",
        "#\n",
        "# So for self-consistency we intentionally use:\n",
        "# - do_sample=True\n",
        "# - temperature > 0\n",
        "# - several generations (e.g., 5)\n",
        "\n",
        "def self_consistency_predict(prompt, n_samples=5, temperature=0.7, top_p=0.9):\n",
        "    \"\"\"\n",
        "    Generate multiple outputs and use majority voting over extracted labels.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    n_samples : int\n",
        "        Number of independent sampled generations.\n",
        "        - 3 = small and fast\n",
        "        - 5 = a good classroom default\n",
        "        - 10+ = more stable but slower\n",
        "\n",
        "    temperature : float\n",
        "        Higher temperature increases diversity of reasoning paths.\n",
        "        Too high can make outputs noisy.\n",
        "        0.7 is a common compromise.\n",
        "\n",
        "    top_p : float\n",
        "        Keeps sampling within the most likely cumulative probability mass.\n",
        "        0.9 is a common default.\n",
        "    \"\"\"\n",
        "    labels = []\n",
        "    raw_outputs = []\n",
        "\n",
        "    for _ in range(n_samples):\n",
        "        output = generate_text(\n",
        "            prompt=prompt,\n",
        "            max_new_tokens=80,\n",
        "            temperature=temperature,\n",
        "            do_sample=True,\n",
        "            top_p=top_p,\n",
        "            repetition_penalty=1.0,\n",
        "        )\n",
        "        raw_outputs.append(output)\n",
        "        labels.append(extract_label(output))\n",
        "\n",
        "    vote_counts = Counter(labels)\n",
        "    final_label = vote_counts.most_common(1)[0][0]\n",
        "\n",
        "    return final_label, labels, raw_outputs\n",
        "\n",
        "# ------------------------------------------------------------\n",
        "# We apply self-consistency to the zero-shot CoT prompt.\n",
        "# This is pedagogically simple: same prompting idea, but more robust decoding.\n",
        "# ------------------------------------------------------------\n",
        "results_sc = []\n",
        "for row in classification_data:\n",
        "    prompt = ZERO_SHOT_COT_TEMPLATE.format(text=row[\"text\"])\n",
        "    final_pred, sampled_labels, sampled_outputs = self_consistency_predict(\n",
        "        prompt,\n",
        "        n_samples=5,\n",
        "        temperature=0.7,\n",
        "        top_p=0.9,\n",
        "    )\n",
        "    results_sc.append(\n",
        "        {\n",
        "            \"text\": row[\"text\"],\n",
        "            \"gold\": row[\"label\"],\n",
        "            \"pred\": final_pred,\n",
        "            \"sampled_labels\": sampled_labels,\n",
        "            \"sampled_outputs\": sampled_outputs,\n",
        "        }\n",
        "    )\n",
        "\n",
        "df_sc = pd.DataFrame(results_sc)\n",
        "print(df_sc[[\"text\", \"gold\", \"pred\", \"sampled_labels\"]])\n",
        "\n",
        "print(\"\\nExperiment 2 - Self-consistency metrics\")\n",
        "print(\"Accuracy:\", accuracy_score(df_sc[\"gold\"], df_sc[\"pred\"]))\n",
        "print(\"Macro F1:\", f1_score(df_sc[\"gold\"], df_sc[\"pred\"], average=\"macro\", labels=[\"Hope\", \"Hopelessness\", \"None\"]))\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "311D05sBmGGI",
        "outputId": "3b66b75f-94e3-4354-81f8-f99f5beed825"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                                text          gold  \\\n",
            "0  I believe things can improve and I want to kee...          Hope   \n",
            "1  Next year will be better for my family, I can ...          Hope   \n",
            "2  I still have a chance to fix my life if I work...          Hope   \n",
            "3  Nothing will change, everything is ruined alre...  Hopelessness   \n",
            "4        I do not see any future for myself anymore.  Hopelessness   \n",
            "5      No matter what I do, things always get worse.  Hopelessness   \n",
            "6     I bought bread, milk, and eggs from the store.          None   \n",
            "7  The weather is warm today and the bus arrived ...          None   \n",
            "8               She sent me the report before lunch.          None   \n",
            "\n",
            "           pred                                     sampled_labels  \n",
            "0  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "1  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "2  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "3  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "4  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "5  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "6  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "7          Hope     [Hope, Hope, Hope, Hopelessness, Hopelessness]  \n",
            "8  Hopelessness  [Hopelessness, Hopelessness, Hopelessness, Hop...  \n",
            "\n",
            "Experiment 2 - Self-consistency metrics\n",
            "Accuracy: 0.3333333333333333\n",
            "Macro F1: 0.1818181818181818\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 14. SMALL REASONING DATASET FOR EXPERIMENT 3\n",
        "# ============================================================\n",
        "# For Tree-of-Thought-style comparison, a tiny arithmetic dataset is easier\n",
        "# than a complex benchmark. The goal is to demonstrate the idea.\n",
        "\n",
        "reasoning_data = [\n",
        "    {\"question\": \"Roger has 5 tennis balls. He buys 2 cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?\", \"answer\": \"11\"},\n",
        "    {\"question\": \"A cafeteria had 23 apples. It used 20 apples to make lunch and then bought 6 more apples. How many apples does it have now?\", \"answer\": \"9\"},\n",
        "    {\"question\": \"I have 10 apples, I gave away 2 and then ate 1. How many apples do I have left?\", \"answer\": \"7\"},\n",
        "]\n"
      ],
      "metadata": {
        "id": "hXkbK_SCmZ-1"
      },
      "execution_count": 16,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 15. PROMPTS FOR REASONING TASKS\n",
        "# ============================================================\n",
        "DIRECT_REASONING_TEMPLATE = \"\"\"\n",
        "Answer the question with only the final numeric answer.\n",
        "\n",
        "Question: {question}\n",
        "Answer:\n",
        "\"\"\".strip()\n",
        "\n",
        "COT_REASONING_TEMPLATE = \"\"\"\n",
        "Solve the problem step by step.\n",
        "Then provide the final answer in the format:\n",
        "Final Answer: <number>\n",
        "\n",
        "Question: {question}\n",
        "\"\"\".strip()\n",
        "\n"
      ],
      "metadata": {
        "id": "UqE-3SOWmdSJ"
      },
      "execution_count": 17,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 16. EXTRACT FINAL NUMBER\n",
        "# ============================================================\n",
        "\n",
        "def extract_final_number(text):\n",
        "    \"\"\"\n",
        "    Extract the last integer from a generated answer.\n",
        "    This is a simple parser suitable for toy arithmetic tasks.\n",
        "    \"\"\"\n",
        "    nums = re.findall(r\"-?\\d+\", text)\n",
        "    return nums[-1] if nums else \"UNKNOWN\"\n",
        "\n",
        "# ============================================================\n",
        "# 17. SIMPLE TOT-STYLE METHOD\n",
        "# ============================================================\n",
        "# This is NOT a full Tree-of-Thought implementation from the literature.\n",
        "# Instead, it is a simple  approximation:\n",
        "#   Step 1: Ask the model to propose multiple candidate reasoning paths.\n",
        "#   Step 2: Ask the model to evaluate which path is best.\n",
        "#   Step 3: Extract the answer from the chosen path.\n",
        "#\n",
        "# Why use this simplification?\n",
        "# - Easier to understand\n",
        "# - Easy to implement in one notebook\n",
        "# - Works in Colab\n",
        "# - Sufficient to demonstrate branching + evaluation\n",
        "\n",
        "def tot_style_solve(question):\n",
        "    \"\"\"\n",
        "    Simplified Tree-of-Thought-style solver.\n",
        "    \"\"\"\n",
        "\n",
        "    # --------------------------------------------------------\n",
        "    # Phase A: generate candidate paths\n",
        "    # --------------------------------------------------------\n",
        "    # We ask for three different candidate solution paths.\n",
        "    # This introduces branching.\n",
        "    branch_prompt = f\"\"\"\n",
        "You are solving a small arithmetic reasoning problem.\n",
        "Propose 3 different candidate reasoning paths.\n",
        "Each path should be short.\n",
        "At the end of each path, include a line of the form:\n",
        "Answer: <number>\n",
        "\n",
        "Question: {question}\n",
        "\"\"\".strip()\n",
        "\n",
        "    branch_output = generate_text(\n",
        "        prompt=branch_prompt,\n",
        "        max_new_tokens=220,\n",
        "        temperature=0.7,\n",
        "        do_sample=True,\n",
        "        top_p=0.9,\n",
        "        repetition_penalty=1.0,\n",
        "    )\n",
        "\n",
        "    # --------------------------------------------------------\n",
        "    # Phase B: evaluate paths\n",
        "    # --------------------------------------------------------\n",
        "    # Now we ask the same model to act as a judge.\n",
        "    # In more advanced setups, a separate evaluator could be used.\n",
        "    judge_prompt = f\"\"\"\n",
        "You are now an evaluator.\n",
        "Below are candidate solution paths for a question.\n",
        "Choose the best path based on logical correctness.\n",
        "Return your result in exactly this format:\n",
        "Best Path: <path number>\n",
        "Final Answer: <number>\n",
        "\n",
        "Question: {question}\n",
        "\n",
        "Candidate paths:\n",
        "{branch_output}\n",
        "\"\"\".strip()\n",
        "\n",
        "    judge_output = generate_text(\n",
        "        prompt=judge_prompt,\n",
        "        max_new_tokens=120,\n",
        "        temperature=0.0,\n",
        "        do_sample=False,\n",
        "        top_p=1.0,\n",
        "        repetition_penalty=1.0,\n",
        "    )\n",
        "\n",
        "    final_answer = extract_final_number(judge_output)\n",
        "\n",
        "    return {\n",
        "        \"branch_output\": branch_output,\n",
        "        \"judge_output\": judge_output,\n",
        "        \"final_answer\": final_answer,\n",
        "    }\n",
        "\n"
      ],
      "metadata": {
        "id": "R7OEIq9hmi4z"
      },
      "execution_count": 18,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 18. RUN EXPERIMENT 3\n",
        "# ============================================================\n",
        "reasoning_results = []\n",
        "\n",
        "for item in reasoning_data:\n",
        "    q = item[\"question\"]\n",
        "    gold = item[\"answer\"]\n",
        "\n",
        "    # Direct\n",
        "    direct_prompt = DIRECT_REASONING_TEMPLATE.format(question=q)\n",
        "    direct_output = generate_text(\n",
        "        prompt=direct_prompt,\n",
        "        max_new_tokens=32,\n",
        "        temperature=0.0,\n",
        "        do_sample=False,\n",
        "    )\n",
        "    direct_pred = extract_final_number(direct_output)\n",
        "\n",
        "    # CoT\n",
        "    cot_prompt = COT_REASONING_TEMPLATE.format(question=q)\n",
        "    cot_output = generate_text(\n",
        "        prompt=cot_prompt,\n",
        "        max_new_tokens=128,\n",
        "        temperature=0.0,\n",
        "        do_sample=False,\n",
        "    )\n",
        "    cot_pred = extract_final_number(cot_output)\n",
        "\n",
        "    # ToT-style\n",
        "    tot_result = tot_style_solve(q)\n",
        "    tot_pred = tot_result[\"final_answer\"]\n",
        "\n",
        "    reasoning_results.append(\n",
        "        {\n",
        "            \"question\": q,\n",
        "            \"gold\": gold,\n",
        "            \"direct_pred\": direct_pred,\n",
        "            \"cot_pred\": cot_pred,\n",
        "            \"tot_pred\": tot_pred,\n",
        "            \"direct_output\": direct_output,\n",
        "            \"cot_output\": cot_output,\n",
        "            \"tot_branch_output\": tot_result[\"branch_output\"],\n",
        "            \"tot_judge_output\": tot_result[\"judge_output\"],\n",
        "        }\n",
        "    )\n",
        "\n",
        "df_reason = pd.DataFrame(reasoning_results)\n",
        "print(df_reason[[\"question\", \"gold\", \"direct_pred\", \"cot_pred\", \"tot_pred\"]])\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DV9LIbN3mmSc",
        "outputId": "15fbbad1-ac1c-4274-c671-2995c01a8448"
      },
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "The following generation flags are not valid and may be ignored: ['temperature', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                            question gold direct_pred  \\\n",
            "0  Roger has 5 tennis balls. He buys 2 cans of te...   11           1   \n",
            "1  A cafeteria had 23 apples. It used 20 apples t...    9           2   \n",
            "2  I have 10 apples, I gave away 2 and then ate 1...    7           1   \n",
            "\n",
            "  cot_pred tot_pred  \n",
            "0        2        4  \n",
            "1        9        3  \n",
            "2       10        2  \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 19. SIMPLE ACCURACY COMPARISON FOR EXPERIMENT 3\n",
        "# ============================================================\n",
        "\n",
        "def simple_exact_match(gold_list, pred_list):\n",
        "    correct = 0\n",
        "    for g, p in zip(gold_list, pred_list):\n",
        "        if str(g).strip() == str(p).strip():\n",
        "            correct += 1\n",
        "    return correct / len(gold_list)\n",
        "\n",
        "reason_summary = pd.DataFrame([\n",
        "    {\n",
        "        \"method\": \"Direct\",\n",
        "        \"accuracy\": simple_exact_match(df_reason[\"gold\"], df_reason[\"direct_pred\"]),\n",
        "    },\n",
        "    {\n",
        "        \"method\": \"CoT\",\n",
        "        \"accuracy\": simple_exact_match(df_reason[\"gold\"], df_reason[\"cot_pred\"]),\n",
        "    },\n",
        "    {\n",
        "        \"method\": \"Simple ToT-style\",\n",
        "        \"accuracy\": simple_exact_match(df_reason[\"gold\"], df_reason[\"tot_pred\"]),\n",
        "    },\n",
        "])\n",
        "\n",
        "print(\"\\nExperiment 3 summary:\")\n",
        "print(reason_summary)\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AaRFVtPxmW0X",
        "outputId": "54375e56-ab00-4c9d-f029-ec06fc5b9701"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "Experiment 3 summary:\n",
            "             method  accuracy\n",
            "0            Direct  0.000000\n",
            "1               CoT  0.333333\n",
            "2  Simple ToT-style  0.000000\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 20. OPTIONAL: SAVE RESULTS\n",
        "# ============================================================\n",
        "# Saving outputs is useful for class discussion and grading.\n",
        "# Students can inspect mistakes after the run.\n",
        "\n",
        "comparison_1.to_csv(\"experiment1_prompt_comparison.csv\", index=False)\n",
        "df_sc.to_csv(\"experiment2_self_consistency.csv\", index=False)\n",
        "df_reason.to_csv(\"experiment3_reasoning_comparison.csv\", index=False)\n",
        "\n",
        "print(\"\\nSaved result files:\")\n",
        "print(\"- experiment1_prompt_comparison.csv\")\n",
        "print(\"- experiment2_self_consistency.csv\")\n",
        "print(\"- experiment3_reasoning_comparison.csv\")\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0MnZzn_KmqY1",
        "outputId": "cbe2b53a-66d0-4e2c-876d-929386fdae33"
      },
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "Saved result files:\n",
            "- experiment1_prompt_comparison.csv\n",
            "- experiment2_self_consistency.csv\n",
            "- experiment3_reasoning_comparison.csv\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "9r19o5tblvY3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "# ============================================================\n",
        "# 21.  DISCUSSION QUESTIONS\n",
        "# ============================================================\n",
        "#\n",
        "# 1) Did Zero-shot CoT help more than direct prompting?\n",
        "#    - If yes, why?\n",
        "#    - If not, why might the task be too simple?\n",
        "#\n",
        "# 2) Did Few-shot CoT help more than Zero-shot CoT?\n",
        "#    - What do the examples teach the model?\n",
        "#    - Does the model imitate reasoning structure?\n",
        "#\n",
        "# 3) Did self-consistency improve robustness?\n",
        "#    - Were sampled outputs diverse?\n",
        "#    - Did majority vote remove noisy answers?\n",
        "#\n",
        "# 4) Did the simple ToT-style method help on arithmetic questions?\n",
        "#    - When does branching help?\n",
        "#    - When does it just add cost?\n",
        "#\n",
        "# 5) What are the tradeoffs?\n",
        "#    - Direct prompting: cheap and fast\n",
        "#    - CoT: better reasoning, longer output\n",
        "#    - Self-consistency: more stable, more expensive\n",
        "#    - ToT-style: more exploratory, more expensive and more complex\n",
        "#\n",
        "# ============================================================\n",
        "# 22. EASY EXTENSIONS\n",
        "# ============================================================\n",
        "# Experiments can be extended by:\n",
        "# - Trying another open-source model (e.g., Qwen 1.5B vs 3B)\n",
        "# - Increasing the dataset size\n",
        "# - Testing Spanish examples instead of English\n",
        "# - Using sentiment labels instead of Hope/Hopelessness/None\n",
        "# - Comparing role prompting vs no role prompting\n",
        "# - Adding structured JSON output with validation\n",
        "# - Measuring latency per prompting strategy\n",
        "\n"
      ],
      "metadata": {
        "id": "OaSu5JMYlfDI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "08XXTIB4prAv"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "#\n",
        "# Goal:\n",
        "# This script is designed for experiments on prompting methods\n",
        "# using a SIMPLE sentiment analysis task\n",
        "#\n",
        "# Main idea:\n",
        "# We want experiments to SEE differences between:\n",
        "#   1) Direct prompting\n",
        "#   2) Role prompting\n",
        "#   3) Zero-shot Chain-of-Thought (CoT)\n",
        "#   4) Few-shot prompting\n",
        "#   5) Few-shot CoT\n",
        "#   6) Self-consistency\n",
        "#   7) Output-constrained JSON prompting\n",
        "#\n",
        "\n",
        "#\n",
        "# ============================================================\n",
        "# EXPERIMENT OVERVIEW\n",
        "# ============================================================\n",
        "# Experiment 1: Direct prompting baseline\n",
        "# Experiment 2: Role prompting\n",
        "# Experiment 3: Zero-shot CoT\n",
        "# Experiment 4: Few-shot prompting\n",
        "# Experiment 5: Few-shot CoT\n",
        "# Experiment 6: Self-consistency with sampled decoding\n",
        "# Experiment 7: Structured JSON output + validation\n",
        "#\n",
        "# Task:\n",
        "# Classify text into one of three labels:\n",
        "#   - positive\n",
        "#   - negative\n",
        "#   - neutral\n",
        "#\n",
        "# We intentionally use a small custom dataset so the notebook remains easy\n",
        "# to run and inspect. Students can later replace it with SST-2, SST-5,\n",
        "# TweetEval sentiment, or another dataset.\n",
        "# ============================================================\n",
        "\n",
        "# ============================================================\n",
        "# 0. INSTALLATION\n",
        "# ============================================================\n",
        "# In Google Colab, uncomment and run once:\n",
        "#\n",
        "# !pip install -q transformers accelerate bitsandbytes scikit-learn pandas\n",
        "#\n",
        "# Package explanation:\n",
        "# - transformers: loads tokenizer and LLM\n",
        "# - accelerate: helps model execution on GPU\n",
        "# - bitsandbytes: allows 4-bit quantization for smaller memory use\n",
        "# - scikit-learn: evaluation metrics\n",
        "# - pandas: result tables\n",
        "# ============================================================"
      ],
      "metadata": {
        "id": "2AHg0wV8prgd"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -q transformers accelerate bitsandbytes scikit-learn pandas"
      ],
      "metadata": {
        "id": "siu52qo_qAZm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 1. IMPORTS\n",
        "# ============================================================\n",
        "import re\n",
        "import json\n",
        "import random\n",
        "from collections import Counter\n",
        "\n",
        "import pandas as pd\n",
        "from sklearn.metrics import accuracy_score, f1_score, classification_report\n",
        "\n",
        "import torch\n",
        "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
        "\n",
        "# ============================================================\n",
        "# 2. REPRODUCIBILITY\n",
        "# ============================================================\n",
        "SEED = 42\n",
        "random.seed(SEED)\n",
        "torch.manual_seed(SEED)\n",
        "if torch.cuda.is_available():\n",
        "    torch.cuda.manual_seed_all(SEED)\n"
      ],
      "metadata": {
        "id": "lQuir1jpp-4G"
      },
      "execution_count": 22,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 3. MODEL CONFIGURATION\n",
        "# ============================================================\n",
        "# Recommended Colab-friendly open-source instruct models:\n",
        "# - Qwen/Qwen2.5-1.5B-Instruct   -> lighter, faster\n",
        "# - Qwen/Qwen2.5-3B-Instruct     -> better quality, still manageable\n",
        "# - google/gemma-2-2b-it         -> possible alternative\n",
        "#\n",
        "# Default choice:\n",
        "# Qwen 2.5 3B Instruct usually gives clearer sentiment results than very tiny models.\n",
        "MODEL_NAME = \"Qwen/Qwen2.5-3B-Instruct\"\n",
        "\n",
        "\n",
        "\n",
        "# device_map=\"auto\" lets Transformers place model layers automatically.\n",
        "DEVICE_MAP = \"auto\"\n",
        "\n",
        "print(f\"Loading model: {MODEL_NAME}\")\n",
        "\n",
        "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n",
        "if tokenizer.pad_token is None:\n",
        "    tokenizer.pad_token = tokenizer.eos_token\n",
        "\n",
        "model = AutoModelForCausalLM.from_pretrained(\n",
        "    MODEL_NAME,\n",
        "    device_map=DEVICE_MAP,\n",
        "    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
        ")\n",
        "\n",
        "print(\"Model loaded successfully.\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 84,
          "referenced_widgets": [
            "a025bd57d6eb4763ac35a61465bb8f11",
            "8c6b2179c0d2475f8482c09c642d530c",
            "fc8f7fe550fc475d86cf7e82daddd4d9",
            "6b8e4dab33734f92869a4e24bdb4e41a",
            "6c65ffc393e14d96a1bce8451ebc1704",
            "c4b975aa14614cd6ab0ca4b047f59e7f",
            "3cf86c0829c9439ebb20f56fd0f3c178",
            "141baed02735407f897cea750084d0c2",
            "43a102b292ed403eb604446ffafbe28e",
            "61289affeccd4c3ba7b4ff84d29033d6",
            "5158f4614d1c4411831b1ee44a7609dc"
          ]
        },
        "id": "yaveHJLtp-zy",
        "outputId": "6db199a6-1745-4f21-cc84-a18fa382175c"
      },
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Loading model: Qwen/Qwen2.5-3B-Instruct\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading weights:   0%|          | 0/434 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "a025bd57d6eb4763ac35a61465bb8f11"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Model loaded successfully.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 4. GENERATION HELPER\n",
        "# ============================================================\n",
        "def generate_text(\n",
        "    prompt,\n",
        "    max_new_tokens=64,\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    top_p=1.0,\n",
        "    repetition_penalty=1.0,\n",
        "):\n",
        "    \"\"\"\n",
        "    Generate text from the model.\n",
        "\n",
        "    Hyperparameters explained:\n",
        "    --------------------------\n",
        "    max_new_tokens:\n",
        "        Maximum number of NEW tokens generated.\n",
        "        For classification, short outputs are better because they reduce drift.\n",
        "        Typical range here: 16 to 80.\n",
        "\n",
        "    temperature:\n",
        "        Controls randomness.\n",
        "        0.0 = deterministic / nearly greedy\n",
        "        0.7 = more diverse outputs\n",
        "        For evaluation, use 0.0.\n",
        "        For self-consistency, use > 0.\n",
        "\n",
        "    do_sample:\n",
        "        False = deterministic decoding\n",
        "        True  = stochastic decoding\n",
        "        Needed for self-consistency because we want multiple diverse outputs.\n",
        "\n",
        "    top_p:\n",
        "        Nucleus sampling threshold.\n",
        "        1.0 means almost no restriction.\n",
        "        0.9 keeps only high-probability mass.\n",
        "\n",
        "    repetition_penalty:\n",
        "        Penalizes repeating tokens.\n",
        "        1.0 means no penalty.\n",
        "        Slight values like 1.05 can reduce repetitive outputs.\n",
        "    \"\"\"\n",
        "    inputs = tokenizer(prompt, return_tensors=\"pt\", truncation=True, padding=True)\n",
        "    inputs = {k: v.to(model.device) for k, v in inputs.items()}\n",
        "\n",
        "    with torch.no_grad():\n",
        "        outputs = model.generate(\n",
        "            **inputs,\n",
        "            max_new_tokens=max_new_tokens,\n",
        "            temperature=temperature,\n",
        "            do_sample=do_sample,\n",
        "            top_p=top_p,\n",
        "            repetition_penalty=repetition_penalty,\n",
        "            pad_token_id=tokenizer.pad_token_id,\n",
        "            eos_token_id=tokenizer.eos_token_id,\n",
        "        )\n",
        "\n",
        "    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
        "    if decoded.startswith(prompt):\n",
        "        return decoded[len(prompt):].strip()\n",
        "    return decoded.strip()\n"
      ],
      "metadata": {
        "id": "kLWHh0_Tp-t0"
      },
      "execution_count": 24,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 5. SENTIMENT DATASET\n",
        "# ============================================================\n",
        "# We use a small but balanced 3-class dataset.\n",
        "# Labels are intentionally lowercase to simplify parsing.\n",
        "# The examples include easy and medium-difficulty cases.\n",
        "\n",
        "sa_data = [\n",
        "    {\"text\": \"I absolutely loved this movie, it was funny and emotional.\", \"label\": \"positive\"},\n",
        "    {\"text\": \"The hotel staff were kind and the room was very clean.\", \"label\": \"positive\"},\n",
        "    {\"text\": \"This phone is excellent, fast, and easy to use.\", \"label\": \"positive\"},\n",
        "    {\"text\": \"The food was delicious and arrived earlier than expected.\", \"label\": \"positive\"},\n",
        "    {\"text\": \"I am very happy with the quality of this product.\", \"label\": \"positive\"},\n",
        "    {\"text\": \"The service was terrible and nobody helped me.\", \"label\": \"negative\"},\n",
        "    {\"text\": \"I regret buying this, it stopped working after two days.\", \"label\": \"negative\"},\n",
        "    {\"text\": \"The plot was boring and the acting was weak.\", \"label\": \"negative\"},\n",
        "    {\"text\": \"This was a frustrating experience and a waste of money.\", \"label\": \"negative\"},\n",
        "    {\"text\": \"The package arrived damaged and I am disappointed.\", \"label\": \"negative\"},\n",
        "    {\"text\": \"The meeting starts at 3 PM in room 201.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"I bought a notebook and a pen from the store.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"The train reached the station ten minutes late.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"She sent the document yesterday afternoon.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"The phone comes in black, blue, and silver colors.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"The movie was not bad, but it was not especially memorable either.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"The meal was okay, nothing special but not awful.\", \"label\": \"neutral\"},\n",
        "    {\"text\": \"It works fine, though I expected a little more from it.\", \"label\": \"neutral\"},\n",
        "]\n",
        "\n",
        "df_sa = pd.DataFrame(sa_data)\n",
        "print(df_sa)\n",
        "\n",
        "# ============================================================\n",
        "# 6. LABEL EXTRACTION\n",
        "# ============================================================\n",
        "VALID_LABELS = [\"positive\", \"negative\", \"neutral\"]\n",
        "\n",
        "def extract_label(output_text):\n",
        "    \"\"\"\n",
        "    Extract one sentiment label from raw model output.\n",
        "    We keep this parser simple and transparent.\n",
        "    \"\"\"\n",
        "    text = output_text.strip().lower()\n",
        "\n",
        "    # JSON case first\n",
        "    try:\n",
        "        parsed = json.loads(text)\n",
        "        if isinstance(parsed, dict) and \"label\" in parsed:\n",
        "            label = str(parsed[\"label\"]).strip().lower()\n",
        "            if label in VALID_LABELS:\n",
        "                return label\n",
        "    except Exception:\n",
        "        pass\n",
        "\n",
        "    # Explicit label patterns\n",
        "    for label in VALID_LABELS:\n",
        "        if re.search(rf\"\\b{label}\\b\", text):\n",
        "            return label\n",
        "\n",
        "    return \"unknown\"\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9KDIUxMzqREU",
        "outputId": "5e29e3f6-7b41-4938-937d-aad6909526aa"
      },
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                                 text     label\n",
            "0   I absolutely loved this movie, it was funny an...  positive\n",
            "1   The hotel staff were kind and the room was ver...  positive\n",
            "2     This phone is excellent, fast, and easy to use.  positive\n",
            "3   The food was delicious and arrived earlier tha...  positive\n",
            "4   I am very happy with the quality of this product.  positive\n",
            "5      The service was terrible and nobody helped me.  negative\n",
            "6   I regret buying this, it stopped working after...  negative\n",
            "7        The plot was boring and the acting was weak.  negative\n",
            "8   This was a frustrating experience and a waste ...  negative\n",
            "9   The package arrived damaged and I am disappoin...  negative\n",
            "10            The meeting starts at 3 PM in room 201.   neutral\n",
            "11      I bought a notebook and a pen from the store.   neutral\n",
            "12    The train reached the station ten minutes late.   neutral\n",
            "13         She sent the document yesterday afternoon.   neutral\n",
            "14  The phone comes in black, blue, and silver col...   neutral\n",
            "15  The movie was not bad, but it was not especial...   neutral\n",
            "16  The meal was okay, nothing special but not awful.   neutral\n",
            "17  It works fine, though I expected a little more...   neutral\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 7. PROMPT TEMPLATES\n",
        "# ============================================================\n",
        "DIRECT_PROMPT = \"\"\"\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Return only the label.\n",
        "\n",
        "Text: {text}\n",
        "Label:\n",
        "\"\"\".strip()\n",
        "\n",
        "ROLE_PROMPT = \"\"\"\n",
        "You are an expert sentiment analysis system.\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Return only the label.\n",
        "\n",
        "Text: {text}\n",
        "Label:\n",
        "\"\"\".strip()\n",
        "\n",
        "ZERO_SHOT_COT_PROMPT = \"\"\"\n",
        "You are an expert sentiment analysis system.\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Think briefly step by step, then return the final answer in this format:\n",
        "Reasoning: ...\n",
        "Label: <label>\n",
        "\n",
        "Text: {text}\n",
        "\"\"\".strip()\n",
        "\n",
        "FEW_SHOT_PROMPT = \"\"\"\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Return only the label.\n",
        "\n",
        "Example 1:\n",
        "Text: I loved the acting and the soundtrack was amazing.\n",
        "Label: positive\n",
        "\n",
        "Example 2:\n",
        "Text: The device broke on the first day and customer support ignored me.\n",
        "Label: negative\n",
        "\n",
        "Example 3:\n",
        "Text: The conference starts tomorrow at 9 AM.\n",
        "Label: neutral\n",
        "\n",
        "Now classify this text.\n",
        "Text: {text}\n",
        "Label:\n",
        "\"\"\".strip()\n",
        "\n",
        "FEW_SHOT_COT_PROMPT = \"\"\"\n",
        "You are an expert sentiment analysis system.\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Use the examples to guide your reasoning.\n",
        "Return your answer in this format:\n",
        "Reasoning: ...\n",
        "Label: <label>\n",
        "\n",
        "Example 1:\n",
        "Text: I loved the acting and the soundtrack was amazing.\n",
        "Reasoning: The opinion is clearly favorable and expresses enjoyment.\n",
        "Label: positive\n",
        "\n",
        "Example 2:\n",
        "Text: The device broke on the first day and customer support ignored me.\n",
        "Reasoning: The text expresses dissatisfaction and a bad experience.\n",
        "Label: negative\n",
        "\n",
        "Example 3:\n",
        "Text: The conference starts tomorrow at 9 AM.\n",
        "Reasoning: This is factual information without clear positive or negative emotion.\n",
        "Label: neutral\n",
        "\n",
        "Now classify this text.\n",
        "Text: {text}\n",
        "\"\"\".strip()\n",
        "\n",
        "JSON_PROMPT = \"\"\"\n",
        "You are an expert sentiment analysis system.\n",
        "Classify the sentiment of the following text into exactly one label:\n",
        "positive, negative, neutral.\n",
        "Return ONLY valid JSON in one line, with this schema:\n",
        "{\"label\": \"positive|negative|neutral\"}\n",
        "\n",
        "Text: {text}\n",
        "\"\"\".strip()\n"
      ],
      "metadata": {
        "id": "lNNxo7pxqRBU"
      },
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 8. EVALUATION HELPERS\n",
        "# ============================================================\n",
        "def run_experiment(prompt_template, experiment_name, temperature=0.0, do_sample=False, max_new_tokens=64):\n",
        "    results = []\n",
        "    print(\"=\" * 70)\n",
        "    print(experiment_name)\n",
        "    print(\"=\" * 70)\n",
        "\n",
        "    for row in sa_data:\n",
        "        prompt = prompt_template.format(text=row[\"text\"])\n",
        "        output = generate_text(\n",
        "            prompt=prompt,\n",
        "            max_new_tokens=max_new_tokens,\n",
        "            temperature=temperature,\n",
        "            do_sample=do_sample,\n",
        "            top_p=0.9,\n",
        "            repetition_penalty=1.0,\n",
        "        )\n",
        "        pred = extract_label(output)\n",
        "        results.append(\n",
        "            {\n",
        "                \"text\": row[\"text\"],\n",
        "                \"gold\": row[\"label\"],\n",
        "                \"pred\": pred,\n",
        "                \"raw_output\": output,\n",
        "            }\n",
        "        )\n",
        "\n",
        "    df_res = pd.DataFrame(results)\n",
        "    print(df_res[[\"text\", \"gold\", \"pred\"]])\n",
        "\n",
        "    y_true = df_res[\"gold\"].tolist()\n",
        "    y_pred = df_res[\"pred\"].tolist()\n",
        "\n",
        "    acc = accuracy_score(y_true, y_pred)\n",
        "    macro_f1 = f1_score(y_true, y_pred, average=\"macro\", labels=VALID_LABELS)\n",
        "\n",
        "    print(\"\\nAccuracy:\", acc)\n",
        "    print(\"Macro F1:\", macro_f1)\n",
        "    print(\"\\nClassification report:\")\n",
        "    print(classification_report(y_true, y_pred, labels=VALID_LABELS, zero_division=0))\n",
        "\n",
        "    return df_res, acc, macro_f1\n",
        "\n"
      ],
      "metadata": {
        "id": "gR-ggJi3qZ50"
      },
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 9. EXPERIMENTS 1 TO 5\n",
        "# ============================================================\n",
        "df_direct, acc_direct, f1_direct = run_experiment(\n",
        "    DIRECT_PROMPT,\n",
        "    \"Experiment 1 - Direct Prompting\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=16,\n",
        ")\n",
        "\n",
        "df_role, acc_role, f1_role = run_experiment(\n",
        "    ROLE_PROMPT,\n",
        "    \"Experiment 2 - Role Prompting\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=16,\n",
        ")\n",
        "\n",
        "df_zs_cot, acc_zs_cot, f1_zs_cot = run_experiment(\n",
        "    ZERO_SHOT_COT_PROMPT,\n",
        "    \"Experiment 3 - Zero-shot CoT\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=64,\n",
        ")\n",
        "\n",
        "df_fs, acc_fs, f1_fs = run_experiment(\n",
        "    FEW_SHOT_PROMPT,\n",
        "    \"Experiment 4 - Few-shot Prompting\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=16,\n",
        ")\n",
        "\n",
        "df_fs_cot, acc_fs_cot, f1_fs_cot = run_experiment(\n",
        "    FEW_SHOT_COT_PROMPT,\n",
        "    \"Experiment 5 - Few-shot CoT\",\n",
        "    temperature=0.0,\n",
        "    do_sample=False,\n",
        "    max_new_tokens=64,\n",
        ")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Jgo07t3nqdLR",
        "outputId": "326eefc3-5a3a-4490-d899-e38a855331c6"
      },
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "======================================================================\n",
            "Experiment 1 - Direct Prompting\n",
            "======================================================================\n",
            "                                                 text      gold      pred\n",
            "0   I absolutely loved this movie, it was funny an...  positive   unknown\n",
            "1   The hotel staff were kind and the room was ver...  positive  positive\n",
            "2     This phone is excellent, fast, and easy to use.  positive  positive\n",
            "3   The food was delicious and arrived earlier tha...  positive  positive\n",
            "4   I am very happy with the quality of this product.  positive   unknown\n",
            "5      The service was terrible and nobody helped me.  negative   unknown\n",
            "6   I regret buying this, it stopped working after...  negative   unknown\n",
            "7        The plot was boring and the acting was weak.  negative  negative\n",
            "8   This was a frustrating experience and a waste ...  negative   unknown\n",
            "9   The package arrived damaged and I am disappoin...  negative  negative\n",
            "10            The meeting starts at 3 PM in room 201.   neutral   neutral\n",
            "11      I bought a notebook and a pen from the store.   neutral   neutral\n",
            "12    The train reached the station ten minutes late.   neutral  negative\n",
            "13         She sent the document yesterday afternoon.   neutral   neutral\n",
            "14  The phone comes in black, blue, and silver col...   neutral   neutral\n",
            "15  The movie was not bad, but it was not especial...   neutral   neutral\n",
            "16  The meal was okay, nothing special but not awful.   neutral   neutral\n",
            "17  It works fine, though I expected a little more...   neutral   neutral\n",
            "\n",
            "Accuracy: 0.6666666666666666\n",
            "Macro F1: 0.7277777777777779\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    positive       1.00      0.60      0.75         5\n",
            "    negative       0.67      0.40      0.50         5\n",
            "     neutral       1.00      0.88      0.93         8\n",
            "\n",
            "   micro avg       0.92      0.67      0.77        18\n",
            "   macro avg       0.89      0.62      0.73        18\n",
            "weighted avg       0.91      0.67      0.76        18\n",
            "\n",
            "======================================================================\n",
            "Experiment 2 - Role Prompting\n",
            "======================================================================\n",
            "                                                 text      gold      pred\n",
            "0   I absolutely loved this movie, it was funny an...  positive  positive\n",
            "1   The hotel staff were kind and the room was ver...  positive  positive\n",
            "2     This phone is excellent, fast, and easy to use.  positive  positive\n",
            "3   The food was delicious and arrived earlier tha...  positive  positive\n",
            "4   I am very happy with the quality of this product.  positive  positive\n",
            "5      The service was terrible and nobody helped me.  negative  negative\n",
            "6   I regret buying this, it stopped working after...  negative  negative\n",
            "7        The plot was boring and the acting was weak.  negative  negative\n",
            "8   This was a frustrating experience and a waste ...  negative  negative\n",
            "9   The package arrived damaged and I am disappoin...  negative  negative\n",
            "10            The meeting starts at 3 PM in room 201.   neutral  positive\n",
            "11      I bought a notebook and a pen from the store.   neutral  positive\n",
            "12    The train reached the station ten minutes late.   neutral  negative\n",
            "13         She sent the document yesterday afternoon.   neutral   neutral\n",
            "14  The phone comes in black, blue, and silver col...   neutral  positive\n",
            "15  The movie was not bad, but it was not especial...   neutral   neutral\n",
            "16  The meal was okay, nothing special but not awful.   neutral   neutral\n",
            "17  It works fine, though I expected a little more...   neutral   neutral\n",
            "\n",
            "Accuracy: 0.7777777777777778\n",
            "Macro F1: 0.7816627816627816\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    positive       0.62      1.00      0.77         5\n",
            "    negative       0.83      1.00      0.91         5\n",
            "     neutral       1.00      0.50      0.67         8\n",
            "\n",
            "    accuracy                           0.78        18\n",
            "   macro avg       0.82      0.83      0.78        18\n",
            "weighted avg       0.85      0.78      0.76        18\n",
            "\n",
            "======================================================================\n",
            "Experiment 3 - Zero-shot CoT\n",
            "======================================================================\n",
            "                                                 text      gold      pred\n",
            "0   I absolutely loved this movie, it was funny an...  positive  positive\n",
            "1   The hotel staff were kind and the room was ver...  positive  positive\n",
            "2     This phone is excellent, fast, and easy to use.  positive  positive\n",
            "3   The food was delicious and arrived earlier tha...  positive  positive\n",
            "4   I am very happy with the quality of this product.  positive  positive\n",
            "5      The service was terrible and nobody helped me.  negative  negative\n",
            "6   I regret buying this, it stopped working after...  negative  positive\n",
            "7        The plot was boring and the acting was weak.  negative  positive\n",
            "8   This was a frustrating experience and a waste ...  negative  positive\n",
            "9   The package arrived damaged and I am disappoin...  negative  positive\n",
            "10            The meeting starts at 3 PM in room 201.   neutral  positive\n",
            "11      I bought a notebook and a pen from the store.   neutral  positive\n",
            "12    The train reached the station ten minutes late.   neutral  positive\n",
            "13         She sent the document yesterday afternoon.   neutral  positive\n",
            "14  The phone comes in black, blue, and silver col...   neutral  positive\n",
            "15  The movie was not bad, but it was not especial...   neutral  positive\n",
            "16  The meal was okay, nothing special but not awful.   neutral  negative\n",
            "17  It works fine, though I expected a little more...   neutral  positive\n",
            "\n",
            "Accuracy: 0.3333333333333333\n",
            "Macro F1: 0.25396825396825395\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    positive       0.31      1.00      0.48         5\n",
            "    negative       0.50      0.20      0.29         5\n",
            "     neutral       0.00      0.00      0.00         8\n",
            "\n",
            "    accuracy                           0.33        18\n",
            "   macro avg       0.27      0.40      0.25        18\n",
            "weighted avg       0.23      0.33      0.21        18\n",
            "\n",
            "======================================================================\n",
            "Experiment 4 - Few-shot Prompting\n",
            "======================================================================\n",
            "                                                 text      gold      pred\n",
            "0   I absolutely loved this movie, it was funny an...  positive   unknown\n",
            "1   The hotel staff were kind and the room was ver...  positive   unknown\n",
            "2     This phone is excellent, fast, and easy to use.  positive   unknown\n",
            "3   The food was delicious and arrived earlier tha...  positive  positive\n",
            "4   I am very happy with the quality of this product.  positive   unknown\n",
            "5      The service was terrible and nobody helped me.  negative  negative\n",
            "6   I regret buying this, it stopped working after...  negative  negative\n",
            "7        The plot was boring and the acting was weak.  negative   unknown\n",
            "8   This was a frustrating experience and a waste ...  negative   unknown\n",
            "9   The package arrived damaged and I am disappoin...  negative  negative\n",
            "10            The meeting starts at 3 PM in room 201.   neutral   neutral\n",
            "11      I bought a notebook and a pen from the store.   neutral   neutral\n",
            "12    The train reached the station ten minutes late.   neutral   unknown\n",
            "13         She sent the document yesterday afternoon.   neutral   neutral\n",
            "14  The phone comes in black, blue, and silver col...   neutral   neutral\n",
            "15  The movie was not bad, but it was not especial...   neutral  positive\n",
            "16  The meal was okay, nothing special but not awful.   neutral   unknown\n",
            "17  It works fine, though I expected a little more...   neutral   unknown\n",
            "\n",
            "Accuracy: 0.4444444444444444\n",
            "Macro F1: 0.5674603174603173\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    positive       0.50      0.20      0.29         5\n",
            "    negative       1.00      0.60      0.75         5\n",
            "     neutral       1.00      0.50      0.67         8\n",
            "\n",
            "   micro avg       0.89      0.44      0.59        18\n",
            "   macro avg       0.83      0.43      0.57        18\n",
            "weighted avg       0.86      0.44      0.58        18\n",
            "\n",
            "======================================================================\n",
            "Experiment 5 - Few-shot CoT\n",
            "======================================================================\n",
            "                                                 text      gold      pred\n",
            "0   I absolutely loved this movie, it was funny an...  positive  positive\n",
            "1   The hotel staff were kind and the room was ver...  positive  positive\n",
            "2     This phone is excellent, fast, and easy to use.  positive  positive\n",
            "3   The food was delicious and arrived earlier tha...  positive  positive\n",
            "4   I am very happy with the quality of this product.  positive  positive\n",
            "5      The service was terrible and nobody helped me.  negative  negative\n",
            "6   I regret buying this, it stopped working after...  negative   unknown\n",
            "7        The plot was boring and the acting was weak.  negative   unknown\n",
            "8   This was a frustrating experience and a waste ...  negative  negative\n",
            "9   The package arrived damaged and I am disappoin...  negative   unknown\n",
            "10            The meeting starts at 3 PM in room 201.   neutral   unknown\n",
            "11      I bought a notebook and a pen from the store.   neutral  negative\n",
            "12    The train reached the station ten minutes late.   neutral  negative\n",
            "13         She sent the document yesterday afternoon.   neutral  positive\n",
            "14  The phone comes in black, blue, and silver col...   neutral  positive\n",
            "15  The movie was not bad, but it was not especial...   neutral  positive\n",
            "16  The meal was okay, nothing special but not awful.   neutral   neutral\n",
            "17  It works fine, though I expected a little more...   neutral  positive\n",
            "\n",
            "Accuracy: 0.4444444444444444\n",
            "Macro F1: 0.4603174603174603\n",
            "\n",
            "Classification report:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "    positive       0.56      1.00      0.71         5\n",
            "    negative       0.50      0.40      0.44         5\n",
            "     neutral       1.00      0.12      0.22         8\n",
            "\n",
            "   micro avg       0.57      0.44      0.50        18\n",
            "   macro avg       0.69      0.51      0.46        18\n",
            "weighted avg       0.74      0.44      0.42        18\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# ============================================================\n",
        "# 10. SELF-CONSISTENCY\n",
        "# ============================================================\n",
        "def self_consistency_predict(prompt, n_samples=5, temperature=0.7, top_p=0.9):\n",
        "    \"\"\"\n",
        "    Sample several generations for one prompt and majority-vote the label.\n",
        "\n",
        "    prompt:\n",
        "        Fully formatted prompt string, passed unchanged to generate_text.\n",
        "\n",
        "    n_samples:\n",
        "        Number of independent generations (must be >= 1).\n",
        "        5 is a good default for class demos.\n",
        "\n",
        "    temperature:\n",
        "        Controls diversity. Too low gives almost identical outputs.\n",
        "        Too high may cause noisy answers.\n",
        "        0.7 is a common compromise.\n",
        "\n",
        "    top_p:\n",
        "        Restricts the token sampling pool to high-probability mass.\n",
        "        0.9 is a standard practical value.\n",
        "\n",
        "    Returns:\n",
        "        (final_pred, sampled_labels, sampled_outputs)\n",
        "        final_pred is the most frequent label among the samples whose\n",
        "        extracted label is in VALID_LABELS. If no sample yields a valid\n",
        "        label, the most frequent raw extracted label is returned instead.\n",
        "        sampled_labels / sampled_outputs contain every sample in generation\n",
        "        order, including the ones that failed to parse.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: if n_samples < 1 (there would be nothing to vote on).\n",
        "    \"\"\"\n",
        "    if n_samples < 1:\n",
        "        raise ValueError(f\"n_samples must be >= 1, got {n_samples}\")\n",
        "\n",
        "    sampled_labels = []\n",
        "    sampled_outputs = []\n",
        "\n",
        "    for _ in range(n_samples):\n",
        "        output = generate_text(\n",
        "            prompt=prompt,\n",
        "            max_new_tokens=64,\n",
        "            temperature=temperature,\n",
        "            do_sample=True,\n",
        "            top_p=top_p,\n",
        "            repetition_penalty=1.0,\n",
        "        )\n",
        "        sampled_outputs.append(output)\n",
        "        sampled_labels.append(extract_label(output))\n",
        "\n",
        "    # A generation whose label could not be parsed is not evidence for any\n",
        "    # class, so it must not be able to outvote real answers. Vote over valid\n",
        "    # labels only; fall back to the raw labels when nothing parsed at all.\n",
        "    valid_votes = [label for label in sampled_labels if label in VALID_LABELS]\n",
        "\n",
        "    # Counter.most_common keeps first-seen order among equal counts, so ties\n",
        "    # are resolved in favour of the label that was sampled first.\n",
        "    final_pred = Counter(valid_votes or sampled_labels).most_common(1)[0][0]\n",
        "    return final_pred, sampled_labels, sampled_outputs\n",
        "\n",
        "# Run self-consistency on every sentiment example with the zero-shot CoT prompt.\n",
        "sc_results = []\n",
        "for row in sa_data:\n",
        "    prompt = ZERO_SHOT_COT_PROMPT.format(text=row[\"text\"])\n",
        "    pred, labels, outputs = self_consistency_predict(\n",
        "        prompt,\n",
        "        n_samples=5,\n",
        "        temperature=0.7,\n",
        "        top_p=0.9,\n",
        "    )\n",
        "    # One record per example: the voted prediction plus every raw sample.\n",
        "    record = dict(\n",
        "        text=row[\"text\"],\n",
        "        gold=row[\"label\"],\n",
        "        pred=pred,\n",
        "        sampled_labels=labels,\n",
        "        sampled_outputs=outputs,\n",
        "    )\n",
        "    sc_results.append(record)\n",
        "\n",
        "df_sc = pd.DataFrame(sc_results)\n",
        "\n",
        "# Per-example votes first, then the aggregate metrics for the experiment.\n",
        "print(df_sc[[\"text\", \"gold\", \"pred\", \"sampled_labels\"]])\n",
        "print(\"\\nExperiment 6 - Self-consistency\")\n",
        "sc_gold, sc_pred = df_sc[\"gold\"], df_sc[\"pred\"]\n",
        "print(\"Accuracy:\", accuracy_score(sc_gold, sc_pred))\n",
        "print(\"Macro F1:\", f1_score(sc_gold, sc_pred, average=\"macro\", labels=VALID_LABELS))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "R63MRs8_qQ-U",
        "outputId": "37ae0963-c226-46d3-8704-7042dac3e573"
      },
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                                 text      gold      pred  \\\n",
            "0   I absolutely loved this movie, it was funny an...  positive  positive   \n",
            "1   The hotel staff were kind and the room was ver...  positive  positive   \n",
            "2     This phone is excellent, fast, and easy to use.  positive  positive   \n",
            "3   The food was delicious and arrived earlier tha...  positive  positive   \n",
            "4   I am very happy with the quality of this product.  positive  positive   \n",
            "5      The service was terrible and nobody helped me.  negative  positive   \n",
            "6   I regret buying this, it stopped working after...  negative  negative   \n",
            "7        The plot was boring and the acting was weak.  negative  positive   \n",
            "8   This was a frustrating experience and a waste ...  negative  positive   \n",
            "9   The package arrived damaged and I am disappoin...  negative  negative   \n",
            "10            The meeting starts at 3 PM in room 201.   neutral  positive   \n",
            "11      I bought a notebook and a pen from the store.   neutral  positive   \n",
            "12    The train reached the station ten minutes late.   neutral  negative   \n",
            "13         She sent the document yesterday afternoon.   neutral  positive   \n",
            "14  The phone comes in black, blue, and silver col...   neutral  positive   \n",
            "15  The movie was not bad, but it was not especial...   neutral  positive   \n",
            "16  The meal was okay, nothing special but not awful.   neutral  positive   \n",
            "17  It works fine, though I expected a little more...   neutral  positive   \n",
            "\n",
            "                                       sampled_labels  \n",
            "0   [positive, positive, positive, positive, posit...  \n",
            "1   [positive, positive, positive, positive, posit...  \n",
            "2   [positive, positive, positive, positive, posit...  \n",
            "3   [positive, positive, positive, positive, posit...  \n",
            "4   [positive, positive, positive, positive, posit...  \n",
            "5   [positive, positive, negative, positive, posit...  \n",
            "6   [negative, positive, negative, negative, negat...  \n",
            "7   [positive, positive, positive, positive, posit...  \n",
            "8   [negative, positive, positive, negative, posit...  \n",
            "9   [unknown, negative, negative, negative, positive]  \n",
            "10  [negative, positive, positive, positive, posit...  \n",
            "11  [positive, positive, positive, positive, posit...  \n",
            "12  [negative, positive, positive, negative, negat...  \n",
            "13  [positive, negative, neutral, positive, positive]  \n",
            "14  [positive, positive, unknown, positive, positive]  \n",
            "15  [negative, positive, positive, positive, posit...  \n",
            "16  [neutral, positive, positive, positive, positive]  \n",
            "17  [positive, positive, positive, positive, posit...  \n",
            "\n",
            "Experiment 6 - Self-consistency\n",
            "Accuracy: 0.3888888888888889\n",
            "Macro F1: 0.3333333333333333\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "# ============================================================\n",
        "# Practical interpretation:\n",
        "# - Direct prompting: cheapest baseline\n",
        "# - Role prompting: tiny prompt tweak, sometimes small gain\n",
        "# - Zero-shot CoT: helpful in reasoning tasks, sometimes limited in SA\n",
        "# - Few-shot prompting: often the most visibly useful for SA\n",
        "# - Few-shot CoT: can improve interpretability and sometimes performance\n",
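        "# - Self-consistency: often stronger on reasoning tasks, but slower and more expensive\n",
        "#   (in the recorded run above it scored below both few-shot variants on SA)\n",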
        "# - JSON output: useful for reliable downstream parsing\n",
        "# ============================================================\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "5MUoEvNPpq0k"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "cg4hpJAZyOjt"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}