{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        ":::{.content-hidden}\n",
        "# Human Annotations\n",
        ":::"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Install the `label-studio-sdk` package for programmatic control of Label Studio:"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "J1sLoUSsXzS4"
      },
      "outputs": [],
      "source": [
        "!pip -q install label-studio-sdk"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NBow_A02gg8j"
      },
      "source": [
        "Next, let's read the text master from the previous sessions"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "i9hkqOpUb8kQ"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "\n",
        "df = pd.read_csv('/content/drive/MyDrive/2023-12-01-Export-Posts-Text-Master.csv')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-TbauZ65PWhU"
      },
      "source": [
        "In my [video on GPT text classification](https://youtu.be/QcYGwC4QzW0) I mentioned the problem of the unique identifier, as we also need a unique identifier for the annotations. Use the code below in our text classification notebook when working with multidocument classifications!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "4IGsSLHCPypm"
      },
      "outputs": [],
      "source": [
        "df['identifier'] = df.apply(lambda x: f\"{x['shortcode']}-{x['Text Type']}\", axis=1)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "Y3NRaT_4gxes",
        "outputId": "155af2da-17ca-46c4-947e-6565f40640de"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-2c59a3eb-cae0-446b-a21d-3a49644eb089\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Unnamed: 0</th>\n",
              "      <th>shortcode</th>\n",
              "      <th>Text</th>\n",
              "      <th>Text Type</th>\n",
              "      <th>Policy Issues</th>\n",
              "      <th>identifier</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>CyMAe_tufcR</td>\n",
              "      <td>#Landtagswahl23 🤩🧡🙏 #FREIEWÄHLER #Aiwanger #Da...</td>\n",
              "      <td>Caption</td>\n",
              "      <td>['1. Political parties:\\n- FREIEWÄHLER\\n- Aiwa...</td>\n",
              "      <td>CyMAe_tufcR-Caption</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>CyL975vouHU</td>\n",
              "      <td>Die Landtagswahl war für uns als Liberale hart...</td>\n",
              "      <td>Caption</td>\n",
              "      <td>['Landtagswahl']</td>\n",
              "      <td>CyL975vouHU-Caption</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "      <td>CyL8GWWJmci</td>\n",
              "      <td>Nach einem starken Wahlkampf ein verdientes Er...</td>\n",
              "      <td>Caption</td>\n",
              "      <td>['1. Wahlkampf und Wahlergebnis:\\n- Wahlkampf\\...</td>\n",
              "      <td>CyL8GWWJmci-Caption</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>3</td>\n",
              "      <td>CyL7wyJtTV5</td>\n",
              "      <td>So viele Menschen am Odeonsplatz heute mit ein...</td>\n",
              "      <td>Caption</td>\n",
              "      <td>['Israel', 'Terrorismus', 'Hamas', 'Entwicklun...</td>\n",
              "      <td>CyL7wyJtTV5-Caption</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>4</td>\n",
              "      <td>CyLxwHuvR4Y</td>\n",
              "      <td>Herzlichen Glückwunsch zu diesem grandiosen Wa...</td>\n",
              "      <td>Caption</td>\n",
              "      <td>['1. Wahlsieg und Parlamentseinstieg\\n- Wahlsi...</td>\n",
              "      <td>CyLxwHuvR4Y-Caption</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2c59a3eb-cae0-446b-a21d-3a49644eb089')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-2c59a3eb-cae0-446b-a21d-3a49644eb089 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-2c59a3eb-cae0-446b-a21d-3a49644eb089');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-d1ba7831-9de2-4749-b87e-81da240e5126\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d1ba7831-9de2-4749-b87e-81da240e5126')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-d1ba7831-9de2-4749-b87e-81da240e5126 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "   Unnamed: 0    shortcode                                               Text  \\\n",
              "0           0  CyMAe_tufcR  #Landtagswahl23 🤩🧡🙏 #FREIEWÄHLER #Aiwanger #Da...   \n",
              "1           1  CyL975vouHU  Die Landtagswahl war für uns als Liberale hart...   \n",
              "2           2  CyL8GWWJmci  Nach einem starken Wahlkampf ein verdientes Er...   \n",
              "3           3  CyL7wyJtTV5  So viele Menschen am Odeonsplatz heute mit ein...   \n",
              "4           4  CyLxwHuvR4Y  Herzlichen Glückwunsch zu diesem grandiosen Wa...   \n",
              "\n",
              "  Text Type                                      Policy Issues  \\\n",
              "0   Caption  ['1. Political parties:\\n- FREIEWÄHLER\\n- Aiwa...   \n",
              "1   Caption                                   ['Landtagswahl']   \n",
              "2   Caption  ['1. Wahlkampf und Wahlergebnis:\\n- Wahlkampf\\...   \n",
              "3   Caption  ['Israel', 'Terrorismus', 'Hamas', 'Entwicklun...   \n",
              "4   Caption  ['1. Wahlsieg und Parlamentseinstieg\\n- Wahlsi...   \n",
              "\n",
              "            identifier  \n",
              "0  CyMAe_tufcR-Caption  \n",
              "1  CyL975vouHU-Caption  \n",
              "2  CyL8GWWJmci-Caption  \n",
              "3  CyL7wyJtTV5-Caption  \n",
              "4  CyLxwHuvR4Y-Caption  "
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df.head()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "#### LabelStudio Setup\n",
        "Please specify the the URL and API-Key for you LabelStudio Instance."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "cellView": "form",
        "id": "71Cj4_19X3AV"
      },
      "outputs": [],
      "source": [
        "import json\n",
        "from google.colab import userdata\n",
        "\n",
        "labelstudio_key_name = \"label2-key\"\n",
        "labelstudio_key = userdata.get(labelstudio_key_name)\n",
        "labelstudio_url = \"https://label2.digitalhumanities.io\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NyWtV-3PDxn3"
      },
      "source": [
        "#### Create LabelStudio Interface\n",
        "Before creating the LabelStudio project you will need to define your labelling interface. Once the project is set up you will only be able to edit the interface in LabelStudio."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "kVhp0vEGE4an"
      },
      "outputs": [],
      "source": [
        "interface = \"\"\"\n",
        "<View style=\"display:flex;\">\n",
        "  <View style=\"flex:33%\">\n",
        "    <Text name=\"Text\" value=\"$Text\"/>\n",
        "  </View>\n",
        "  <View style=\"flex:66%\">\n",
        "\"\"\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "#### Add a simple coding interface\n",
        "Do you want add codes (Classification) to the images? Please name your coding instance and add options. <br/> **By running this cell multiple times you're able to add multiple variables (not recommended)**\n",
        "\n",
        "Add the variable name to `coding_name`, the checkbox labels in `coding_values`, and define whether to expect `single` choice or `multiple` choice input for this variable in `coding_choice`."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "iYYcR7nnIaj7"
      },
      "outputs": [],
      "source": [
        "coding_name = \"Sentiment\"\n",
        "coding_values = \"Positive,Neutral,Negative\"\n",
        "coding_choice = \"single\"\n",
        "\n",
        "coding_interface = '<Header value=\"{}\" /><Choices name=\"{}\" choice=\"{}\" toName=\"Text\">'.format(coding_name, coding_name,coding_choice)\n",
        "\n",
        "for value in coding_values.split(\",\"):\n",
        "  value = value.strip()\n",
        "  coding_interface += '<Choice value=\"{}\" />'.format(value)\n",
        "\n",
        "coding_interface += \"</Choices>\"\n",
        "\n",
        "interface += coding_interface\n",
        "\n",
        "print(\"Added {}\".format(coding_name))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "KJPxxRGZQvwe"
      },
      "source": [
        "Finally run the next line to close the XML of the annotation interface. **Run this line even if you do not want to add any variables at the moment!** "
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "I1B_JTpMUbjy"
      },
      "outputs": [],
      "source": [
        "interface += \"\"\"\n",
        "        </View>\n",
        "    </View>\n",
        "    \"\"\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "#### Project Upload\n",
        "This final step creates a LabelStudio project and configures the interface. Define a `project_name`, select the `text_column`, and `identifier_column`. Additionally, you may define a `sample_percentage` for sampling, we start with $30\\%$. When working with the Open Source version of Label Studio we need to create on project per annotator, enter the number of annotators in `num_copies` to create multiple copies at once."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "cellView": "form",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Uyvam3dH7uB5",
        "outputId": "3e0856ad-0bf7-42a1-9ee6-ea17887f83fb"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "All done, created project #0! Visit https://label2.digitalhumanities.io/projects/61/ and get started labelling!\n"
          ]
        }
      ],
      "source": [
        "from label_studio_sdk import Client\n",
        "import contextlib\n",
        "import io\n",
        "\n",
        "project_name = \"vSMA Test 1\" \n",
        "text_column = \"Text\" \n",
        "identifier_column = \"identifier\" \n",
        "sample_percentage = 30  \n",
        "num_copies = 1 \n",
        "\n",
        "sample_size = round(len(df) * (sample_percentage / 100))\n",
        "\n",
        "ls = Client(url=labelstudio_url, api_key=labelstudio_key)\n",
        "\n",
        "df_tasks = df[[identifier_column, text_column]]\n",
        "df_tasks = df_tasks.sample(sample_size)\n",
        "df_tasks = df_tasks.fillna(\"\")\n",
        "\n",
        "for i in range(0, num_copies):\n",
        "  project_name = f\"{project_name} #{i}\"\n",
        "  # Create the project\n",
        "  project = ls.start_project(\n",
        "      title=project_name,\n",
        "      label_config=interface,\n",
        "      sampling=\"Uniform sampling\"\n",
        "  )\n",
        "\n",
        "  with contextlib.redirect_stdout(io.StringIO()):\n",
        "    project.import_tasks(\n",
        "          df_tasks.to_dict('records')\n",
        "        )\n",
        "\n",
        "  print(f\"All done, created project #{i}! Visit {labelstudio_url}/projects/{project.id}/ and get started labelling!\")"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}