Application of BERT Language Model

Install the dependencies before running any of the code below.

pip install transformers torch

1. Feature Extraction

from transformers import AutoTokenizer, AutoModel
import torch

# Tokenizer and encoder are loaded from the same checkpoint name.
checkpoint = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

text = "Example text for feature extraction."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)

# Average the per-token vectors over the sequence axis (dim=1) so the whole
# text is represented by a single row.
token_vectors = outputs.last_hidden_state
features = token_vectors.mean(dim=1)  # Average pooling
print(features)

output:

tensor([[ 1.9740e-01,  2.5477e-01,  1.4509e-01,  1.7566e-01, -6.6479e-02,
          1.6524e-01,  4.2747e-02,  1.6712e-01,  9.2036e-02, -1.2712e-01,
          2.1206e-01,  2.8614e-02,  1.7599e-01, -1.0995e-01,  3.8088e-01,
         -5.3928e-02,  8.4479e-02,  1.5780e-01, -1.9029e-02,  2.0891e-01,
          2.3006e-01,  4.3518e-02,  9.5676e-02, -8.3910e-02, -2.1702e-01,
         -2.9476e-01,  1.2470e-01, -1.9291e-01,  7.4890e-02,  1.6493e-01,
         -9.8748e-02, -9.4859e-02,  1.6784e-01,  4.5727e-02, -8.9265e-02,
         -6.5349e-02,  1.6743e-01,  9.2660e-04,  5.6716e-01, -5.5762e-02,
          1.5681e-01, -7.8924e-02,  3.5071e-02, -8.7810e-02, -1.7692e-01,
         -1.4990e-01, -2.5306e-01,  2.3991e-01, -3.5500e-01,  3.6740e-02,
          2.7735e-02,  5.4039e-04, -1.2296e-01,  4.1244e-01, -7.1209e-02,
         -3.6841e-01,  4.6811e-01,  1.1383e-01, -4.8712e-02, -4.1311e-02,
         -2.5833e-01,  1.0461e-01, -3.3426e-01,  2.4857e-01,  3.0730e-02,
         -3.4563e-01,  2.8982e-02,  2.0955e-01, -9.0439e-02, -5.0176e-01,
         -1.3220e-01, -7.2213e-02, -1.3017e-01,  3.7756e-01,  4.5529e-02,
          5.5027e-01, -4.4919e-02, -2.7063e-01,  2.0046e-01,  2.1194e-01,
         -4.6654e-02, -2.2291e-01,  1.2578e-01,  6.3882e-02, -1.2810e-02,
          5.3850e-02, -1.0923e-01,  3.5653e-01, -2.0520e-01,  2.3189e-01,
         -1.7130e-01, -1.3012e-01,  2.2879e-01,  1.2951e-01, -3.7784e-01,
         -9.4571e-02,  1.5032e-01,  3.7665e-02,  1.3622e-01,  3.6018e-01,
          3.2697e-01,  3.3267e-01,  1.3728e-01, -7.0549e-02, -3.2908e-02,
          9.0375e-02, -1.1214e-01, -9.4537e-02,  2.3070e-01,  2.6935e-02,
         -7.6006e-02,  1.8334e-01, -1.6602e-01, -2.9112e-01,  3.8217e-02,
         -1.0858e-01, -8.2849e-02,  2.8999e-01, -5.3112e-02,  3.7262e-02,
         -9.7637e-02, -4.6162e-02, -3.9022e-02, -1.8257e-01, -4.9632e-02,
         -1.3525e-01,  2.8124e-01, -4.7314e-01, -1.2156e-01,  5.5921e-03,
         -9.9637e-03,  5.2087e-02,  1.0867e-01,  5.8512e-03,  8.7491e-02,
          7.3979e-02,  2.4615e+00,  3.7371e-01,  3.7762e-02, -1.2664e-01,
         -4.0282e-02, -2.5334e-01,  2.6887e-02, -9.8101e-02,  1.7890e-01,
          1.8016e-01,  2.0606e-01, -1.3461e-01, -2.3592e-02, -1.4069e-01,
         -2.0459e-03,  3.9103e-01,  3.3364e-02,  1.7682e-01,  1.1465e-01,
         -4.3010e-02,  1.0913e-01,  2.0352e-01, -8.3672e-02,  9.9100e-02,
         -1.1458e-01, -2.7574e-01,  5.8051e-02,  6.7526e-03, -4.5494e-02,
          1.2360e-01,  2.2015e-01,  5.1357e-02, -1.3168e-01, -7.3664e-02,
         -4.2616e-02,  2.5773e-01,  4.6702e-03, -1.2486e-01, -5.7714e-02,
         -1.8647e-01, -5.2960e-02, -4.7788e-02,  1.7559e-01, -3.6615e-02,
          1.1048e-01,  3.4949e-02, -3.5863e-02,  2.3993e-02,  3.4114e-01,
         -1.5924e-02,  1.1467e-01,  4.2615e-01,  4.1695e-02,  2.9016e-01,
         -4.4460e-02,  6.5947e-02, -7.7143e-02, -1.2469e-01, -1.3784e-01,
         -2.5800e-02, -2.5196e-01,  4.0812e-02, -2.1486e-02, -6.5837e-02,
         -2.1216e-01, -1.6616e-02, -6.8074e-03,  2.9654e-01,  1.2036e-01,
          3.5091e-01,  1.5768e-01, -1.5767e-02, -3.8023e-01, -1.5276e-02,
         -9.0135e-02, -1.8029e-01,  1.8638e-02,  8.4511e-03,  1.9307e-01,
          6.4887e-02, -4.4351e-02,  3.5009e-01,  4.1193e-02, -9.4739e-02,
          1.7384e-01,  1.7389e-01,  1.1192e-01,  3.3265e-01,  1.7295e-01,
         -3.0358e-01, -1.1762e-01,  4.1907e-02, -2.4472e-01, -1.4035e-01,
          2.5353e-01,  3.4157e-01,  1.4558e-02,  7.1354e-03, -1.6984e-02,
         -5.5282e-02,  1.0725e-01,  2.8647e-03, -1.9686e-01,  2.1436e-01,
         -5.1173e-01,  4.6730e-01,  1.6907e-01,  3.1645e-01, -2.6974e-01,
          9.8415e-02,  4.0037e-01, -1.5342e-01, -8.5458e-02,  1.9014e-01,
         -7.1404e-02,  1.3187e-01, -3.3614e-01,  9.9421e-02,  7.1365e-02,
         -3.8709e-01,  2.3684e-02,  6.3747e-02,  1.5607e-01,  5.0626e-03,
          2.7010e-01, -8.2233e-02,  3.0945e-01,  7.1563e-02, -1.7492e-01,
         -1.8688e-01, -3.3523e-01, -2.3277e-01, -1.4823e-01, -4.4523e-02,
         -1.8755e-01,  2.3952e-01, -1.3176e-02,  2.2850e-01,  1.6860e-01,
          3.0881e-03,  1.5961e-01, -2.3334e-01,  4.7723e-02,  1.5599e-01,
          3.6142e-01,  3.4087e-01,  1.1393e-01,  2.2160e-01,  1.9627e-01,
          3.0980e-03, -1.7946e-02, -1.4120e-01, -6.7836e-03,  2.4087e-01,
         -1.8232e-01, -9.6212e-02, -9.9146e-02, -2.8458e-04, -1.7843e-01,
         -9.4016e-02,  9.4367e-02, -6.2880e-02, -2.9643e-01,  2.7444e-01,
         -1.8214e-02,  6.2151e-02, -1.9957e-01,  8.9322e-03,  3.8030e-01,
          2.2031e-02, -7.9656e-02,  3.1843e-01, -1.1330e-01, -1.6839e-01,
         -1.0379e-01,  1.8142e-01]])

The tensor obtained from the feature extraction using TinyBERT represents the embedding of the input text. This embedding can be used in several ways:

1. Downstream Machine Learning Tasks

You can use the extracted features as input to traditional machine learning models for tasks like classification, regression, or clustering. Here's how:

Example: Text Classification

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Build a small labelled dataset. scikit-learn expects a 2-D matrix of shape
# (n_samples, n_features), so each text is embedded with the TinyBERT
# `tokenizer`/`model` loaded in the feature-extraction step and the resulting
# (1, 312) rows are concatenated. A single sample cannot be split or trained on.
texts = [
    "I love this product.",
    "What a wonderful experience.",
    "Absolutely fantastic service.",
    "This was a great purchase.",
    "I hate this product.",
    "What a terrible experience.",
    "Absolutely awful service.",
    "This was a waste of money.",
]
y = [1, 1, 1, 1, 0, 0, 0, 0]  # Your labels (1 = positive, 0 = negative)


def embed(sentence):
    """Return the mean-pooled TinyBERT embedding of ``sentence`` as a (1, 312) tensor."""
    encoded = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    return hidden.mean(dim=1)


X = torch.cat([embed(sentence) for sentence in texts], dim=0).numpy()

# Split the dataset; stratify so both classes appear in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# Train a Logistic Regression model. It is named `classifier` so that it does
# not overwrite `model`, which still refers to the TinyBERT encoder.
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)

# Make predictions
predictions = classifier.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model accuracy: {accuracy}")

2. Clustering

You can cluster your text data using algorithms like K-Means or DBSCAN to discover patterns or group similar texts.

Example: K-Means Clustering

from sklearn.cluster import KMeans

# scikit-learn needs a 2-D array of shape (n_samples, n_features). Each
# embedding produced by the feature-extraction step is a (1, 312) tensor, so
# collect the tensors in a list and concatenate them along the batch axis.
embeddings = [features]  # append the embeddings of your other texts here
features_list = torch.cat(embeddings, dim=0).numpy()

# K-Means cannot form more clusters than there are samples.
n_clusters = min(3, len(features_list))
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
kmeans.fit(features_list)

# Print cluster assignments
print(kmeans.labels_)

3. Visualization

You can visualize the embeddings using techniques like t-SNE or PCA to reduce the dimensionality for better interpretability.

Example: t-SNE Visualization

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Reduce dimensionality for visualization.
# t-SNE requires perplexity < n_samples and defaults to 30, which fails on a
# small demo collection, so scale it down. At least two samples are needed.
n_samples = len(features_list)
perplexity = min(30, max(1, n_samples - 1))
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=0)
reduced_features = tsne.fit_transform(features_list)

plt.scatter(reduced_features[:, 0], reduced_features[:, 1])
plt.title("t-SNE Visualization of Text Embeddings")
plt.show()

4. Similarity Search

You can calculate similarities between different text embeddings to find related texts.

Example: Cosine Similarity

from sklearn.metrics.pairwise import cosine_similarity

# Embed a second text exactly the way `features` was produced, so both inputs
# are (1, 312) rows. `tokenizer` and `model` are the TinyBERT objects loaded in
# the feature-extraction step.
other_text = "Another example sentence to compare against."
other_inputs = tokenizer(other_text, return_tensors="pt")
with torch.no_grad():
    another_embedding = model(**other_inputs).last_hidden_state.mean(dim=1)

# Compare with another embedding
similarity = cosine_similarity(features.numpy(), another_embedding.numpy())
print(f"Cosine Similarity: {similarity[0][0]}")

5. Input to Neural Networks

You can use the embeddings as input to more complex neural networks, such as feedforward networks or LSTMs, for specific tasks.

2. Fine-Tuning for Text Classification

Install dependencies (datasets)

!pip install transformers datasets torch

Run

import os
# Disable WandB logging by setting the environment variable
os.environ["WANDB_DISABLED"] = "true"

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Load TinyBERT tokenizer and model
model_name = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Binary classification

# Sample dataset
data = {
    'text': [
        "I love programming.",
        "Python is an amazing language.",
        "I dislike bugs in code.",
        "Debugging is really frustrating.",
        "I enjoy solving problems with code."
    ],
    'label': [1, 1, 0, 0, 1]  # 1 = Positive, 0 = Negative
}

# Create a dataset
dataset = Dataset.from_dict(data)

# Define max_length for tokenization
max_length = 8  # Adjust this if needed

# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch, padding and truncating to ``max_length``."""
    options = {"padding": "max_length", "truncation": True, "max_length": max_length}
    return tokenizer(examples["text"], **options)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Split the dataset into train and test sets. The result is named `splits`
# rather than `train_test_split` so it does not shadow the scikit-learn
# function of the same name imported earlier in this document.
splits = tokenized_dataset.train_test_split(test_size=0.2)
train_dataset = splits['train']
test_dataset = splits['test']

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`;
# use whichever name the installed version accepts.
import dataclasses

_argument_names = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_key = "eval_strategy" if "eval_strategy" in _argument_names else "evaluation_strategy"

# Set training arguments. `report_to` must be the string "none" to turn off
# logging integrations; passing None falls back to the library default.
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none",  # Disable logging reports
    **{_eval_key: "epoch"},
)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

# Example inference
def predict(text):
    """Return the predicted class id for ``text`` (1 = Positive, 0 = Negative)."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    # Trainer may have moved the model to a GPU; the input tensors must live on
    # the same device as the model or the forward pass raises a device mismatch.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    model.eval()  # make sure dropout is disabled for inference
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = logits.argmax(dim=-1).item()
    return predicted_class

# Test the model with a new example
new_example = "I love solving complex problems."
prediction = predict(new_example)
print(f"Prediction for '{new_example}': {prediction} (1 = Positive, 0 = Negative)")

Output:


Epoch    Training Loss    Validation Loss
1    No log    0.674932
2    No log    0.676540
3    No log    0.676309
 [1/1 : < :]
Evaluation Results: {'eval_loss': 0.6763088703155518, 'eval_runtime': 0.0209, 'eval_samples_per_second': 47.735, 'eval_steps_per_second': 47.735, 'epoch': 3.0}
Prediction for 'I love solving complex problems.': 1 (1 = Positive, 0 = Negative)

Fine-Tuning vs Transfer Learning

Transfer learning and fine-tuning are closely related concepts in machine learning, particularly in the context of natural language processing (NLP) and text classification. Here's how they differ:

Transfer Learning

  1. Definition:

    • Transfer learning refers to the practice of taking a pre-trained model (often trained on a large dataset) and applying it to a different but related task.

    • The goal is to leverage the knowledge gained from the source task to improve performance on the target task.

  2. Process:

    • Involves using a model that has been trained on a large dataset (e.g., BERT, TinyBERT) and applying it to a new task with potentially limited data.

    • The pre-trained model is generally used as a feature extractor, where you might only use the output from its layers.

  3. Applications:

    • Common in scenarios where labeled data for the target task is scarce, allowing you to benefit from the knowledge embedded in pre-trained models.

Fine-Tuning

  1. Definition:

    • Fine-tuning is a specific instance of transfer learning where you take a pre-trained model and continue training it on your specific task.

    • This process adjusts the weights of the model to better fit the new dataset.

  2. Process:

    • Involves unfreezing the weights of the pre-trained model and training it on your dataset, often with a smaller learning rate.

    • Fine-tuning can involve training all layers of the model or just the last few layers, depending on the task and the amount of data available.

  3. Applications:

    • Fine-tuning is particularly effective when you have a moderate-sized dataset and you want to adapt a model that was initially trained on a different but related task.

Summary of Differences

  • Scope:

    • Transfer learning is a broader concept that includes any application of knowledge from one task to another, while fine-tuning is a specific technique within transfer learning.
  • Training Process:

    • In transfer learning, you might only use the pre-trained model as a feature extractor. In fine-tuning, you adjust the model's weights through additional training on your specific dataset.
  • Data Requirements:

    • Transfer learning can be effective with very little data, whereas fine-tuning typically requires some labeled data to adjust the model's weights effectively.

Example in Text Classification

  • Transfer Learning: Using a pre-trained BERT model to extract features from text without further training, perhaps for a simple classification task.

  • Fine-Tuning: Taking the same pre-trained BERT model and training it on a specific text classification task (e.g., sentiment analysis) with labeled examples, allowing the model to learn the nuances of the new task.

3. Transfer Learning

import os
# Disable WandB logging by setting the environment variable
os.environ["WANDB_DISABLED"] = "true"

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Load TinyBERT tokenizer and model
model_name = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # Binary classification

# Transfer learning (as opposed to fine-tuning): keep the pre-trained encoder
# as a fixed feature extractor and train only the newly added classification
# head. Without this freeze the script is identical to the fine-tuning example.
for param in model.base_model.parameters():
    param.requires_grad = False

# Sample dataset
data = {
    'text': [
        "I love programming.",
        "Python is an amazing language.",
        "I dislike bugs in code.",
        "Debugging is really frustrating.",
        "I enjoy solving problems with code.",
        "The sun is shining brightly.",
        "I hate getting stuck in traffic.",
        "Life is beautiful.",
        "I am excited about learning new things.",
        "I feel sad when I see pollution."
    ],
    'label': [1, 1, 0, 0, 1, 1, 0, 1, 1, 0]  # 1 = Positive, 0 = Negative
}

# Create a dataset
dataset = Dataset.from_dict(data)

# Define max_length for tokenization
max_length = 10  # Adjust this if needed

# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of texts, padding and truncating each one to ``max_length``."""
    batch_texts = examples["text"]
    return tokenizer(
        batch_texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Split the dataset into train and test sets. The result is named `splits`
# rather than `train_test_split` so it does not shadow the scikit-learn
# function of the same name imported earlier in this document.
splits = tokenized_dataset.train_test_split(test_size=0.2)
train_dataset = splits['train']
test_dataset = splits['test']

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`;
# use whichever name the installed version accepts.
import dataclasses

_argument_names = {field.name for field in dataclasses.fields(TrainingArguments)}
_eval_key = "eval_strategy" if "eval_strategy" in _argument_names else "evaluation_strategy"

# Set training arguments. `report_to` must be the string "none" to turn off
# logging integrations; passing None falls back to the library default.
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none",  # Disable logging reports
    **{_eval_key: "epoch"},
)

# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

# Example inference
def predict(text):
    """Return the predicted class id for ``text`` (1 = Positive, 0 = Negative)."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
    # Trainer may have moved the model to a GPU; the input tensors must live on
    # the same device as the model or the forward pass raises a device mismatch.
    inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
    model.eval()  # make sure dropout is disabled for inference
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class = logits.argmax(dim=-1).item()
    return predicted_class

# Test the model with new examples
new_examples = [
    "I love hiking in the mountains.",
    "I am very angry about the current situation."
]

for example in new_examples:
    prediction = predict(example)
    print(f"Prediction for '{example}': {prediction} (1 = Positive, 0 = Negative)")

Output:

Evaluation Results: {'eval_loss': 0.6723592281341553, 'eval_runtime': 0.0324, 'eval_samples_per_second': 61.791, 'eval_steps_per_second': 30.895, 'epoch': 3.0}
Prediction for 'I love hiking in the mountains.': 1 (1 = Positive, 0 = Negative)
Prediction for 'I am very angry about the current situation.': 0 (1 = Positive, 0 = Negative)

Explanation of Key Components

  1. Dataset Creation: A simple dataset is created with text samples and binary labels (1 for positive sentiment, 0 for negative sentiment).

  2. Tokenization: The tokenize_function ensures that all text inputs are tokenized consistently and padded to a specified maximum length.

  3. Train/Test Split: The dataset is split into training and testing sets.

  4. Training Arguments: The TrainingArguments class specifies parameters for training, such as learning rate, batch size, and number of epochs.

  5. Trainer: The Trainer class is used to handle the training and evaluation process.

  6. Model Training and Evaluation: The model is trained on the training dataset, and evaluation results are printed.

  7. Inference: A function to predict the sentiment of new text examples is provided, along with sample predictions.

4. Sentence Similarity

import os
import torch
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments

# Disable WandB logging by setting the environment variable
os.environ["WANDB_DISABLED"] = "true"

# Load TinyBERT tokenizer and model (using AutoModel for hidden states)
model_name = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, output_hidden_states=True)  # Enable hidden states

# Sample texts
text1 = "I love programming."
text2 = "Python is an amazing language."

# Tokenize each sentence on its own (same tokenizer settings for both).
inputs1, inputs2 = (
    tokenizer(sentence, return_tensors="pt", truncation=True, padding=True, max_length=10)
    for sentence in (text1, text2)
)

# Run the encoder without tracking gradients.
with torch.no_grad():
    outputs1, outputs2 = model(**inputs1), model(**inputs2)

# Take the final entry of hidden_states (the last layer) and mean-pool it over
# the token axis to get one vector per sentence.
vec1, vec2 = (
    result.hidden_states[-1].mean(dim=1)
    for result in (outputs1, outputs2)
)

# Example output
print("Vector 1:", vec1)
print("Vector 2:", vec2)

import torch
import torch.nn.functional as F

# Assuming vec1 and vec2 are your text embeddings
similarity = F.cosine_similarity(vec1, vec2)
print("Cosine Similarity:", similarity.item())

Output:

Vector 1: tensor([[-1.6176e-01,  2.5239e-01, -6.3237e-02,  1.1919e-01, -2.6126e-01,
         -8.0863e-02, -2.2205e-01,  7.4041e-02,  2.7497e-01, -1.0808e-01,
          2.7377e-01, -2.4553e-01,  4.9217e-02, -1.8269e-01, -2.6951e-02,
         -1.0678e-01,  3.6911e-01,  2.0687e-01,  2.2898e-02,  2.4484e-01,
          3.8302e-01,  9.0641e-02, -2.1530e-02, -3.1857e-01, -6.6188e-02,
         -1.9571e-01,  1.5709e-01, -4.6462e-01, -4.4910e-02,  3.5263e-01,
         -1.8303e-01, -2.2847e-01,  1.7568e-01,  7.0578e-02, -3.1226e-02,
          3.5688e-02,  2.6231e-01, -5.9350e-02,  1.7556e-01, -2.9727e-01,
          6.7127e-02, -6.4844e-03,  1.7980e-01, -1.2621e-01, -1.3776e-01,
         -2.8213e-01, -4.3571e-02, -2.0078e-02,  5.0481e-02, -1.0542e-01,
          2.2079e-01,  5.9069e-03,  1.5441e-01,  2.2205e-01, -8.0406e-02,
         -1.4657e-01,  2.9426e-01,  1.3707e-01, -2.3486e-01, -9.9600e-02,
         -1.7818e-01,  1.7346e-01, -4.9023e-01,  1.9931e-01,  1.0728e-01,
         -8.7433e-02,  3.3014e-01,  6.8642e-02, -4.1763e-02, -5.8191e-01,
         -3.3198e-02, -2.0218e-01,  3.8172e-02,  1.4329e-01,  9.8508e-02,
          4.0515e-01,  7.7218e-02, -1.4624e-01, -2.0954e-01,  4.7106e-02,
         -2.2298e-01, -1.2525e-01,  2.9343e-02,  1.1731e-01,  9.1716e-02,
         -8.9816e-02,  1.6940e-01,  2.7529e-01, -1.6878e-01, -1.0052e-01,
         -2.9069e-01, -2.0841e-01,  3.7753e-01,  6.3024e-02, -3.9105e-01,
         -2.0234e-01, -1.1539e-01,  8.4418e-02, -1.4179e-01,  1.6219e-01,
         -3.8225e-03,  1.4638e-01,  1.1963e-01, -4.9625e-02, -8.7117e-02,
         -1.3136e-01,  3.7629e-02, -1.4493e-01,  2.5865e-01, -1.2155e-01,
          1.8646e-01,  3.7706e-01,  1.6246e-01, -9.3256e-02,  7.0659e-02,
         -1.0356e-01,  2.9688e-01,  2.4314e-01,  2.9659e-02,  1.9471e-01,
          1.9183e-01,  2.5419e-02,  9.6937e-02, -6.9149e-02, -8.2890e-02,
         -1.4443e-01,  3.1907e-02, -3.7602e-01, -1.7956e-01, -8.3962e-02,
          3.7904e-01, -3.7010e-02,  1.3795e-01,  1.3871e-02,  1.1029e-01,
          1.9375e-01,  2.2885e+00, -7.2059e-02, -7.8902e-02, -1.2220e-01,
         -1.8915e-01, -1.0356e-01,  6.7498e-02,  6.8883e-02,  6.7045e-02,
          2.2816e-01, -8.1482e-02, -9.7369e-02, -6.2511e-02,  1.3601e-01,
         -6.2408e-02,  3.5382e-01,  2.7666e-03,  1.4361e-01, -1.9816e-01,
          1.7190e-01, -2.1372e-01,  7.5884e-02,  1.0218e-01, -2.1332e-01,
          3.4332e-02, -2.0518e-01, -2.8013e-02, -1.4251e-01, -6.3818e-02,
          2.4179e-01,  1.1210e-01,  2.2473e-01,  1.8885e-02, -2.2461e-02,
         -1.0390e-02,  3.1296e-01, -1.0750e-01, -2.2101e-01, -1.6769e-01,
         -2.7629e-01, -1.9850e-02,  1.2113e-01,  5.5650e-02, -4.4113e-03,
          3.2265e-01, -1.6398e-02,  8.6040e-02,  1.0155e-01,  6.1776e-02,
          6.7247e-02, -2.9969e-01,  2.7700e-01,  9.5528e-02, -5.8818e-02,
          3.0628e-02,  6.3967e-02, -4.2764e-03,  3.0135e-01,  1.5335e-01,
          1.3023e-02, -4.1580e-02,  2.6220e-01,  3.0850e-01, -2.8273e-01,
          1.2541e-03,  3.1330e-01,  1.2549e-01,  2.9057e-01, -1.9700e-01,
         -1.7225e-01,  4.8072e-02, -5.3778e-01, -8.6238e-02, -7.1039e-02,
         -1.4784e-01, -1.6196e-01,  9.0132e-02,  2.2106e-01,  9.2635e-03,
         -7.4501e-02,  5.9309e-02,  3.0647e-01,  4.1773e-01, -2.1448e-01,
         -1.3703e-02,  6.0828e-02,  9.2911e-02,  3.2828e-01, -7.8134e-02,
         -7.0754e-02, -9.9274e-02, -9.6955e-02, -1.1535e-01, -2.0488e-01,
          5.7874e-01,  1.1427e-01,  1.1853e-01, -1.4496e-01,  1.8402e-01,
         -1.1193e-01,  9.1887e-02, -2.5803e-01, -4.0146e-01,  3.1348e-01,
         -7.2966e-01,  7.3190e-02,  5.8248e-02,  1.1630e-01, -3.5544e-02,
          1.3606e-01,  6.0095e-01,  2.1565e-01,  1.0422e-01,  4.2104e-01,
          5.4488e-02,  4.1729e-02, -1.6491e-01,  3.1636e-01,  3.1654e-02,
         -5.1085e-01, -1.3908e-01, -5.9687e-02,  3.3603e-01, -1.8190e-03,
          4.2119e-01, -7.0282e-02,  1.1561e-01,  6.3062e-02,  4.1171e-04,
         -1.0642e-01, -1.3320e-01, -8.2845e-02,  1.0005e-01,  1.4878e-02,
         -1.9997e-01,  2.0504e-01, -5.8684e-03,  8.8032e-02,  1.1638e-01,
          1.7246e-01,  2.7374e-01, -1.6513e-01,  2.1071e-02,  2.2678e-01,
          1.8214e-01,  6.3026e-02,  1.0400e-01,  1.2368e-01,  2.2763e-01,
          1.8714e-01,  1.2398e-02,  2.1013e-01, -4.6318e-04,  2.8857e-01,
          8.9215e-02,  9.5903e-02,  1.7049e-01, -2.6891e-01,  6.2839e-02,
          2.1528e-01,  6.0576e-02,  6.9568e-03, -2.5189e-01, -1.0626e-01,
          1.7184e-02,  2.7726e-01,  2.6890e-01,  1.0106e-01,  3.4020e-01,
          8.0985e-03,  9.4553e-02, -5.9867e-02,  3.2059e-01,  1.4006e-01,
          3.0978e-02,  1.1950e-01]])
Vector 2: tensor([[-1.9617e-01,  3.5677e-01,  1.0947e-02, -4.8789e-02,  5.8909e-02,
         -1.4251e-01, -1.5077e-01, -5.2587e-02,  8.0777e-02, -1.7510e-01,
          3.5651e-02,  4.2503e-02,  1.0218e-01, -5.8492e-03, -2.0642e-01,
         -1.2134e-01, -1.4821e-01,  3.1694e-01,  2.2935e-01,  2.9692e-01,
          1.5660e-01,  2.4622e-01,  3.3920e-01, -1.8708e-01, -2.7468e-01,
         -2.0269e-01,  2.7421e-01, -1.8543e-01,  2.4410e-02,  4.0107e-03,
         -3.1879e-01, -1.9219e-01,  6.3619e-02,  2.0481e-01,  2.0196e-01,
         -1.4528e-01,  2.1086e-01, -5.5150e-03,  3.1851e-01, -2.3599e-01,
          6.1155e-02, -2.9535e-01,  3.0466e-01, -1.4684e-01, -2.2172e-01,
         -1.5614e-01, -1.5921e-01, -5.5837e-02, -1.7454e-01, -1.0959e-01,
          1.3667e-01, -1.1712e-01,  7.6150e-02,  4.6926e-01, -1.6502e-01,
         -1.5886e-01,  4.0137e-01,  1.5740e-01,  1.5027e-01, -3.2949e-02,
         -1.1407e-01,  1.2693e-01, -4.1888e-01,  1.3307e-01, -1.8239e-01,
          3.0158e-02,  3.2127e-01,  1.6587e-01, -1.2011e-01, -2.8872e-01,
         -1.7438e-01,  1.7147e-01,  6.2647e-03,  4.8184e-02, -1.6709e-01,
          4.2268e-01, -4.9462e-02,  1.8884e-01, -2.8204e-01,  1.6417e-01,
         -1.0913e-01,  2.0740e-01,  1.3397e-01,  3.3936e-01, -4.9542e-02,
         -2.1018e-01,  2.6553e-01,  2.7694e-01, -3.8273e-01, -1.3967e-01,
         -4.7833e-02, -1.6834e-01, -2.9105e-02, -1.2339e-01, -3.1396e-01,
         -1.7288e-01,  1.4499e-01,  3.2408e-02, -1.6854e-01,  2.7425e-01,
          2.6245e-01,  1.7689e-01,  1.8005e-01, -2.0244e-01, -5.1067e-02,
         -8.5220e-02,  5.8231e-03,  9.4639e-02,  3.7457e-01,  1.0299e-01,
          4.8256e-02,  3.2578e-01,  1.6426e-01, -3.1210e-01,  1.6878e-01,
         -2.7207e-01,  3.5744e-01,  2.2933e-02,  1.5805e-01,  6.2498e-02,
          1.6667e-01, -1.9574e-01, -1.5181e-02, -2.7610e-01, -1.9766e-01,
         -2.2909e-02, -5.4189e-02, -1.8462e-01, -1.6572e-01,  1.7833e-01,
          4.3290e-01, -1.2321e-01, -1.0497e-01, -3.5479e-02,  3.6297e-01,
          5.8330e-01,  2.1674e+00, -1.2130e-02,  1.4917e-01, -5.1050e-02,
         -1.3780e-01, -6.1948e-02, -1.4535e-01,  9.0849e-02, -1.8551e-02,
          3.4229e-02, -3.2806e-02, -1.0551e-01,  6.0735e-03,  1.4587e-01,
          4.6094e-01, -1.9858e-02,  2.0986e-01,  2.9235e-01,  1.1934e-02,
          6.0542e-02, -1.4616e-01, -4.8496e-02,  1.6938e-01, -3.2467e-01,
          2.7058e-01, -1.9697e-01,  1.1031e-01,  8.2267e-02,  5.8402e-02,
          2.3547e-03, -1.4508e-01,  8.0381e-02,  1.4245e-01,  1.2032e-01,
          1.0221e-01,  3.6256e-01, -3.1514e-01, -3.8728e-01, -1.4015e-01,
         -2.0147e-01, -8.9669e-02,  1.1489e-01, -1.7482e-01, -3.4487e-02,
          5.0290e-01, -6.5424e-02, -1.5529e-01,  9.1046e-02, -1.6303e-01,
          8.3655e-02, -2.0989e-01,  4.2992e-01,  7.5667e-03, -1.7544e-01,
         -2.5307e-02, -6.7718e-02,  2.1843e-04,  2.9668e-01, -5.7269e-02,
         -5.5235e-01, -3.8762e-02,  1.3290e-01,  6.2068e-02, -8.6721e-02,
          1.8003e-01,  1.4879e-01,  1.4545e-01,  3.7537e-01, -6.4597e-03,
         -4.0302e-01,  2.9031e-01, -2.1201e-01, -1.4703e-01, -7.2566e-02,
         -3.1821e-02, -4.4197e-02,  1.4240e-01,  4.1754e-01, -1.8117e-01,
         -1.0411e-01,  1.1577e-01,  1.1174e-01,  2.2864e-01, -3.3698e-02,
          3.0748e-01,  2.9511e-01, -2.4779e-01,  3.8628e-01,  2.2324e-02,
         -2.8744e-01,  7.8816e-03, -1.7164e-01,  4.5538e-02, -4.1535e-02,
          3.6103e-01,  5.7958e-02, -9.2825e-02,  3.7072e-02,  7.8020e-02,
         -1.1884e-01,  6.1809e-02, -1.8285e-01, -5.5346e-01,  3.9434e-01,
         -5.3198e-01,  3.7788e-01,  1.5552e-03,  1.6343e-01, -8.8287e-02,
         -1.2186e-01,  6.0413e-01,  1.9365e-01, -6.3386e-02,  3.1055e-01,
         -1.7307e-01,  3.1338e-01, -2.6219e-01,  9.2962e-02,  1.4332e-01,
         -3.0283e-01, -1.6710e-02, -9.1856e-02,  2.2114e-01, -5.0838e-02,
          2.9368e-01, -2.2025e-01,  1.2726e-01,  1.6582e-02, -3.2583e-02,
          6.0355e-02, -9.8103e-02, -3.4082e-01, -7.9041e-02, -5.9847e-03,
         -5.0291e-02,  2.0305e-01, -6.4244e-02,  4.7995e-02,  1.2273e-01,
          2.2634e-02, -1.0295e-01, -2.3563e-01,  1.5786e-01, -1.6630e-01,
          1.7214e-01, -9.4166e-03,  3.1295e-01,  1.3355e-01,  6.7792e-02,
          6.0998e-02,  6.9048e-02,  1.9765e-01,  2.0917e-01,  3.1934e-01,
          1.9316e-01,  2.5168e-01, -1.5111e-01,  1.3534e-02,  5.9723e-03,
          1.2325e-01, -3.7568e-02,  6.1539e-02, -3.4860e-01, -2.4287e-01,
          3.8284e-01,  2.5303e-01,  2.8087e-01, -2.7917e-01,  2.4496e-01,
          2.4167e-01,  3.4012e-02, -1.8614e-01, -2.3446e-02, -4.8121e-02,
          1.7157e-01,  3.3033e-01]])
Cosine Similarity: 0.7423540353775024

5. Interactive Applications

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Disable WandB logging by setting the environment variable
os.environ["WANDB_DISABLED"] = "true"

# Load the tokenizer and model for text generation
model_name = "gpt2"  # You can replace this with a model of your choice
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Initialize conversation context
conversation_history = []

def chatbot_response(user_input):
    """Generate the bot's reply to ``user_input`` and record both turns.

    The user turn is appended to the module-level ``conversation_history``,
    the model is prompted with the full history followed by a ``Bot:`` cue,
    and only the newly generated text is returned and stored as the bot turn.
    """
    # Update the conversation history with the user input
    conversation_history.append(f"User: {user_input}")

    # End the prompt with "Bot:" so the model continues in the bot's voice.
    context = "\n".join(conversation_history) + "\nBot:"

    # Keep only the most recent 512 tokens. Truncating from the right (the
    # tokenizer default) would drop the newest turns, including this one.
    inputs = tokenizer(context, return_tensors="pt")
    input_ids = inputs["input_ids"][:, -512:]
    attention_mask = inputs["attention_mask"][:, -512:]

    # `max_new_tokens` bounds the reply itself; `max_length` would also count
    # the prompt and leave no room to generate once the history grows.
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=50,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; decode only the continuation
    # so the reply never echoes the user's own message.
    new_tokens = outputs[0][input_ids.shape[1]:]
    generated = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Cut the reply where the model starts inventing further dialogue turns.
    bot_response = generated.split("User:")[0].split("Bot:")[0].strip()

    # Update conversation history with the bot's response
    conversation_history.append(f"Bot: {bot_response}")

    return bot_response

# Example usage
user_input = "Hello, how can I help you?"
response = chatbot_response(user_input)
print("Bot:", response)

# Continue the conversation
user_input = "What is your favorite programming language?"
response = chatbot_response(user_input)
print("Bot:", response)

user_input = "Tell me a joke."
response = chatbot_response(user_input)
print("Bot:", response)

Output:

Bot: I
Bot: What is your favorite programming
Bot: Tell me a joke.
Bot