Skip to main content

Command Palette

Search for a command to run...

Universal Sentence Encoder for Sentence Embedding and Sentence Similarity Tasks

Updated
4 min read
M

Mohamad is interested in programming (mobile, web, database, and machine learning). He is studying at the Center for Artificial Intelligence Technology (CAIT), Universiti Kebangsaan Malaysia (UKM).

The Universal Sentence Encoder (USE) is a model developed by Google that encodes sentences into fixed-length embeddings. These embeddings can be used for various natural language processing tasks such as semantic similarity, text classification, and clustering.

Key Features:

  • Versatility: USE can handle a variety of tasks including sentence similarity, sentiment analysis, and more.

  • Pre-trained Models: It comes with pre-trained models that can be fine-tuned for specific tasks or used directly.

  • Multilingual Support: There are versions of USE that support multiple languages, making it useful for diverse applications.

Use Cases:

  • Semantic Search: Finding relevant documents based on natural language queries.

  • Text Classification: Automating categorization of text data.

  • Clustering: Grouping similar sentences or documents for analysis.

SentenceEmbedding.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sentence Embeddings with TensorFlow.js</title>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input[type="text"] {
            width: 300px;
            padding: 5px;
            margin-bottom: 10px;
        }
        button {
            padding: 5px 10px;
            margin-bottom: 20px;
        }
        #output {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
            white-space: pre-wrap;
        }
        #loading {
            display: none;
            margin-top: 10px;
            font-size: 14px;
            color: #555;
        }
        #loading::after {
            content: " Loading...";
            animation: dots 1.5s infinite;
        }
        @keyframes dots {
            0%, 20% {
                content: " Loading...";
            }
            40% {
                content: " Loading..";
            }
            60% {
                content: " Loading.";
            }
            80% {
                content: " Loading";
            }
        }
    </style>
</head>
<body>
    <h1>Universal Sentence Encoder Example</h1>
    <p>Enter a sentence to generate its embedding:</p>
    <input type="text" id="sentenceInput" placeholder="Type a sentence here..." style="width: 300px; padding: 5px;">
    <button onclick="generateEmbedding()">Generate Embedding</button>

    <div id="loading"></div>
    <div id="output">Embedding will appear here...</div>

    <script>
        // Promise for the in-flight or completed model load. Sharing it means the
        // model is loaded once per page instead of on every button click.
        let cachedModelPromise = null;

        /**
         * Loads the Universal Sentence Encoder (USE) model, reusing an earlier
         * load when one exists, and toggles the `#loading` indicator while waiting.
         *
         * @returns {Promise<object>} Resolves with the value produced by `use.load()`.
         * @throws Re-throws whatever `use.load()` rejects with; the cached promise
         *     is cleared first so a later call can retry.
         */
        async function loadModel() {
            const loadingIndicator = document.getElementById('loading');

            // Show loading indicator
            loadingIndicator.style.display = 'block';

            try {
                if (!cachedModelPromise) {
                    cachedModelPromise = use.load();
                }
                return await cachedModelPromise;
            } catch (error) {
                // Do not keep a rejected promise around, or every later call would fail.
                cachedModelPromise = null;
                throw error;
            } finally {
                // Hide the indicator on success and on failure alike.
                loadingIndicator.style.display = 'none';
            }
        }

        /**
         * Embeds a single sentence with the given model.
         *
         * @param {object} model - Object exposing an `embed(sentences)` method.
         * @param {string} sentence - The sentence to embed.
         * @returns {Promise<*>} Whatever `model.embed` yields for a one-element batch.
         */
        async function getSentenceEmbedding(model, sentence) {
            // The model works on batches, so wrap the lone sentence in an array.
            const batch = [sentence];
            return model.embed(batch);
        }

        /**
         * Click handler: reads the sentence from `#sentenceInput`, embeds it and
         * writes the embedding, formatted as JSON, into `#output`.
         *
         * Blank input triggers an alert and leaves the output untouched. Any error
         * is logged to the console and replaced by a generic message in `#output`.
         * The embedding tensor is disposed once it is no longer needed.
         *
         * @returns {Promise<void>}
         */
        async function generateEmbedding() {
            // Held outside the try block so `finally` can release it.
            let embedding = null;
            try {
                // Get the input sentence from the text box
                const sentence = document.getElementById('sentenceInput').value;
                if (!sentence.trim()) {
                    alert("Please enter a valid sentence.");
                    return;
                }

                // Clear previous output
                document.getElementById('output').textContent = "Processing...";

                // Load the model
                const model = await loadModel();

                // Get the embedding for the sentence
                embedding = await getSentenceEmbedding(model, sentence);

                // Convert the embedding tensor to an array
                const embeddingArray = await embedding.array();

                // Display the embedding in the output div
                const outputDiv = document.getElementById('output');
                outputDiv.textContent = `Embedding for "${sentence}":\n${JSON.stringify(embeddingArray, null, 2)}`;
            } catch (error) {
                console.error("Error:", error);
                document.getElementById('output').textContent = "An error occurred while generating the embedding.";
            } finally {
                // Ensure loading indicator is hidden
                document.getElementById('loading').style.display = 'none';

                // Tensors are not garbage-collected; release the memory explicitly.
                if (embedding) {
                    embedding.dispose();
                }
            }
        }
    </script>
</body>
</html>

Output:

SentenceSimilarity.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sentence Similarity with TensorFlow.js</title>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input[type="text"] {
            width: 300px;
            padding: 5px;
            margin-bottom: 10px;
        }
        button {
            padding: 5px 10px;
            margin-bottom: 20px;
        }
        #output {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
            white-space: pre-wrap;
        }
        #loading {
            display: none;
            margin-top: 10px;
            font-size: 14px;
            color: #555;
        }
        #loading::after {
            content: " Loading...";
            animation: dots 1.5s infinite;
        }
        @keyframes dots {
            0%, 20% {
                content: " Loading...";
            }
            40% {
                content: " Loading..";
            }
            60% {
                content: " Loading.";
            }
            80% {
                content: " Loading";
            }
        }
    </style>
</head>
<body>
    <h1>Sentence Similarity Example</h1>
    <p>Enter two sentences to calculate their similarity:</p>
    <input type="text" id="sentence1" placeholder="Enter first sentence..." style="margin-right: 10px;">
    <input type="text" id="sentence2" placeholder="Enter second sentence...">
    <br><br>
    <button onclick="calculateSimilarity()">Calculate Similarity</button>

    <div id="loading"></div>
    <div id="output">Similarity score will appear here...</div>

    <script>
        // Promise for the in-flight or completed model load. Sharing it means the
        // model is loaded once per page instead of on every button click.
        let cachedModelPromise = null;

        /**
         * Loads the Universal Sentence Encoder (USE) model, reusing an earlier
         * load when one exists, and toggles the `#loading` indicator while waiting.
         *
         * @returns {Promise<object>} Resolves with the value produced by `use.load()`.
         * @throws Re-throws whatever `use.load()` rejects with; the cached promise
         *     is cleared first so a later call can retry.
         */
        async function loadModel() {
            const loadingIndicator = document.getElementById('loading');

            // Show loading indicator
            loadingIndicator.style.display = 'block';

            try {
                if (!cachedModelPromise) {
                    cachedModelPromise = use.load();
                }
                return await cachedModelPromise;
            } catch (error) {
                // Do not keep a rejected promise around, or every later call would fail.
                cachedModelPromise = null;
                throw error;
            } finally {
                // Hide the indicator on success and on failure alike.
                loadingIndicator.style.display = 'none';
            }
        }

        /**
         * Embeds a batch of sentences with the given model.
         *
         * @param {object} model - Object exposing an `embed(sentences)` method.
         * @param {string[]} sentences - Sentences to embed, passed through unchanged.
         * @returns {Promise<*>} Whatever `model.embed` yields for the batch.
         */
        async function getSentenceEmbeddings(model, sentences) {
            return model.embed(sentences);
        }

        /**
         * Computes the cosine similarity of two embedding tensors:
         * dot(a, b) / (‖a‖ · ‖b‖).
         *
         * The computation runs inside `tf.tidy` so the intermediate tensors
         * (product, sum, norms, quotient) are released when it finishes. The two
         * input tensors are not disposed here.
         *
         * @param {tf.Tensor} embedding1 - First embedding.
         * @param {tf.Tensor} embedding2 - Second embedding; multiplied element-wise
         *     with the first, so the shapes must be compatible.
         * @returns {number} The similarity read back synchronously via `arraySync()`.
         */
        function cosineSimilarity(embedding1, embedding2) {
            return tf.tidy(() => {
                const dotProduct = tf.sum(tf.mul(embedding1, embedding2));
                const norm1 = tf.norm(embedding1);
                const norm2 = tf.norm(embedding2);
                return dotProduct.div(norm1.mul(norm2)).arraySync();
            });
        }

        /**
         * Click handler: reads the two sentences from `#sentence1` and
         * `#sentence2`, embeds them in one batch and writes their cosine
         * similarity, to four decimal places, into `#output`.
         *
         * If either trimmed sentence is empty, an alert is shown and nothing else
         * happens. Any error is logged to the console and replaced by a generic
         * message in `#output`. Every tensor created here is disposed before
         * returning.
         *
         * @returns {Promise<void>}
         */
        async function calculateSimilarity() {
            // Tensors created during this run; released in `finally`.
            const ownedTensors = [];
            try {
                // Get the input sentences from the text boxes
                const sentence1 = document.getElementById('sentence1').value.trim();
                const sentence2 = document.getElementById('sentence2').value.trim();

                if (!sentence1 || !sentence2) {
                    alert("Please enter both sentences.");
                    return;
                }

                // Clear previous output and show loading indicator
                document.getElementById('output').textContent = "Processing...";
                document.getElementById('loading').style.display = 'block';

                // Load the model
                const model = await loadModel();

                // Get embeddings for the two sentences (one row per sentence)
                const embeddings = await getSentenceEmbeddings(model, [sentence1, sentence2]);
                ownedTensors.push(embeddings);

                // Slice out row 0 and row 1 as separate tensors
                const embeddingWidth = embeddings.shape[1];
                const embedding1 = embeddings.slice([0, 0], [1, embeddingWidth]);
                ownedTensors.push(embedding1);
                const embedding2 = embeddings.slice([1, 0], [1, embeddingWidth]);
                ownedTensors.push(embedding2);

                // Calculate cosine similarity
                const similarityScore = cosineSimilarity(embedding1, embedding2);

                // Display the similarity score in the output div
                const outputDiv = document.getElementById('output');
                outputDiv.textContent = `Similarity score between:\n"${sentence1}"\nand\n"${sentence2}":\n${similarityScore.toFixed(4)}`;
            } catch (error) {
                console.error("Error:", error);
                document.getElementById('output').textContent = "An error occurred while calculating similarity.";
            } finally {
                // Ensure loading indicator is hidden
                document.getElementById('loading').style.display = 'none';

                // Tensors are not garbage-collected; release the memory explicitly.
                for (const tensor of ownedTensors) {
                    tensor.dispose();
                }
            }
        }
    </script>
</body>
</html>

Output: