Transformers Pipeline: Translate English to French
Mohamad's interests are in programming (mobile, web, database, and machine learning). He is studying at the Center for Artificial Intelligence Technology (CAIT), Universiti Kebangsaan Malaysia (UKM).
[1] Get dataset
import pandas as pd
# Location of the airline tweets CSV
dset_url = 'https://archive.org/download/misc-dataset/airline-tweets.csv'
# Read the remote CSV into a DataFrame
df_airline = pd.read_csv(filepath_or_buffer=dset_url)
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
df_airline.info()
[2] Clean
import pandas as pd
# !pip install neattext
import neattext.functions as nfx
# Clean the text data.
# Order matters: user handles, URLs and hashtags are stripped first, while the
# characters that mark them ('@', '://', '.', '/', '#') are still in the text.
# Previously numbers and punctuation were removed before the hashtag/URL
# steps, so those later steps could find nothing left to match.
# NOTE(review): relies on neattext's matchers keying on those characters —
# confirm against the installed neattext version.
df_airline['clean'] = (
    df_airline['text']
    .apply(nfx.remove_userhandles)
    .apply(nfx.remove_urls)
    .apply(nfx.remove_hashtags)
    .apply(nfx.remove_numbers)
    .apply(nfx.fix_contractions)
    .apply(nfx.remove_punctuations)
    .apply(str.lower)
)
[3] Translate
(approx 30 min with CUDA)
from transformers import pipeline
# Build the English -> French translation pipeline on the GPU
translator = pipeline(
    task="translation_en_to_fr",
    model="Helsinki-NLP/opus-mt-en-fr",
    device=0,  # index 0 selects the first GPU
)
# Function to translate text
def translate_to_french(text, max_length=50):
    """Translate one English string to French using the module-level ``translator``.

    Args:
        text: The English text to translate. Anything that is not a string
            (for example ``None`` or a NaN coming from a missing DataFrame
            cell) and any blank string is treated as "nothing to translate".
        max_length: Value forwarded to the pipeline call as ``max_length``.
            Defaults to 50; raise it if translations come back cut short.

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""`` when
        the input is blank/non-string, the pipeline returns nothing usable,
        or the pipeline raises.
    """
    # Guard on type before calling str methods: a NaN/float would otherwise
    # raise AttributeError on .strip() and be reported as a translation error.
    if not isinstance(text, str) or not text.strip():
        return ""  # Return an empty string for empty or invalid input
    try:
        # Translate the text
        translated = translator(text, max_length=max_length)
        # Only trust a non-empty list result
        if isinstance(translated, list) and translated:
            return translated[0]['translation_text']
        return ""  # Return an empty string if translation fails
    except Exception as e:
        # Deliberately best-effort: one failing row must not abort the whole run
        print(f"Error translating text: {text}. Error: {e}")
        return ""  # Return an empty string in case of any error
# Translate every cleaned tweet, one row at a time, into a new column
df_airline['clean_french'] = df_airline['clean'].map(translate_to_french)
# Render the whole DataFrame in the notebook output
display(df_airline)
[4] Preview
# Show the first rows of the cleaned English text next to its French translation.
# The columns created earlier in this file are 'clean' and 'clean_french';
# 'phrase' / 'phrase_french' are never created and would raise KeyError.
display(df_airline[['clean', 'clean_french']].head())
[5] Save
# Write the DataFrame to CSV with a header row and without the index column
df_airline.to_csv(
    path_or_buf="airline-tweets-clean-french.csv",
    header=True,
    index=False,
)
.