Detecting french characters in csv file

Advertisements

I am trying to count the number of rows where any french character is present. Here is my code:

def has_french_characters(text):
    # Check if text contains any French characters
    french_characters = ['à', 'â', 'ç', 'é', 'è', 'ê', 'ë', 'î', 'ï', 'ô', 'û', 'ù', 'ü', 'ÿ', 'ñ', 'æ', 'œ',
                         'À', 'Â', 'Ç', 'É', 'È', 'Ê', 'Ë', 'Î', 'Ï', 'Ô', 'Û', 'Ù', 'Ü', 'Ÿ', 'Ñ', 'Æ', 'Œ']
    return any(char in french_characters for char in text)

def count_rows_with_french_characters(filename):
    total_rows = 0
    rows_with_french_characters = 0

    with open(filename, 'r', encoding='utf-8', errors='ignore') as file:
        for line in file:
            total_rows += 1
            if has_french_characters(line):
                rows_with_french_characters += 1

    return total_rows, rows_with_french_characters

# Replace 'your_dataset.csv' with the path to your dataset file
filename = 'your_dataset.csv'

total_rows, rows_with_french_characters = count_rows_with_french_characters(filename)

print(f'Total rows in the dataset: {total_rows}')
print(f'Rows with French characters: {rows_with_french_characters}')

This is not working as it is returning 0 rows with french characters which is incorrect as I can see multiple french characters in the csv.

The detected encoding of the CSV file is: ascii

>Solution :

utf-8 modifying the string to utf format. So you are not able to see french characters.
Try with latin1

Eg:

with open(filename, 'r', encoding='latin1', errors='ignore') as file:

Leave a ReplyCancel reply