DataViz.manishdatt.com

The Languages of the World

Distribution of language status for the top 10 countries with the most languages.

By Manish Datt

TidyTuesday dataset of 2025-12-23

Languages

Endangered Status

Families

Combined Data

Plotting code



    <link href="https://unpkg.com/tabulator-tables@6.3.1/dist/css/tabulator.min.css" rel="stylesheet">
    <script src="https://cdn.jsdelivr.net/npm/d3@7"></script>
    <script src="https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6.11/dist/plot.umd.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/htl@0.3.1/dist/htl.min.js"></script>
    <script type="text/javascript" src="https://unpkg.com/tabulator-tables@6.3.1/dist/js/tabulator.min.js"></script>
    <script src="https://unpkg.com/papaparse@5.4.1/papaparse.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/danfojs@1.1.2/lib/bundle.js"></script>

<script type="module">
    // Import the ES module version of i18n-iso-countries
    import countries from 'https://cdn.skypack.dev/i18n-iso-countries';

    /**
     * Installs the global `getCountryName` lookup and loads the English
     * locale data it needs to turn ISO country codes into display names.
     *
     * The lookup is assigned before anything is awaited, so the global exists
     * as soon as `init()` has been called. Until the locale is registered
     * (or if loading it fails) the lookup returns the code it was given.
     *
     * @returns {Promise<void>} Resolves once the locale has been registered;
     *     rejects if the locale file cannot be fetched or parsed.
     */
    async function init() {
        // Flipped only after registerLocale() succeeds; guards lookups made
        // while the locale file is still downloading.
        let localeReady = false;

        // Make the getCountryName function available globally
        window.getCountryName = function(code) {
            if (!localeReady || typeof code !== 'string') {
                return code;
            }
            return countries.getName(code.toUpperCase(), 'en') || code;
        };

        // Fetch the English language data
        const response = await fetch('https://cdn.jsdelivr.net/npm/i18n-iso-countries@7.11.0/langs/en.json');
        if (!response.ok) {
            throw new Error(`Failed to load country names: HTTP ${response.status}`);
        }
        const data = await response.json();

        // Register the English locale
        countries.registerLocale(data);
        localeReady = true;
    }

    // Report failures instead of leaving an unhandled rejection.
    init().catch(error => console.error('Error loading country names:', error));
</script>

<!-- Page layout: empty containers that the window "load" script fills with
     Tabulator tables and the Observable Plot chart. -->
<div class="mx-auto max-w-6xl">
    <!-- Table of the rows parsed from languages.csv -->
    <h3 class="mb-4 text-xl font-bold text-slate-800">Languages</h3>
    <div id="languages-table" class="mb-8 overflow-x-auto"></div>

    <!-- The two lookup tables share one grid (see the grid-cols classes) -->
    <div class="mb-8 grid grid-cols-1 gap-8 md:grid-cols-2">
        <div class="flex flex-col">
            <!-- Table of the rows parsed from endangered_status.csv -->
            <h3 class="mb-4 text-xl font-bold text-slate-800">Endangered Status</h3>
            <div id="endangered-table" class="overflow-x-auto"></div>
        </div>
        <div class="flex flex-col">
            <!-- Table of the rows parsed from families.csv -->
            <h3 class="mb-4 text-xl font-bold text-slate-800">Families</h3>
            <div id="families-table" class="overflow-x-auto"></div>
        </div>
    </div>

    <!-- Languages joined with their family name and endangered status -->
    <h3 class="mb-4 text-xl font-bold text-slate-800">Combined Data</h3>
    <div id="combined-table" class="mb-12 overflow-x-auto"></div>

    <!-- Chart card: the plot is appended to #stacked-barplot and the status
         checkboxes that filter it are appended to #status-filters -->
    <div class="rounded-lg border border-slate-100 bg-slate-50/50 p-6">
        <div id="stacked-barplot" class="w-full"></div>
        <div id="status-filters" class="mt-4 flex flex-wrap gap-2"></div>
    </div>
</div>

    <script>
        window.addEventListener('load', () => {
            // Download the three CSV files in parallel, in the order the handler
            // below destructures them: endangered status, families, languages.
            // A non-2xx response is turned into a rejection so that an error page
            // is never handed to the CSV parser.
            Promise.all(
                ['endangered_status.csv', 'families.csv', 'languages.csv'].map(url =>
                    fetch(url).then(r => {
                        if (!r.ok) {
                            throw new Error(`Failed to load ${url}: HTTP ${r.status}`);
                        }
                        return r.text();
                    })
                )
            )
            .then(([endText, famText, langText]) => {
                // Parse a CSV string into an array of row objects keyed by the
                // header names. skipEmptyLines drops the blank record that a
                // trailing newline would otherwise add to every table.
                const parseCsv = text => Papa.parse(text, {header: true, skipEmptyLines: true}).data;

                const endData = parseCsv(endText);
                const famData = parseCsv(famText);
                const langData = parseCsv(langText);

                // Create maps for merging: both lookups are keyed by the row's id
                const endangeredMap = new Map(endData.map(row => [row.id, row]));
                const familiesMap = new Map(famData.map(row => [row.id, row]));

                // Merge data: one output row per language. The status is looked up
                // by the language id and the family by the language's family_id;
                // a missing match leaves the joined fields as empty strings.
                const combinedData = langData.map(lang => {
                    const end = endangeredMap.get(lang.id);
                    const fam = familiesMap.get(lang.family_id);
                    return {
                        id: lang.id,
                        name: lang.name,
                        latitude: lang.latitude,
                        longitude: lang.longitude,
                        iso639P3code: lang.iso639P3code,
                        countries: lang.countries,
                        glottocode: lang.glottocode,
                        macroarea: lang.macroarea,
                        family_id: lang.family_id,
                        family: fam ? fam.family : '',
                        status_code: end ? end.status_code : '',
                        status_label: end ? end.status_label : ''
                    };
                });

                // Create tables
                // All four tables share the same layout and 5-row local pagination,
                // so they are built through one small factory.
                const column = (title, field) => ({title: title, field: field});

                const renderTable = (selector, rows, columns) => new Tabulator(selector, {
                    data: rows,
                    columns: columns,
                    layout: "fitColumns",
                    pagination: "local",
                    paginationSize: 5
                });

                // Returns a fresh array each time so no two tables share column objects
                const languageColumns = () => [
                    column("ID", "id"),
                    column("Name", "name"),
                    column("Latitude", "latitude"),
                    column("Longitude", "longitude"),
                    column("ISO 639-3 Code", "iso639P3code"),
                    column("Countries", "countries"),
                    column("Glottocode", "glottocode"),
                    column("Macroarea", "macroarea"),
                    column("Family ID", "family_id")
                ];

                renderTable("#endangered-table", endData, [
                    column("ID", "id"),
                    column("Status Code", "status_code"),
                    column("Status Label", "status_label")
                ]);

                renderTable("#families-table", famData, [
                    column("ID", "id"),
                    column("Family", "family")
                ]);

                renderTable("#languages-table", langData, languageColumns());

                // The combined table shows the language columns plus the joined fields
                renderTable("#combined-table", combinedData, languageColumns().concat([
                    column("Family", "family"),
                    column("Status Code", "status_code"),
                    column("Status Label", "status_label")
                ]));

                // Count languages per (country, status code) pair using danfojs.

                // A language may list several countries separated by ';', so emit
                // one record per (language, country) pair before grouping.
                const explodedData = [];
                combinedData.forEach(row => {
                    const countries = row.countries ? row.countries.split(';').map(c => c.trim()).filter(c => c) : [];
                    countries.forEach(country => {
                        explodedData.push({
                            country: country,
                            status_code: row.status_code,
                            status_label: row.status_label,
                            language: row.name
                        });
                    });
                });

                // Create new dataframe with exploded countries
                const explodedDf = new dfd.DataFrame(explodedData);

                // Group by country and status code, then count languages.
                // NOTE: the code below reads result.values positionally (row[0] =
                // country, row[1] = status code, row[3] = language count), so the
                // frame is intentionally left un-renamed and un-sorted here. Options
                // such as `inplace` must be passed to danfojs inside an options
                // object ({inplace: true}); a bare `inplace=true` argument is just
                // an assignment to a global and is ignored by the library.
                const result = explodedDf.groupby(['country', 'status_code']).count().resetIndex();
                

                // Process data for stacked bar plot.
                // Rows of result.values are read positionally: row[0] is the country
                // code, row[1] the status code and row[3] the language count.

                // Resolve a country code to a display name, falling back to the raw
                // code when the global lookup is not available.
                const toCountryName = code =>
                    typeof window.getCountryName === 'function' ? window.getCountryName(code) : code;

                // Missing status codes compare as the empty string when sorting
                const rawStatus = value => (value === null || value === undefined ? '' : String(value));

                // Sort grouped data by status code first
                const sortedResult = [...result.values].sort(
                    (a, b) => rawStatus(a[1]).localeCompare(rawStatus(b[1]))
                );

                // Total language count per country, across all status codes
                const countryTotals = new Map();
                result.values.forEach(row => {
                    const country = row[0];
                    const langCount = row[3]; // language_count is at index 3
                    countryTotals.set(country, (countryTotals.get(country) || 0) + langCount);
                });

                // Get top 10 countries
                const top10Countries = Array.from(countryTotals.entries())
                    .sort((a, b) => b[1] - a[1])
                    .slice(0, 10)
                    .map(([country]) => country);
                const top10Lookup = new Set(top10Countries);

                // Exactly one digit in the range 0-6. A plain string comparison
                // against '0' and '6' would also let values such as '10' through.
                const VALID_STATUS = /^[0-6]$/;

                // Filter data for top 10 countries and transform for stacked bar plot
                const stackedData = [];
                sortedResult.forEach(row => {
                    const country = row[0];
                    const count = row[3];
                    if (!top10Lookup.has(country)) {
                        return;
                    }

                    // Replace blank status code with "0"
                    const processedStatus = rawStatus(row[1]) === '' ? '0' : rawStatus(row[1]);

                    // Only include status codes 0-6
                    if (VALID_STATUS.test(processedStatus)) {
                        stackedData.push({
                            country: toCountryName(country),
                            status: processedStatus,
                            count: count
                        });
                    }
                });

                // Collect the per-status counts of each country. Counts are added,
                // not assigned, so two source rows that end up with the same status
                // (e.g. a blank code and an explicit "0") are both kept.
                const countryData = new Map();
                stackedData.forEach(item => {
                    if (!countryData.has(item.country)) {
                        countryData.set(item.country, {country: item.country, statusCounts: {}});
                    }
                    const counts = countryData.get(item.country).statusCounts;
                    counts[item.status] = (counts[item.status] || 0) + item.count;
                });

                const sumCounts = counts => Object.values(counts).reduce((sum, count) => sum + count, 0);

                // Convert to array and sort by total count (descending) for the y-axis
                const sortedCountries = Array.from(countryData.values())
                    .sort((a, b) => sumCounts(b.statusCounts) - sumCounts(a.statusCounts));

                // Flatten data for plotting and calculate totals
                const stackedPlotData = [];
                const barCountryTotals = {};

                sortedCountries.forEach(entry => {
                    barCountryTotals[entry.country] = sumCounts(entry.statusCounts);

                    Object.entries(entry.statusCounts).forEach(([status, count]) => {
                        stackedPlotData.push({
                            country: entry.country,
                            status: status,
                            count: count
                        });
                    });
                });

                // Fill colour for every status code (0-6), keyed by the code as a string
                const statusColors = {
                    "0": "#BDBDBD",
                    "1": "#2E7D32",
                    "2": "#9CCC65",
                    "3": "#FBC02D",
                    "4": "#EF6C00",
                    "5": "#C62828",
                    "6": "#6D5C6D"
                };

                // Status code -> status label, taken from the endangered-status rows.
                // Rows with an empty code or a code outside 0-6 are skipped; a later
                // row with the same code replaces an earlier one.
                const statusLabels = {};
                for (const row of endData) {
                    const code = row.status_code;
                    const inRange = code >= 0 && code <= 6;
                    if (!code || !inRange) {
                        continue;
                    }
                    statusLabels[code] = row.status_label ? row.status_label : "NA";
                }

                // Status 0 is always labelled "NA"
                statusLabels["0"] = "NA";

                // Distinct status codes present in the plot data, in first-seen order;
                // every one of them starts out enabled in the filter state.
                const uniqueStatuses = Array.from(new Set(stackedPlotData.map(({status}) => status)));
                const statusFilters = {};
                for (const code of uniqueStatuses) {
                    statusFilters[code] = true;
                }

                // Create checkboxes
                const filterContainer = document.getElementById('status-filters');

                // Status code -> its checkbox element. Keeping the references avoids
                // document-wide selector lookups, which could match unrelated inputs
                // elsewhere on the page.
                const statusCheckboxes = new Map();

                // Builds a checked checkbox wrapped in a <label> with the given
                // caption and appends it to the filter container.
                const addFilterCheckbox = caption => {
                    const label = document.createElement('label');
                    label.style.marginRight = '15px';
                    label.style.display = 'inline-block';

                    const checkbox = document.createElement('input');
                    checkbox.type = 'checkbox';
                    checkbox.checked = true;

                    label.appendChild(checkbox);
                    label.appendChild(document.createTextNode(caption));
                    filterContainer.appendChild(label);
                    return {label: label, checkbox: checkbox};
                };

                // Create "All" checkbox: toggles every status at once
                const allFilter = addFilterCheckbox('All');
                const allLabel = allFilter.label;
                const allCheckbox = allFilter.checkbox;
                allLabel.style.fontWeight = 'bold';
                allCheckbox.id = 'all-status';

                allCheckbox.addEventListener('change', function() {
                    const isChecked = this.checked;
                    uniqueStatuses.forEach(status => {
                        statusFilters[status] = isChecked;
                        const statusCheckbox = statusCheckboxes.get(status);
                        if (statusCheckbox) {
                            statusCheckbox.checked = isChecked;
                        }
                    });
                    updateStackedBarPlot();
                });

                // Create individual status checkboxes
                uniqueStatuses.forEach(status => {
                    const filter = addFilterCheckbox(statusLabels[status] || `Status ${status}`);
                    filter.checkbox.value = status;
                    statusCheckboxes.set(status, filter.checkbox);

                    // Apply color to the label text
                    filter.label.style.color = statusColors[status] || "#000000";

                    filter.checkbox.addEventListener('change', function() {
                        statusFilters[status] = this.checked;

                        // "All" stays checked only while every status is enabled
                        allCheckbox.checked = uniqueStatuses.every(s => statusFilters[s]);

                        updateStackedBarPlot();
                    });
                });

                /**
                 * Redraws the stacked bar plot using only the statuses that are
                 * currently enabled in `statusFilters`, and replaces the contents
                 * of the #stacked-barplot container with the new plot.
                 */
                function updateStackedBarPlot() {
                    const filteredData = stackedPlotData.filter(d => statusFilters[d.status]);

                    // Recalculate totals per country for the visible statuses
                    const filteredTotals = {};
                    filteredData.forEach(d => {
                        filteredTotals[d.country] = (filteredTotals[d.country] || 0) + d.count;
                    });

                    // One label per country. Deriving the labels from the data rows
                    // would draw the same text once per status, on top of itself.
                    const totalLabels = Object.entries(filteredTotals)
                        .map(([country, count]) => ({country, count}));

                    // Update plot
                    const updatedPlot = Plot.plot({
                        title: htl.html`<span class="fs-3">Status distribution for top 10 countries with most languages</span>`,
                        marks: [
                            Plot.barX(filteredData, {x: "count", y: "country", fill: "status"}),
                            // Total count, placed just past the end of each bar
                            Plot.text(totalLabels, {x: "count", y: "country", text: "count", textAnchor: "start", dx: 8, fill: "black", fontWeight: "bold", fontSize: 12})
                        ],
                        x: {label: null, tickFormat: null, ticks: null, tickSize: 0, axis: null},
                        // Fixed domain keeps the country order stable while filtering
                        y: {label: null, domain: sortedCountries.map(d => d.country), ticks: null, tickSize: 0},
                        color: {
                            type: "ordinal",
                            domain: Object.keys(statusColors),
                            range: Object.values(statusColors)
                        },
                        width: 700,
                        height: 400,
                        marginLeft: 170,  // Increase left margin for longer country names
                        marginRight: 30,   // Reduce right margin to shift plot right
                        style: {
                            fontSize: "14px"
                        }
                    });

                    // Replace old plot with new one
                    const plotContainer = document.getElementById('stacked-barplot');
                    plotContainer.innerHTML = '';
                    plotContainer.appendChild(updatedPlot);
                }

                // Create initial stacked bar plot.
                // Every status filter starts enabled, so the filtered data and totals
                // equal the full data set; rendering through updateStackedBarPlot()
                // keeps a single definition of the plot instead of two copies that
                // can drift apart.
                updateStackedBarPlot();
            })
            .catch(error => console.error('Error loading CSV files:', error));
        });
    </script>