Demonstration Statistics#

This part of the book present some figures regarding the popularity of the classroom demonstrations within the Applied Sciences faculty of Delft University of Technology. Below you will find histograms presenting the number of times a demonstration was presented. Each histogram presents data for demos in a particular field of physics, such as, Mechanics, Fluid Mechanics, Oscillations and Waves, etc. Data was collected over a period of 12 academic years.

Histograms#

Hide code cell source
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 13:29:31 2024
File reads data from an excel file and plots histograms
"""

{
    "tags": [
        "hide-input",
    ]
}

import pandas as pd
import numpy as np
import os
import re
from IPython.display import display, Javascript
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, TapTool, CustomJS, Label
from bokeh.models.tools import HoverTool
from bokeh.models.annotations import Title
from bokeh.io import output_notebook

breaklen = []
delim = []

# Define the update_paragraph_id function
def update_paragraph_id(paragraph_id):
    # Convert PIRA1 value to match HTML href format
    converted_paragraph_id = convert_to_html_format(paragraph_id)
    
    display(Javascript(f"""
        var widgetId = 'paragraph-id-widget';
        var widgetElem = document.getElementById(widgetId);
        if (widgetElem) {{
            widgetElem.innerHTML = '<b>Paragraph ID:</b> ' + '{converted_paragraph_id}';
        }}
    """))

# Create a custom HTML widget to display the selected paragraph ID
class ParagraphIdWidget(Javascript):
    def __init__(self, paragraph_id=""):
        super().__init__(f"""
        var widgetId = 'paragraph-id-widget';
        var widgetElem = document.getElementById(widgetId);
        if (!widgetElem) {{
            widgetElem = document.createElement('div');
            widgetElem.id = widgetId;
            element.append(widgetElem);
        }}
        widgetElem.innerHTML = '<b>Paragraph ID:</b> ' + '{paragraph_id}';
        """, lib=None)

def convert_to_html_format(pira_value):
    """
    Convert PIRA value to match the format used in HTML href attributes in Jupyter Book pages.
    """
    try:
        # Extract chapter, paragraph, and sub-paragraph from PIRA value
        chapter, paragraph, sub_paragraph = re.match(r'^(\d+)([A-Z])(\d+\.\d+)$', pira_value).groups()
        
        # Convert paragraph to Jupyterbook-style paragraph ID
        jupyter_paragraph_id = f'{chapter.lower()}{ord(paragraph) - ord("A") + 1:02d}-{sub_paragraph.replace(".", "")}'
        return jupyter_paragraph_id
    except AttributeError:
        print("Invalid PIRA format:", pira_value)
        return ''

def create_multiline_labels(source, label_field, p, break_length=breaklen, delimiter=delim):
    labels_source = source  # Use the same source for labels and bars
    
    # Combine the necessary columns into a single string with ':'
    labels_series = (
        source.data[demo_column] + ": " + 
        source.data[pira1_column] + "\n" + 
        source.data[pira2_column] + "\n" + 
        source.data[topic_column] + "\n" + 
        source.data[concept_column]
    )
    
    # Add the delimiter to each label
    labels_series = labels_series + delimiter
    
    if len(labels_series) > 0:
        # Use numpy.vectorize to apply the function element-wise
        vectorized_linebreak = np.vectorize(lambda x: '<br>'.join([x[i:i+break_length] for i in range(0, len(x), break_length)]))
        
        # Apply line breaks using vectorized function
        labels_series = vectorized_linebreak(labels_series)

        # Assign the modified array back to the ColumnDataSource
        labels_source.data[label_field] = labels_series.tolist()

        p.text(x=0, y='Merged', text=label_field, source=labels_source,
               text_align='left', text_font_size='10pt', text_color='black')

# Update the read_and_plot_histograms function to adjust the y-coordinates for hbar
def read_and_plot_histograms(file_path, 
                             sheet_name, 
                             area_column, 
                             topic_column, 
                             concept_column, 
                             demo_column, 
                             occurrence_column, 
                             histogram_size, 
                             histogram_min_size, 
                             plot_heights):

    # Read the Excel file with the specified sheet name
    result_df = pd.read_excel(file_path, sheet_name=sheet_name, header=None, skiprows=skip)

    # Assign row headers from the first row
    result_df.columns = result_df.iloc[0]

    # Drop the first row (which is now used as column headers)
    result_df = result_df.drop(0)

    # Check if the DataFrame has at least two columns
    if len(result_df.columns) < 2:
        raise ValueError("DataFrame should have at least two columns for plotting.")

    # Convert 'Demo', 'PIRA1', and 'PIRA2' columns to strings to preserve the original format
    result_df[demo_column] = result_df[demo_column].astype(str)
    result_df[pira1_column] = result_df[pira1_column].astype(str)
    result_df[pira2_column] = result_df[pira2_column].astype(str)
    result_df[topic_column] = result_df[topic_column].astype(str)
    result_df[concept_column] = result_df[concept_column].astype(str)

    # Replace NaN values in 'PIRA1', 'PIRA2', 'Topic', and 'Concept' columns with empty strings
    result_df[pira1_column].replace('nan', '', inplace=True)
    result_df[pira2_column].replace('nan', '', inplace=True)
    result_df[topic_column].replace('nan', '', inplace=True)
    result_df[concept_column].replace('nan', '', inplace=True)

    # Get unique values in the 'Area' column
    # This is done since each area has a histogram plotted
    unique_areas = result_df[area_column].unique()

    for i, area in enumerate(unique_areas):

        # Filter the DataFrame based on 'Area' column
        selected_df = result_df[result_df[area_column] == area]

        # Check if the selected DataFrame has at least two columns
        if len(selected_df.columns) < 2:
            raise ValueError("Selected DataFrame should have at least two columns for plotting.")

        # Exclude rows where both 'PIRA1' and 'PIRA2' are NaN
        #selected_df = selected_df.dropna(subset=[pira1_column, pira2_column], how='all')
        selected_df = selected_df.dropna(subset=[pira1_column, pira2_column, topic_column, concept_column], how='all')

        # Clean up the 'Topic' column data by removing leading/trailing spaces and extra newlines
        selected_df[topic_column] = selected_df[topic_column].str.strip().replace('\n', ' ')

        # Clean up the 'Concept' column data by removing leading/trailing spaces and extra newlines
        selected_df[concept_column] = selected_df[concept_column].str.strip().replace('\n', ' ')

        # Merge 'Demo', 'PIRA1', 'PIRA2', 'Topic', and 'Concept' columns based on their original order of appearance
        selected_df['Merged'] = (
            selected_df[demo_column]+ " (" + 
            selected_df[pira1_column]+ ") (" + 
            selected_df[pira2_column]+ ")\n" + 
            selected_df[topic_column]+ "\n" + 
            selected_df[concept_column]
        )

        # Drop duplicate rows based on the 'Merged' column
        selected_df = selected_df.drop_duplicates(subset=['Merged'])

        # Create a ColumnDataSource for the Bokeh plot
        source = ColumnDataSource(selected_df)

        # Set the anchor base URL
        #anchor_base_url = 'file:///C:/Users/t/de/_build/html/book/'  # Replace with your base URL

        # Set the anchor base URL dynamically
        anchor_base_url = None

        script = """
            var anchorBaseUrl;

            function getAnchorBaseUrl() {
                return anchorBaseUrl;
            }

            function convertToAnchorFormat(pira_value) {
                var pira1;  // Define pira1 within the function scope

                // Extract chapter, paragraph, and sub-paragraph from PIRA value
                var matches = pira_value.match(/^(\d+)([A-Z])(\d+\.\d+)$/);
                if (matches) {
                    var chapter = matches[1].toLowerCase();
                    var paragraph = matches[2];
                    var sub_paragraph = matches[3].replace('.', '-');

                    // Extract sub_paragraph and demo_no from sub_paragraph
                    var sub_paragraph_match = sub_paragraph.match(/^(\d+)-(\d+)$/);
                    if (sub_paragraph_match) {
                        var sub_paragraph = sub_paragraph_match[1];
                        var demo_no = sub_paragraph_match[2];
                    } else {
                        var demo_no = null;
                    }

                    // Remove leading zeros from the chapter
                    chapter = chapter.replace(/^0+/, '');

                    pira1 = chapter + paragraph + sub_paragraph + '.' + demo_no;

                    // Store values for printing
                    convertToAnchorFormat.pira1 = pira1;
                    convertToAnchorFormat.chapter = chapter;
                    convertToAnchorFormat.paragraph = paragraph;
                    convertToAnchorFormat.subparagraph = sub_paragraph;
                    convertToAnchorFormat.demo_no = demo_no;

                    console.log("PIRA1 (pira1):", convertToAnchorFormat.pira1);
                    console.log("Chapter (area):", convertToAnchorFormat.chapter);
                    console.log("Paragraph (topic):", convertToAnchorFormat.paragraph);
                    console.log("Subparagraph (concept):", convertToAnchorFormat.subparagraph);
                    console.log("Demo No:", convertToAnchorFormat.demo_no);

                    // Print the complete PIRA1 value
                    console.log("Complete PIRA1 Value:", pira_value);

                    // Return the formatted anchor value with demo_no
                    //return paragraph + sub_paragraph + (demo_no !== null ? '-' + demo_no : '');
                    return chapter + paragraph + sub_paragraph + (demo_no !== null ? '-' + demo_no : '');
                } else {
                    console.error("Invalid PIRA format:", pira_value);
                    return '';
                }
            }
        """

        js_callback = CustomJS(
            args=dict(source=source),
            code=f"""
                {script}
                console.log("Callback triggered!");

                // Access data from the ColumnDataSource
                var data = source.data;
                console.log("Data:", data);

                // Log area value in the console
                console.log("Area:", "{area}");

                // Convert the entire area string to lowercase
                var modifiedArea = "{area}".toLowerCase();

                console.log("Modified Area:", modifiedArea);

                // Declare modifiedConcept variable at the beginning of the function
                var modifiedConcept;

                if (cb_data !== undefined && cb_data.source !== undefined) {{
                    var index = cb_data.source.selected.indices[0];
                    if (index !== undefined) {{
                        var data = source.data;
                        var pira1_value = data['PIRA1'][index];

                        console.log("PIRA1:", pira1_value);

                        // Extract chapter, paragraph, and sub-paragraph from PIRA1 value
                        var matches = pira1_value.match(/^(\d+)([A-Z])(\d+\.\d+)$/);
                        if (matches) {{
                            var chapter = matches[1].toLowerCase();  // Extracted chapter
                            var paragraph = matches[2];
                            //var sub_paragraph = matches[3].replace('.', '-');
                            var sub_paragraph = matches[3].split('.')[0]; // Extract the first part before the dot
                            var demonstration = matches[3].split('.')[1]; // Extract the part after the dot

                            console.log("Chapter:", chapter);
                            console.log("Paragraph:", paragraph);
                            console.log("Sub-Paragraph:", sub_paragraph);
                            console.log("Demonstration:", demonstration);
                        }}

                        // Extract paragraph from PIRA1 value
                        var paragraph = pira1_value.match(/^\d+([A-Z])\d+\.\d+$/)[1];

                        console.log("Extracted Paragraph:", paragraph);

                        // Convert PIRA1 value to the anchor format
                        var anchorFormat = convertToAnchorFormat(pira1_value);

                        console.log("Anchor Format:", anchorFormat);

                        // Get the current notebook URL
                        var notebookUrl = window.location.href;

                        console.log("Notebook URL:", notebookUrl);

                        // Find the position of "/book" in the URL
                        var bookIndex = notebookUrl.indexOf("/book");
                        if (bookIndex !== -1) {{
                            // Extract the substring up to "/book" (including "/book")
                            anchorBaseUrl = notebookUrl.substring(0, bookIndex + 5);
                        }} else {{
                            // "/book" not found, use the entire URL
                            anchorBaseUrl = notebookUrl;
                        }}

                        console.log("Base URL:", anchorBaseUrl);

                        // Include the modified area in the target URL
                        var targetUrl = anchorBaseUrl + '/' + modifiedArea;
                        console.log("Target URL (After Modified Area):", targetUrl);

                        // Check if 'Topic' column is defined, not null/undefined, and not an empty string
                        if (data['Topic'] !== undefined && data['Topic'][index] !== null && data['Topic'][index] !== '') {{
                            // Extract the first 'Topic' value and append it to the target URL
                            var topics = data['Topic'][index].split('\\n');
                            var modifiedTopic = topics[0].trim().toLowerCase();

                            console.log("Modified Topic:", modifiedTopic);
                        }}

                        // Check if 'Concept' column is defined, not null/undefined, and not an empty string
                        if (data['Concept'] !== undefined && data['Concept'][index] !== null && data['Concept'][index] !== '') {{
                            // Extract the first value from 'Concept' column
                            var concept_value = data['Concept'][index];
                            modifiedConcept = concept_value.split('\\n')[0].trim().toLowerCase();

                            console.log("Modified Concept:", modifiedConcept);

                            // Append modifiedTopic and modifiedConcept to the target URL
                            //targetUrl += '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedTopic + '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedTopic + '/' + modifiedConcept + '.html#' + anchorFormat;
                            targetUrl += '/' + chapter + paragraph + '%20' + modifiedTopic + '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedConcept + '/' + anchorFormat + '.html#';
                            
                            console.log("Full URL:", targetUrl);

                            // Open the hyperlink in a new tab
                            window.open(targetUrl, '_blank');
                        }}
                    }}
                }}

                // Ensure the callback is working
                console.log("Callback completed successfully!");
            """
        )

        # Set the plot height for each plot
        plot_height = plot_heights[i]

        # Create a Bokeh figure with explicit y_range for categorical factors
        p = figure(y_range=source.data['Merged'].tolist(), toolbar_location=None, tools="", height=plot_height)

        # Initialize the bars variable
        bars = None

        # Check if the data type is numeric
        if pd.api.types.is_numeric_dtype(selected_df[demo_column]):
            # For numeric data, use a horizontal histogram
            bars = p.hbar(y='Merged', right=demo_column, height=bar_height, source=ColumnDataSource(selected_df),
                        line_color="black", fill_color="blue", line_width=linewi)
        else:
            # For non-numeric data, use a horizontal count plot
            bars = p.hbar(y='Merged', right=occurrence_column, height=bar_height, source=ColumnDataSource(selected_df),
                        line_color="black", fill_color="blue", line_width=linewi)

        # Check if bars is not None before setting up the TapTool
        if bars is not None:
            # Continue with the rest of your code
            # Add hover tool
            hover = HoverTool()
            hover.tooltips = [("Value", f"@{demo_column}"), ("Merged", "@Merged")]
            p.add_tools(hover)

            # Create a ColumnDataSource for the Bokeh plot
            source = ColumnDataSource(selected_df)

            # Set up multiline labels
            #create_multiline_labels(source, 'Merged', p, break_length=6)  # Adjust break_length as needed
            #create_multiline_labels(source, 'Merged', p)
            create_multiline_labels(source, 'Merged', p, break_length=breaklen, delimiter=delim)

            # Assign the callback to the TapTool
            taptool = TapTool(renderers=[bars], callback=js_callback)
            p.add_tools(taptool)

            # Wrap the title text
            title_text = f'{occurrence_column} of {demo_column} \nin field:{area}'
            title = Title(text=title_text)
            p.title = title

            # Show the plot
            output_notebook()
            show(p, notebook_handle=True)

            # Print the relevant columns of the data after creating the histogram
            #print(f"Data used for histogram for area: {area}")
            #print(selected_df[[demo_column, pira1_column, pira2_column, topic_column, concept_column]])

# Example usage:
outdir = '../stats/data'  # Replace with the path to your directory
#filename = 'DemoList.xlsx'  # Replace with your file name
#filename = 'DemoList_31Jan2024.xlsx'
filename = 'DemoList_08Feb2024.xlsx'
file = os.path.join(outdir, filename)
sheet_name = 'Overview'
skip = 4 # skip first row of the excel file
area_column = 'Area'
topic_column = 'Topic'
concept_column = 'Concept'
pira1_column = 'PIRA1'
pira2_column = 'PIRA2'
demo_column = 'Demo'
occurrence_column = 'Occurences'

# Add this definition for bar_height
linewi = 1
histogram_size = 0.4
histogram_min_size = 10
bar_height = 0.9
delim = ' - '
breaklen = 6

# Set individual heights for each plot (adjust these values according to your preference)
plotheight1 = 4800
plotheight2 = 700
plotheight3 = 2600
plotheight4 = 1600
plotheight5 = 2000
plotheight6 = 400
plotheight7 = 2000
plotheight8 = 80
plotheight9 = 500
plot_heights = [plotheight1, 
                plotheight2, 
                plotheight3, 
                plotheight4, 
                plotheight5, 
                plotheight6, 
                plotheight7, 
                plotheight8, 
                plotheight9]  # Add more heights as needed

# Display the custom widget
display(ParagraphIdWidget(""))

# Read and plot histograms
read_and_plot_histograms(file, 
                         sheet_name, 
                         area_column, 
                         topic_column, 
                         concept_column, 
                         demo_column, 
                         occurrence_column, 
                         histogram_size, 
                         histogram_min_size, 
                         plot_heights)
/var/gitlab-runner-venvs/demolab/lib/python3.10/site-packages/openpyxl/worksheet/_reader.py:329: UserWarning: Data Validation extension is not supported and will be removed
  warn(msg)
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...