Demonstration Statistics#
This part of the book present some figures regarding the popularity of the classroom demonstrations within the Applied Sciences faculty of Delft University of Technology. Below you will find histograms presenting the number of times a demonstration was presented. Each histogram presents data for demos in a particular field of physics, such as, Mechanics, Fluid Mechanics, Oscillations and Waves, etc. Data was collected over a period of 12 academic years.
Histograms#
Show code cell source
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 13:29:31 2024
File reads data from an excel file and plots histograms
"""
{
"tags": [
"hide-input",
]
}
import pandas as pd
import numpy as np
import os
import re
from IPython.display import display, Javascript
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, TapTool, CustomJS, Label
from bokeh.models.tools import HoverTool
from bokeh.models.annotations import Title
from bokeh.io import output_notebook
breaklen = []
delim = []
# Define the update_paragraph_id function
def update_paragraph_id(paragraph_id):
# Convert PIRA1 value to match HTML href format
converted_paragraph_id = convert_to_html_format(paragraph_id)
display(Javascript(f"""
var widgetId = 'paragraph-id-widget';
var widgetElem = document.getElementById(widgetId);
if (widgetElem) {{
widgetElem.innerHTML = '<b>Paragraph ID:</b> ' + '{converted_paragraph_id}';
}}
"""))
# Create a custom HTML widget to display the selected paragraph ID
class ParagraphIdWidget(Javascript):
def __init__(self, paragraph_id=""):
super().__init__(f"""
var widgetId = 'paragraph-id-widget';
var widgetElem = document.getElementById(widgetId);
if (!widgetElem) {{
widgetElem = document.createElement('div');
widgetElem.id = widgetId;
element.append(widgetElem);
}}
widgetElem.innerHTML = '<b>Paragraph ID:</b> ' + '{paragraph_id}';
""", lib=None)
def convert_to_html_format(pira_value):
"""
Convert PIRA value to match the format used in HTML href attributes in Jupyter Book pages.
"""
try:
# Extract chapter, paragraph, and sub-paragraph from PIRA value
chapter, paragraph, sub_paragraph = re.match(r'^(\d+)([A-Z])(\d+\.\d+)$', pira_value).groups()
# Convert paragraph to Jupyterbook-style paragraph ID
jupyter_paragraph_id = f'{chapter.lower()}{ord(paragraph) - ord("A") + 1:02d}-{sub_paragraph.replace(".", "")}'
return jupyter_paragraph_id
except AttributeError:
print("Invalid PIRA format:", pira_value)
return ''
def create_multiline_labels(source, label_field, p, break_length=breaklen, delimiter=delim):
labels_source = source # Use the same source for labels and bars
# Combine the necessary columns into a single string with ':'
labels_series = (
source.data[demo_column] + ": " +
source.data[pira1_column] + "\n" +
source.data[pira2_column] + "\n" +
source.data[topic_column] + "\n" +
source.data[concept_column]
)
# Add the delimiter to each label
labels_series = labels_series + delimiter
if len(labels_series) > 0:
# Use numpy.vectorize to apply the function element-wise
vectorized_linebreak = np.vectorize(lambda x: '<br>'.join([x[i:i+break_length] for i in range(0, len(x), break_length)]))
# Apply line breaks using vectorized function
labels_series = vectorized_linebreak(labels_series)
# Assign the modified array back to the ColumnDataSource
labels_source.data[label_field] = labels_series.tolist()
p.text(x=0, y='Merged', text=label_field, source=labels_source,
text_align='left', text_font_size='10pt', text_color='black')
# Update the read_and_plot_histograms function to adjust the y-coordinates for hbar
def read_and_plot_histograms(file_path,
sheet_name,
area_column,
topic_column,
concept_column,
demo_column,
occurrence_column,
histogram_size,
histogram_min_size,
plot_heights):
# Read the Excel file with the specified sheet name
result_df = pd.read_excel(file_path, sheet_name=sheet_name, header=None, skiprows=skip)
# Assign row headers from the first row
result_df.columns = result_df.iloc[0]
# Drop the first row (which is now used as column headers)
result_df = result_df.drop(0)
# Check if the DataFrame has at least two columns
if len(result_df.columns) < 2:
raise ValueError("DataFrame should have at least two columns for plotting.")
# Convert 'Demo', 'PIRA1', and 'PIRA2' columns to strings to preserve the original format
result_df[demo_column] = result_df[demo_column].astype(str)
result_df[pira1_column] = result_df[pira1_column].astype(str)
result_df[pira2_column] = result_df[pira2_column].astype(str)
result_df[topic_column] = result_df[topic_column].astype(str)
result_df[concept_column] = result_df[concept_column].astype(str)
# Replace NaN values in 'PIRA1', 'PIRA2', 'Topic', and 'Concept' columns with empty strings
result_df[pira1_column].replace('nan', '', inplace=True)
result_df[pira2_column].replace('nan', '', inplace=True)
result_df[topic_column].replace('nan', '', inplace=True)
result_df[concept_column].replace('nan', '', inplace=True)
# Get unique values in the 'Area' column
# This is done since each area has a histogram plotted
unique_areas = result_df[area_column].unique()
for i, area in enumerate(unique_areas):
# Filter the DataFrame based on 'Area' column
selected_df = result_df[result_df[area_column] == area]
# Check if the selected DataFrame has at least two columns
if len(selected_df.columns) < 2:
raise ValueError("Selected DataFrame should have at least two columns for plotting.")
# Exclude rows where both 'PIRA1' and 'PIRA2' are NaN
#selected_df = selected_df.dropna(subset=[pira1_column, pira2_column], how='all')
selected_df = selected_df.dropna(subset=[pira1_column, pira2_column, topic_column, concept_column], how='all')
# Clean up the 'Topic' column data by removing leading/trailing spaces and extra newlines
selected_df[topic_column] = selected_df[topic_column].str.strip().replace('\n', ' ')
# Clean up the 'Concept' column data by removing leading/trailing spaces and extra newlines
selected_df[concept_column] = selected_df[concept_column].str.strip().replace('\n', ' ')
# Merge 'Demo', 'PIRA1', 'PIRA2', 'Topic', and 'Concept' columns based on their original order of appearance
selected_df['Merged'] = (
selected_df[demo_column]+ " (" +
selected_df[pira1_column]+ ") (" +
selected_df[pira2_column]+ ")\n" +
selected_df[topic_column]+ "\n" +
selected_df[concept_column]
)
# Drop duplicate rows based on the 'Merged' column
selected_df = selected_df.drop_duplicates(subset=['Merged'])
# Create a ColumnDataSource for the Bokeh plot
source = ColumnDataSource(selected_df)
# Set the anchor base URL
#anchor_base_url = 'file:///C:/Users/t/de/_build/html/book/' # Replace with your base URL
# Set the anchor base URL dynamically
anchor_base_url = None
script = """
var anchorBaseUrl;
function getAnchorBaseUrl() {
return anchorBaseUrl;
}
function convertToAnchorFormat(pira_value) {
var pira1; // Define pira1 within the function scope
// Extract chapter, paragraph, and sub-paragraph from PIRA value
var matches = pira_value.match(/^(\d+)([A-Z])(\d+\.\d+)$/);
if (matches) {
var chapter = matches[1].toLowerCase();
var paragraph = matches[2];
var sub_paragraph = matches[3].replace('.', '-');
// Extract sub_paragraph and demo_no from sub_paragraph
var sub_paragraph_match = sub_paragraph.match(/^(\d+)-(\d+)$/);
if (sub_paragraph_match) {
var sub_paragraph = sub_paragraph_match[1];
var demo_no = sub_paragraph_match[2];
} else {
var demo_no = null;
}
// Remove leading zeros from the chapter
chapter = chapter.replace(/^0+/, '');
pira1 = chapter + paragraph + sub_paragraph + '.' + demo_no;
// Store values for printing
convertToAnchorFormat.pira1 = pira1;
convertToAnchorFormat.chapter = chapter;
convertToAnchorFormat.paragraph = paragraph;
convertToAnchorFormat.subparagraph = sub_paragraph;
convertToAnchorFormat.demo_no = demo_no;
console.log("PIRA1 (pira1):", convertToAnchorFormat.pira1);
console.log("Chapter (area):", convertToAnchorFormat.chapter);
console.log("Paragraph (topic):", convertToAnchorFormat.paragraph);
console.log("Subparagraph (concept):", convertToAnchorFormat.subparagraph);
console.log("Demo No:", convertToAnchorFormat.demo_no);
// Print the complete PIRA1 value
console.log("Complete PIRA1 Value:", pira_value);
// Return the formatted anchor value with demo_no
//return paragraph + sub_paragraph + (demo_no !== null ? '-' + demo_no : '');
return chapter + paragraph + sub_paragraph + (demo_no !== null ? '-' + demo_no : '');
} else {
console.error("Invalid PIRA format:", pira_value);
return '';
}
}
"""
js_callback = CustomJS(
args=dict(source=source),
code=f"""
{script}
console.log("Callback triggered!");
// Access data from the ColumnDataSource
var data = source.data;
console.log("Data:", data);
// Log area value in the console
console.log("Area:", "{area}");
// Convert the entire area string to lowercase
var modifiedArea = "{area}".toLowerCase();
console.log("Modified Area:", modifiedArea);
// Declare modifiedConcept variable at the beginning of the function
var modifiedConcept;
if (cb_data !== undefined && cb_data.source !== undefined) {{
var index = cb_data.source.selected.indices[0];
if (index !== undefined) {{
var data = source.data;
var pira1_value = data['PIRA1'][index];
console.log("PIRA1:", pira1_value);
// Extract chapter, paragraph, and sub-paragraph from PIRA1 value
var matches = pira1_value.match(/^(\d+)([A-Z])(\d+\.\d+)$/);
if (matches) {{
var chapter = matches[1].toLowerCase(); // Extracted chapter
var paragraph = matches[2];
//var sub_paragraph = matches[3].replace('.', '-');
var sub_paragraph = matches[3].split('.')[0]; // Extract the first part before the dot
var demonstration = matches[3].split('.')[1]; // Extract the part after the dot
console.log("Chapter:", chapter);
console.log("Paragraph:", paragraph);
console.log("Sub-Paragraph:", sub_paragraph);
console.log("Demonstration:", demonstration);
}}
// Extract paragraph from PIRA1 value
var paragraph = pira1_value.match(/^\d+([A-Z])\d+\.\d+$/)[1];
console.log("Extracted Paragraph:", paragraph);
// Convert PIRA1 value to the anchor format
var anchorFormat = convertToAnchorFormat(pira1_value);
console.log("Anchor Format:", anchorFormat);
// Get the current notebook URL
var notebookUrl = window.location.href;
console.log("Notebook URL:", notebookUrl);
// Find the position of "/book" in the URL
var bookIndex = notebookUrl.indexOf("/book");
if (bookIndex !== -1) {{
// Extract the substring up to "/book" (including "/book")
anchorBaseUrl = notebookUrl.substring(0, bookIndex + 5);
}} else {{
// "/book" not found, use the entire URL
anchorBaseUrl = notebookUrl;
}}
console.log("Base URL:", anchorBaseUrl);
// Include the modified area in the target URL
var targetUrl = anchorBaseUrl + '/' + modifiedArea;
console.log("Target URL (After Modified Area):", targetUrl);
// Check if 'Topic' column is defined, not null/undefined, and not an empty string
if (data['Topic'] !== undefined && data['Topic'][index] !== null && data['Topic'][index] !== '') {{
// Extract the first 'Topic' value and append it to the target URL
var topics = data['Topic'][index].split('\\n');
var modifiedTopic = topics[0].trim().toLowerCase();
console.log("Modified Topic:", modifiedTopic);
}}
// Check if 'Concept' column is defined, not null/undefined, and not an empty string
if (data['Concept'] !== undefined && data['Concept'][index] !== null && data['Concept'][index] !== '') {{
// Extract the first value from 'Concept' column
var concept_value = data['Concept'][index];
modifiedConcept = concept_value.split('\\n')[0].trim().toLowerCase();
console.log("Modified Concept:", modifiedConcept);
// Append modifiedTopic and modifiedConcept to the target URL
//targetUrl += '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedTopic + '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedTopic + '/' + modifiedConcept + '.html#' + anchorFormat;
targetUrl += '/' + chapter + paragraph + '%20' + modifiedTopic + '/' + chapter + paragraph + sub_paragraph + '%20' + modifiedConcept + '/' + anchorFormat + '.html#';
console.log("Full URL:", targetUrl);
// Open the hyperlink in a new tab
window.open(targetUrl, '_blank');
}}
}}
}}
// Ensure the callback is working
console.log("Callback completed successfully!");
"""
)
# Set the plot height for each plot
plot_height = plot_heights[i]
# Create a Bokeh figure with explicit y_range for categorical factors
p = figure(y_range=source.data['Merged'].tolist(), toolbar_location=None, tools="", height=plot_height)
# Initialize the bars variable
bars = None
# Check if the data type is numeric
if pd.api.types.is_numeric_dtype(selected_df[demo_column]):
# For numeric data, use a horizontal histogram
bars = p.hbar(y='Merged', right=demo_column, height=bar_height, source=ColumnDataSource(selected_df),
line_color="black", fill_color="blue", line_width=linewi)
else:
# For non-numeric data, use a horizontal count plot
bars = p.hbar(y='Merged', right=occurrence_column, height=bar_height, source=ColumnDataSource(selected_df),
line_color="black", fill_color="blue", line_width=linewi)
# Check if bars is not None before setting up the TapTool
if bars is not None:
# Continue with the rest of your code
# Add hover tool
hover = HoverTool()
hover.tooltips = [("Value", f"@{demo_column}"), ("Merged", "@Merged")]
p.add_tools(hover)
# Create a ColumnDataSource for the Bokeh plot
source = ColumnDataSource(selected_df)
# Set up multiline labels
#create_multiline_labels(source, 'Merged', p, break_length=6) # Adjust break_length as needed
#create_multiline_labels(source, 'Merged', p)
create_multiline_labels(source, 'Merged', p, break_length=breaklen, delimiter=delim)
# Assign the callback to the TapTool
taptool = TapTool(renderers=[bars], callback=js_callback)
p.add_tools(taptool)
# Wrap the title text
title_text = f'{occurrence_column} of {demo_column} \nin field:{area}'
title = Title(text=title_text)
p.title = title
# Show the plot
output_notebook()
show(p, notebook_handle=True)
# Print the relevant columns of the data after creating the histogram
#print(f"Data used for histogram for area: {area}")
#print(selected_df[[demo_column, pira1_column, pira2_column, topic_column, concept_column]])
# Example usage:
outdir = '../stats/data' # Replace with the path to your directory
#filename = 'DemoList.xlsx' # Replace with your file name
#filename = 'DemoList_31Jan2024.xlsx'
filename = 'DemoList_08Feb2024.xlsx'
file = os.path.join(outdir, filename)
sheet_name = 'Overview'
skip = 4 # skip first row of the excel file
area_column = 'Area'
topic_column = 'Topic'
concept_column = 'Concept'
pira1_column = 'PIRA1'
pira2_column = 'PIRA2'
demo_column = 'Demo'
occurrence_column = 'Occurences'
# Add this definition for bar_height
linewi = 1
histogram_size = 0.4
histogram_min_size = 10
bar_height = 0.9
delim = ' - '
breaklen = 6
# Set individual heights for each plot (adjust these values according to your preference)
plotheight1 = 4800
plotheight2 = 700
plotheight3 = 2600
plotheight4 = 1600
plotheight5 = 2000
plotheight6 = 400
plotheight7 = 2000
plotheight8 = 80
plotheight9 = 500
plot_heights = [plotheight1,
plotheight2,
plotheight3,
plotheight4,
plotheight5,
plotheight6,
plotheight7,
plotheight8,
plotheight9] # Add more heights as needed
# Display the custom widget
display(ParagraphIdWidget(""))
# Read and plot histograms
read_and_plot_histograms(file,
sheet_name,
area_column,
topic_column,
concept_column,
demo_column,
occurrence_column,
histogram_size,
histogram_min_size,
plot_heights)
/var/gitlab-runner-venvs/demolab/lib/python3.10/site-packages/openpyxl/worksheet/_reader.py:329: UserWarning: Data Validation extension is not supported and will be removed
warn(msg)