From cb10ffb262c8d7377c2442941e38d33eb95aeef0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 07:11:47 +0000
Subject: [PATCH 01/11] Initial plan
From e40fe7182faebf88201bebd25a044a9e053f63be Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 07:25:30 +0000
Subject: [PATCH 02/11] Remove vestigial template pages and tests with broken
imports to fix CI
Remove content pages not used by app.py that reference non-existent modules:
- content/documentation.py, topp_workflow_*.py, file_upload.py, raw_data_viewer.py,
run_example_workflow.py, download_section.py, simple_workflow.py, run_subprocess.py,
peptide_mz_calculator.py
Remove test files with broken imports:
- test.py (imports src.simpleworkflow, src.mzmlfileworkflow)
- tests/test_simple_workflow.py (tests removed page)
- tests/test_run_subprocess.py (tests removed page)
Update test_gui.py with valid tests for settings and page existence.
Update workflow-tests.yml to remove reference to deleted test.py.
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
.github/workflows/workflow-tests.yml | 3 -
content/documentation.py | 109 ------------
content/download_section.py | 74 --------
content/file_upload.py | 133 ---------------
content/peptide_mz_calculator.py | 247 ---------------------------
content/raw_data_viewer.py | 44 -----
content/run_example_workflow.py | 59 -------
content/run_subprocess.py | 89 ----------
content/simple_workflow.py | 45 -----
content/topp_workflow_execution.py | 12 --
content/topp_workflow_file_upload.py | 11 --
content/topp_workflow_parameter.py | 10 --
content/topp_workflow_results.py | 10 --
test.py | 24 ---
test_gui.py | 157 +++--------------
tests/test_run_subprocess.py | 37 ----
tests/test_simple_workflow.py | 69 --------
17 files changed, 27 insertions(+), 1106 deletions(-)
delete mode 100644 content/documentation.py
delete mode 100644 content/download_section.py
delete mode 100644 content/file_upload.py
delete mode 100644 content/peptide_mz_calculator.py
delete mode 100644 content/raw_data_viewer.py
delete mode 100644 content/run_example_workflow.py
delete mode 100644 content/run_subprocess.py
delete mode 100644 content/simple_workflow.py
delete mode 100644 content/topp_workflow_execution.py
delete mode 100644 content/topp_workflow_file_upload.py
delete mode 100644 content/topp_workflow_parameter.py
delete mode 100644 content/topp_workflow_results.py
delete mode 100644 test.py
delete mode 100644 tests/test_run_subprocess.py
delete mode 100644 tests/test_simple_workflow.py
diff --git a/.github/workflows/workflow-tests.yml b/.github/workflows/workflow-tests.yml
index 92b0b99..b459a2d 100644
--- a/.github/workflows/workflow-tests.yml
+++ b/.github/workflows/workflow-tests.yml
@@ -20,9 +20,6 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest
- - name: Running test cases
- run: |
- pytest test.py
- name: Running GUI tests
run: |
pytest test_gui.py
diff --git a/content/documentation.py b/content/documentation.py
deleted file mode 100644
index c308213..0000000
--- a/content/documentation.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import streamlit as st
-from src.common.common import page_setup
-from pathlib import Path
-from docs.toppframework import content as topp_framework_content
-
-page_setup()
-
-
-st.title("Documentation")
-
-cols = st.columns(2)
-
-pages = [
- "User Guide",
- "Installation",
- "Developers Guide: How to build app based on this template",
- "Developers Guide: TOPP Workflow Framework",
- "Developer Guide: Windows Executables",
- "Developers Guide: Deployment",
-]
-page = cols[0].selectbox(
- "**Content**",
- pages,
-)
-
-#############################################################################################
-# User Guide
-#############################################################################################
-
-if page == pages[0]:
- with open(Path("docs", "user_guide.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
-
-#############################################################################################
-# Installation
-#############################################################################################
-
-if page == pages[1]:
- if Path("OpenMS-App.zip").exists():
- st.markdown(
- """
-Download the latest version for **Windows** here clicking the button below.
-"""
- )
- with open("OpenMS-App.zip", "rb") as file:
- st.download_button(
- label="Download for Windows",
- data=file,
- file_name="OpenMS-App.zip",
- mime="archive/zip",
- type="primary",
- )
- with open(Path("docs", "installation.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
-
-#############################################################################################
-# Developer Overview, how to build app based on Template
-#############################################################################################
-
-if page == pages[2]:
- with open(Path("docs", "build_app.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
-
-#############################################################################################
-# TOPP Workflow Framework
-#############################################################################################
-
-if page == pages[3]:
- topp_framework_content()
-
-#############################################################################################
-# Windows Executables
-#############################################################################################
-
-if page == pages[4]:
- st.markdown(
- """
-## 💻 How to package everything for Windows executables
-
-This guide explains how to package OpenMS apps into Windows executables using two different methods:
-"""
- )
-
- tabs = ["**embeddable Python**", "**PyInstaller**"]
- tabs = st.tabs(tabs)
-
- # window executable with embeddable python
- with tabs[0]:
- with open(Path("docs", "win_exe_with_embed_py.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
-
- # window executable with pyinstaller
- with tabs[1]:
- with open(Path("docs", "win_exe_with_pyinstaller.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
-
-#############################################################################################
-# Deployment
-#############################################################################################
-
-if page == pages[5]:
- with open(Path("docs", "deployment.md"), "r", encoding="utf-8") as f:
- content = f.read()
- st.markdown(content)
\ No newline at end of file
diff --git a/content/download_section.py b/content/download_section.py
deleted file mode 100644
index 8856a7b..0000000
--- a/content/download_section.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import streamlit as st
-
-from pathlib import Path
-import shutil
-
-from src.common.common import page_setup
-from zipfile import ZipFile, ZIP_DEFLATED
-
-page_setup()
-
-# Define output folder here; all subfolders will be handled as downloadable
-# directories
-output_folder = 'mzML-workflow-results'
-
-
-# Generate full path
-dirpath = Path(st.session_state["workspace"], output_folder)
-
-# Detect downloadable content
-if dirpath.exists():
- directories = sorted(
- [entry for entry in dirpath.iterdir() if not entry.is_file()]
- )
-else:
- directories = []
-
-# Show error if no content is available for download
-if len(directories) == 0:
- st.error('No results to show yet. Please run a workflow first!')
-else:
- # Table Header
- columns = st.columns(3)
- columns[0].write('**Run**')
- columns[1].write('**Download**')
- columns[2].write('**Delete Result Set**')
-
- # Table Body
- for i, directory in enumerate(directories):
- st.divider()
- columns = st.columns(3)
- columns[0].empty().write(directory.name)
-
- with columns[1]:
- button_placeholder = st.empty()
-
- # Show placeholder button before download is prepared
- clicked = button_placeholder.button('Prepare Download', key=i, use_container_width=True)
- if clicked:
- button_placeholder.empty()
- with st.spinner():
- # Create ZIP file
- out_zip = Path(directory, 'output.zip')
- if not out_zip.exists():
- with ZipFile(out_zip, 'w', ZIP_DEFLATED) as zip_file:
- for output in Path(directory).iterdir():
- if output.name == 'output.zip':
- continue
- try:
- with open(output, 'r') as f:
- zip_file.writestr(output.name, f.read())
- except:
- continue
- # Show download button after ZIP file was created
- with open(out_zip, 'rb') as f:
- button_placeholder.download_button(
- "Download ⬇️", f,
- file_name = f'{directory.name}.zip',
- use_container_width=True
- )
-
- with columns[2]:
- if st.button(f"🗑️ {directory.name}", use_container_width=True):
- shutil.rmtree(directory)
- st.rerun()
\ No newline at end of file
diff --git a/content/file_upload.py b/content/file_upload.py
deleted file mode 100644
index e6acc56..0000000
--- a/content/file_upload.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from pathlib import Path
-
-import streamlit as st
-import pandas as pd
-
-from src.common.common import (
- page_setup,
- save_params,
- v_space,
- show_table,
- TK_AVAILABLE,
- tk_directory_dialog,
-)
-from src import fileupload
-
-params = page_setup()
-
-st.title("File Upload")
-
-# Check if there are any files in the workspace
-mzML_dir = Path(st.session_state.workspace, "mzML-files")
-if not any(Path(mzML_dir).iterdir()):
- # No files present, load example data
- fileupload.load_example_mzML_files()
-
-tabs = ["File Upload"]
-if st.session_state.location == "local":
- tabs.append("Files from local folder")
-
-tabs = st.tabs(tabs)
-
-with tabs[0]:
- with st.form("mzML-upload", clear_on_submit=True):
- files = st.file_uploader(
- "mzML files", accept_multiple_files=(st.session_state.location == "local")
- )
- cols = st.columns(3)
- if cols[1].form_submit_button("Add files to workspace", type="primary"):
- if files:
- fileupload.save_uploaded_mzML(files)
- else:
- st.warning("Select files first.")
-
-# Local file upload option: via directory path
-if st.session_state.location == "local":
- with tabs[1]:
- st_cols = st.columns([0.05, 0.95], gap="small")
- with st_cols[0]:
- st.write("\n")
- st.write("\n")
- dialog_button = st.button(
- "📁",
- key="local_browse",
- help="Browse for your local directory with MS data.",
- disabled=not TK_AVAILABLE,
- )
- if dialog_button:
- st.session_state["local_dir"] = tk_directory_dialog(
- "Select directory with your MS data",
- st.session_state["previous_dir"],
- )
- st.session_state["previous_dir"] = st.session_state["local_dir"]
- with st_cols[1]:
- # with st.form("local-file-upload"):
- local_mzML_dir = st.text_input(
- "path to folder with mzML files", value=st.session_state["local_dir"]
- )
- # raw string for file paths
- local_mzML_dir = rf"{local_mzML_dir}"
- cols = st.columns([0.65, 0.3, 0.4, 0.25], gap="small")
- copy_button = cols[1].button(
- "Copy files to workspace", type="primary", disabled=(local_mzML_dir == "")
- )
- use_copy = cols[2].checkbox(
- "Make a copy of files",
- key="local_browse-copy_files",
- value=True,
- help="Create a copy of files in workspace.",
- )
- if not use_copy:
- st.warning(
- "**Warning**: You have deselected the `Make a copy of files` option. "
- "This **_assumes you know what you are doing_**. "
- "This means that the original files will be used instead. "
- )
- if copy_button:
- fileupload.copy_local_mzML_files_from_directory(local_mzML_dir, use_copy)
-
-if any(Path(mzML_dir).iterdir()):
- v_space(2)
- # Display all mzML files currently in workspace
- df = pd.DataFrame(
- {
- "file name": [
- f.name
- for f in Path(mzML_dir).iterdir()
- if "external_files.txt" not in f.name
- ]
- }
- )
-
- # Check if local files are available
- external_files = Path(mzML_dir, "external_files.txt")
- if external_files.exists():
- with open(external_files, "r") as f_handle:
- external_files = f_handle.readlines()
- external_files = [f.strip() for f in external_files]
- df = pd.concat(
- [df, pd.DataFrame({"file name": external_files})], ignore_index=True
- )
-
- st.markdown("##### mzML files in current workspace:")
- show_table(df)
- v_space(1)
- # Remove files
- with st.expander("🗑️ Remove mzML files"):
- to_remove = st.multiselect(
- "select mzML files", options=[f.stem for f in sorted(mzML_dir.iterdir())]
- )
- c1, c2 = st.columns(2)
- if c2.button(
- "Remove **selected**", type="primary", disabled=not any(to_remove)
- ):
- params = fileupload.remove_selected_mzML_files(to_remove, params)
- save_params(params)
- st.rerun()
-
- if c1.button("⚠️ Remove **all**", disabled=not any(mzML_dir.iterdir())):
- params = fileupload.remove_all_mzML_files(params)
- save_params(params)
- st.rerun()
-
-save_params(params)
diff --git a/content/peptide_mz_calculator.py b/content/peptide_mz_calculator.py
deleted file mode 100644
index 92275d3..0000000
--- a/content/peptide_mz_calculator.py
+++ /dev/null
@@ -1,247 +0,0 @@
-"""
-Peptide m/z Calculator App.
-"""
-
-import streamlit as st
-import pandas as pd
-from pathlib import Path
-
-from src.common.common import page_setup, v_space
-
-# Import backend functions
-from src.peptide_mz_calculator import (
- calculate_peptide_mz_range,
- validate_sequence,
-)
-
-# Page setup
-page_setup(page="main")
-
-# Hero section & logo
-col1, col2, col3 = st.columns([0.5, 2, 1])
-with col2:
- st.markdown(
- """
-
-
⚖️ Peptide m/z Calculator
-
- Calculate theoretical mass-to-charge ratios (m/z) for peptides with and without modifications.
-
-
- """,
- unsafe_allow_html=True,
- )
-
-# Description
-st.markdown(
- """
-**Calculate precise theoretical m/z values** for peptides.
-
-"""
-)
-
-# Expandable help sections
-with st.expander("📚 **Sequence Format**"):
- st.markdown("""
-
- **💡 Format Tips:**
- - Use parentheses for modifications: `(Oxidation)`, `(Carbamidomethyl)`
- - Use dots for terminal modifications: `.(Acetyl)`, `(Amidated).`
- - Use square brackets for mass deltas: `[+15.995]`, `[-18.010]`
-
- **Examples:**
- - `PEPTIDE`: Basic amino acid sequence
- - `M(Oxidation)PEPTIDE`: Methionine oxidation modification
- - `C(Carbamidomethyl)PEPTIDE`: Carbamidomethyl cysteine modification
- - `.(Acetyl)PEPTIDE`: N-terminal acetylation
- - `PEPTIDE(Amidated).`: C-terminal amidation
- - `PEPTIDE[+15.995]`: Mass delta modification
- - `M[+15.994915]PEPTIDE`: Specific mass delta on methionine
- - `ALSSC(UNIMOD:4)VVDEEQDVER`: UNIMOD modification notation
- - `PEPS(Phospho)TIDE`: Phosphorylation modification
- - `.(Acetyl)M(Oxidation)PEPTIDE`: Multiple modifications
-
- - **Supported Amino Acids:**
- Standard 20 amino acids (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V) plus X (any) and U (selenocysteine)
-
- - **Common static modifications:**
-
- | Name | Target Residue(s) | Mass Shift (Da) | Description |
- |------|------------------|-----------------|-------------|
- | Carbamidomethyl | C | +57.021464 | Carbamidomethylation of cysteine |
-
- - **Common dynamic modifications (for more see UniMod names):**
-
- | Name | Target Residue(s) | Mass Shift (Da) | Description |
- |------|------------------|-----------------|-------------|
- | Oxidation | M | +15.994915 | Oxidation of methionine |
- | Deamidated | N, Q | +0.984016 | Spontaneous deamidation |
- | Phospho | S, T, Y | +79.966331 | Phosphorylation on STY |
- | Acetyl | Protein N-term | +42.010565 | N-terminal acetylation |
- | Gln->pyro-Glu | N-term Q | -17.026549 | Cyclization of glutamine |
- | Glu->pyro-Glu | N-term E | -18.010565 | Cyclization of glutamic acid |
- | Formyl | N-term | +27.994915 | Formylation of N-terminus |
- | Methyl | K, R, H | +14.015650 | Single methyl group addition |
- | Dimethyl | K, R | +28.031300 | Two methyl groups |
- | Sulfo | Y | +79.956815 | Sulfation on tyrosine |
- | GG (diglycyl) | K | +114.042927 | Ubiquitin remnant on lysine after trypsin digest |
-
- """)
-
-
-
-st.markdown("---")
-
-# Input section
-col1_input, col2_input = st.columns([3, 1])
-
-with col1_input:
- # Sequence input
- sequence_input = st.text_input(
- "Peptide Sequence",
- value="M(Oxidation)PEPTIDE",
- help="""Enter peptide sequence in AASequence format. Examples:
- • PEPTIDE - Basic sequence
- • M(Oxidation)PEPTIDE - Oxidized methionine
- • C(Carbamidomethyl)PEPTIDE - Carbamidomethyl cysteine
- • .(Acetyl)PEPTIDE - N-terminal acetylation""",
- placeholder="e.g., M(Oxidation)PEPTIDE, C(Carbamidomethyl)PEPTIDE",
- )
-
-with col2_input:
- # Charge range inputs
-
- default_charge = 2
-
- charge_col1, charge_col2 = st.columns(2)
- with charge_col1:
- min_charge = st.number_input(
- "Min Charge",
- min_value=1,
- max_value=20,
- value=default_charge,
- step=1
- )
- with charge_col2:
- max_charge = st.number_input(
- "Max Charge",
- min_value=1,
- max_value=200,
- value=min(default_charge + 2, 6),
- step=1
- )
-
- # Ensure valid range
- if min_charge > max_charge:
- st.error("Min charge must be ≤ Max charge")
- min_charge = max_charge
-
-# Calculate button
-calculate_btn = st.button(
- "🧮 Calculate m/z",
- type="primary",
- use_container_width=True
-)
-
-st.markdown("---")
-
-# Results section
-if calculate_btn:
- if not sequence_input.strip():
- st.error("Please enter a peptide sequence.")
- else:
- # Validate sequence
- is_valid, error_msg = validate_sequence(sequence_input)
-
- if not is_valid:
- st.error(f"Invalid sequence: {error_msg}")
- else:
- try:
- with st.spinner("Calculating m/z ratios..."):
- results = calculate_peptide_mz_range(
- sequence_input,
- (min_charge, max_charge)
- )
-
- st.success("✅ Calculation Complete!")
-
- # Results display
- result_col1, result_col2 = st.columns(2)
-
- with result_col1:
- st.markdown("### 📊 m/z Results")
-
- charge_results = results.get("charge_results", {})
- charge_states = sorted(charge_results.keys())
-
- # Display results
- if len(charge_states) <= 5:
- # Simple list for few charge states
- for charge in charge_states:
- charge_data = charge_results[charge]
- mz_value = charge_data['mz_ratio']
- st.markdown(f"**Charge +{charge}:** {mz_value:.6f}")
- else:
- # Table for many charge states
- table_data = []
- for charge in charge_states:
- charge_data = charge_results[charge]
- table_data.append({
- "Charge": f"+{charge}",
- "m/z": f"{charge_data['mz_ratio']:.6f}"
- })
-
- df = pd.DataFrame(table_data)
- st.dataframe(df, use_container_width=True, hide_index=True)
-
- st.markdown(f"**Monoisotopic Mass:** {results['monoisotopic_mass']:.6f} Da")
-
- with result_col2:
- st.markdown("### 🧪 Sequence Information")
- st.markdown(f"**Input Sequence:** {sequence_input}")
- st.markdown(f"**Molecular Formula:** {results['molecular_formula']}")
- st.markdown(f"**Length:** {results['sequence_length']} amino acids")
-
- # Additional details
- with st.expander("📋 Additional Details"):
- # Amino acid composition
- aa_composition = results["aa_composition"]
- if aa_composition:
- st.markdown("**Amino Acid Composition:**")
- composition_text = ", ".join([
- f"{aa}: {count}"
- for aa, count in sorted(aa_composition.items())
- ])
- st.markdown(composition_text)
-
- except Exception as e:
- st.error(f"Calculation error: {str(e)}")
-
- st.markdown("""
- **Common Issues:**
- - Use correct AASequence format: `M(Oxidation)PEPTIDE`
- - Check modification names: `(Carbamidomethyl)`, `(Oxidation)`
- - Verify amino acid codes (standard 20 + X, U)
- - Use dots for terminal mods: `.(Acetyl)PEPTIDE`
- """)
-
-# About section
-st.markdown("---")
-with st.expander("ℹ️ **About This Peptide m/z Calculator**"):
- st.markdown("""
- **AASequence Format Support:**
- - Uses PyOpenMS `AASequence.fromString()` directly
- - No complex parsing or format conversion
- - Native support for modifications and charge notation
- - Standardized output format
-
- **Supported Amino Acids:**
- Standard 20 amino acids (A, R, N, D, C, E, Q, G, H, I, L, K, M, F, P, S, T, W, Y, V) plus X (any) and U (selenocysteine)
-
- **Modification Format:**
- - Named modifications: `(Oxidation)`, `(Carbamidomethyl)`, `(Phospho)`
- - Terminal modifications: `.(Acetyl)PEPTIDE`, `PEPTIDE(Amidated).`
- - Mass deltas: `[+15.994915]`, `[-18.010565]`
- - UNIMOD notation: `[UNIMOD:4]`, `[UNIMOD:35]`
-
- """)
diff --git a/content/raw_data_viewer.py b/content/raw_data_viewer.py
deleted file mode 100644
index d788e25..0000000
--- a/content/raw_data_viewer.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from pathlib import Path
-
-import streamlit as st
-
-from src.common.common import page_setup
-from src import view
-
-
-params = page_setup()
-
-st.title("View raw MS data")
-
-# File selection can not be in fragment since it influences the subsequent sections
-cols = st.columns(3)
-
-mzML_dir = Path(st.session_state.workspace, "mzML-files")
-file_options = [f.name for f in mzML_dir.iterdir() if "external_files.txt" not in f.name]
-
-# Check if local files are available
-external_files = Path(mzML_dir, "external_files.txt")
-if external_files.exists():
- with open(external_files, "r") as f_handle:
- external_files = f_handle.readlines()
- external_files = [f.strip() for f in external_files]
- file_options += external_files
-
-selected_file = cols[0].selectbox(
- "choose file",
- file_options,
- key="view_selected_file"
-)
-if selected_file:
- view.get_df(Path(st.session_state.workspace, "mzML-files", selected_file))
-
-
- tabs = st.tabs(
- ["📈 Peak map (MS1)", "📈 Spectra (MS1 + MS2)", "📈 Chromatograms (MS1)"]
- )
- with tabs[0]:
- view.view_peak_map()
- with tabs[1]:
- view.view_spectrum()
- with tabs[2]:
- view.view_bpc_tic()
diff --git a/content/run_example_workflow.py b/content/run_example_workflow.py
deleted file mode 100644
index 49e70e9..0000000
--- a/content/run_example_workflow.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import streamlit as st
-
-from pathlib import Path
-
-from src.common.common import page_setup, save_params
-from src import mzmlfileworkflow
-
-# Page name "workflow" will show mzML file selector in sidebar
-params = page_setup()
-
-st.title("Workflow")
-st.markdown(
- """
-More complex workflow with mzML files and input form.
-
-Changing widgets within the form will not trigger the execution of the script immediatly.
-This is great for large parameter sections.
-"""
-)
-
-with st.form("workflow-with-mzML-form"):
- st.markdown("**Parameters**")
-
- file_options = [f.stem for f in Path(st.session_state.workspace, "mzML-files").glob("*.mzML") if "external_files.txt" not in f.name]
-
- # Check if local files are available
- external_files = Path(Path(st.session_state.workspace, "mzML-files"), "external_files.txt")
- if external_files.exists():
- with open(external_files, "r") as f_handle:
- external_files = f_handle.readlines()
- external_files = [str(Path(f.strip()).with_suffix('')) for f in external_files]
- file_options += external_files
-
- st.multiselect(
- "**input mzML files**",
- file_options,
- params["example-workflow-selected-mzML-files"],
- key="example-workflow-selected-mzML-files",
- )
-
- c1, _, c3 = st.columns(3)
- if c1.form_submit_button(
- "Save Parameters", help="Save changes made to parameter section."
- ):
- params = save_params(params)
- run_workflow_button = c3.form_submit_button("Run Workflow", type="primary")
-
-result_dir = Path(st.session_state["workspace"], "mzML-workflow-results")
-
-if run_workflow_button:
- params = save_params(params)
- if params["example-workflow-selected-mzML-files"]:
- mzmlfileworkflow.run_workflow(params, result_dir)
- else:
- st.warning("Select some mzML files.")
-
-
-
-mzmlfileworkflow.result_section(result_dir)
\ No newline at end of file
diff --git a/content/run_subprocess.py b/content/run_subprocess.py
deleted file mode 100644
index 8aebfa7..0000000
--- a/content/run_subprocess.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import streamlit as st
-import threading
-import os
-
-from pathlib import Path
-
-from src.common.common import page_setup, save_params
-from src.run_subprocess import run_subprocess
-
-# Page name "workflow" will show mzML file selector in sidebar
-params = page_setup()
-
-st.title("Run subprocess")
-st.markdown(
- """
- This example demonstrates how to run an external process (in this case, the Linux command 'grep' or 'findstr' for windows) as a subprocess to extract IDs from the selected mzML file while displaying the process output.
- It also works with longer-running processes, such as calling an OpenMS TOPP tool.
- """
-)
-
-# Define the directory where mzML files are located
-mzML_dir: Path = Path(st.session_state.workspace, "mzML-files")
-
-# Create two columns for the Streamlit app layout
-col1, col2 = st.columns(2)
-
-# Use the `glob` method to get a list of all files in the directory
-file_list = list(mzML_dir.glob("*"))
-
-# select box to select file from user
-file_name = st.selectbox("**Please select file**", [file.stem for file in file_list])
-
-# full path of file
-mzML_file_path = os.path.join(mzML_dir, str(file_name) + ".mzML")
-
-# Create a dictionary to capture the output and status of the subprocess
-result_dict = {}
-result_dict["success"] = False
-result_dict["log"] = " "
-
-# Create a flag to terminate the subprocess
-terminate_flag = threading.Event()
-terminate_flag.set()
-
-
-# Function to terminate the subprocess
-def terminate_subprocess():
- """Set flag to terminate subprocess."""
- global terminate_flag
- terminate_flag.set()
-
-
-# Check if the "Extract ids" button is clicked
-if st.button("Extract ids"):
- # Check if the "Terminate/Clear" button is clicked to stop the subprocess and clear the form
- if st.button("Terminate/Clear"):
- # Terminate the subprocess
- terminate_subprocess()
- st.warning("Process terminated. The analysis may not be complete.")
- # Reset the page
- st.rerun()
-
- # Display a status message while running the analysis
- with st.status("Please wait until fetching all ids from mzML 😑"):
-
- # Define the command to run as a subprocess (example: grep or findstr (for windows))
- # 'nt' indicates Windows
- if os.name == 'nt':
- args = ["findstr", "idRef", mzML_file_path]
- else:
- # Assume 'posix' for Linux and macOS
- args =["grep", "idRef", mzML_file_path]
-
- # Display the command that will be executed
- message = f"Running command: {' '.join(args)}"
- st.code(message)
-
- # Run the subprocess command
- run_subprocess(args, result_dict)
-
- # Check if the subprocess was successful
- if result_dict["success"]:
- # Here can add code here to handle the results, e.g., display them to the user
-
- pass # Placeholder for result handling
-
-
-# At the end of each page, always save parameters (including any changes via widgets with key)
-save_params(params)
diff --git a/content/simple_workflow.py b/content/simple_workflow.py
deleted file mode 100644
index 130dd43..0000000
--- a/content/simple_workflow.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import streamlit as st
-
-from src.common.common import page_setup, save_params, show_table
-from src import simpleworkflow
-
-# Page name "workflow" will show mzML file selector in sidebar
-params = page_setup()
-
-st.title("Simple Workflow")
-st.markdown("Example for a simple workflow with quick execution times.")
-
-# Define two widgets with values from paramter file
-# To save them as parameters use the same key as in the json file
-
-# We access the x-dimension via local variable
-xdimension = st.number_input(
- label="x dimension",
- min_value=1,
- max_value=20,
- value=params["example-x-dimension"],
- step=1,
- key="example-x-dimension",
-)
-
-st.number_input(
- label="y dimension",
- min_value=1,
- max_value=20,
- value=params["example-y-dimension"],
- step=1,
- key="example-y-dimension",
-)
-
-# Get a dataframe with x and y dimensions via time consuming (sleep) cached function
-# If the input has been given before, the function does not run again
-# Input x from local variable, input y from session state via key
-df = simpleworkflow.generate_random_table(
- xdimension, st.session_state["example-y-dimension"]
-)
-
-# Display dataframe via custom show_table function, which will render a download button as well
-show_table(df, download_name="random-table")
-
-# At the end of each page, always save parameters (including any changes via widgets with key)
-save_params(params)
diff --git a/content/topp_workflow_execution.py b/content/topp_workflow_execution.py
deleted file mode 100644
index 2248d00..0000000
--- a/content/topp_workflow_execution.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import streamlit as st
-from src.common.common import page_setup
-from src.Workflow import Workflow
-
-
-params = page_setup()
-
-wf = Workflow()
-
-wf.show_execution_section()
-
-
diff --git a/content/topp_workflow_file_upload.py b/content/topp_workflow_file_upload.py
deleted file mode 100644
index ec58e2f..0000000
--- a/content/topp_workflow_file_upload.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import streamlit as st
-from src.common.common import page_setup
-from src.Workflow import Workflow
-
-
-params = page_setup()
-
-wf = Workflow()
-
-wf.show_file_upload_section()
-
diff --git a/content/topp_workflow_parameter.py b/content/topp_workflow_parameter.py
deleted file mode 100644
index 26a602d..0000000
--- a/content/topp_workflow_parameter.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import streamlit as st
-from src.common.common import page_setup
-from src.Workflow import Workflow
-
-
-params = page_setup()
-
-wf = Workflow()
-
-wf.show_parameter_section()
diff --git a/content/topp_workflow_results.py b/content/topp_workflow_results.py
deleted file mode 100644
index d6db8b5..0000000
--- a/content/topp_workflow_results.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import streamlit as st
-from src.common.common import page_setup
-from src.Workflow import Workflow
-
-
-params = page_setup()
-
-wf = Workflow()
-
-wf.show_results_section()
\ No newline at end of file
diff --git a/test.py b/test.py
deleted file mode 100644
index 8a2a3ad..0000000
--- a/test.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# test_my_math.py
-import unittest
-from urllib.request import urlretrieve
-
-from src.simpleworkflow import generate_random_table
-from src.mzmlfileworkflow import mzML_file_get_num_spectra
-
-from pathlib import Path
-
-class TestSimpleWorkflow(unittest.TestCase):
- def test_workflow(self):
- result = generate_random_table(2, 3).shape
- self.assertEqual(result, (2,3), "Expected dataframe shape.")
-
-class TestComplexWorkflow(unittest.TestCase):
- def test_workflow(self):
- # load data from url
- urlretrieve("https://raw.githubusercontent.com/OpenMS/streamlit-template/main/example-data/mzML/Treatment.mzML", "testfile.mzML")
- result = mzML_file_get_num_spectra("testfile.mzML")
- Path("testfile.mzML").unlink()
- self.assertEqual(result, 786, "Expected dataframe shape.")
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/test_gui.py b/test_gui.py
index 101865c..939f41f 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -1,134 +1,31 @@
-from streamlit.testing.v1 import AppTest
-import pytest
-from src import fileupload
import json
-from pathlib import Path
-import shutil
-@pytest.fixture
-def launch(request):
- test = AppTest.from_file(request.param)
-
- ## Initialize session state ##
+def test_settings_json_valid():
+ """Test that settings.json exists and contains required fields."""
with open("settings.json", "r") as f:
- test.session_state.settings = json.load(f)
- test.session_state.settings["test"] = True
- test.secrets["workspace"] = "test"
- return test
-
-
-# Test launching of all pages
-@pytest.mark.parametrize(
- "launch",
- (
- # "content/quickstart.py", # NOTE: this page does not work due to streamlit.errors.StreamlitPageNotFoundError error
- "content/documentation.py",
- "content/topp_workflow_file_upload.py",
- "content/topp_workflow_parameter.py",
- "content/topp_workflow_execution.py",
- "content/topp_workflow_results.py",
- "content/file_upload.py",
- "content/raw_data_viewer.py",
- "content/run_example_workflow.py",
- "content/download_section.py",
- "content/simple_workflow.py",
- "content/run_subprocess.py",
- ),
- indirect=True,
-)
-def test_launch(launch):
- """Test if all pages can be launched without errors."""
- launch.run(timeout=30) # Increased timeout from 10 to 30 seconds
- assert not launch.exception
-
-
-########### PAGE SPECIFIC TESTS ############
-@pytest.mark.parametrize(
- "launch,selection",
- [
- ("content/documentation.py", "User Guide"),
- ("content/documentation.py", "Installation"),
- (
- "content/documentation.py",
- "Developers Guide: How to build app based on this template",
- ),
- ("content/documentation.py", "Developers Guide: TOPP Workflow Framework"),
- ("content/documentation.py", "Developer Guide: Windows Executables"),
- ("content/documentation.py", "Developers Guide: Deployment"),
- ],
- indirect=["launch"],
-)
-def test_documentation(launch, selection):
- launch.run()
- launch.selectbox[0].select(selection).run()
- assert not launch.exception
-
-
-@pytest.mark.parametrize("launch", ["content/file_upload.py"], indirect=True)
-def test_file_upload_load_example(launch):
- launch.run()
- for i in launch.tabs:
- if i.label == "Example Data":
- i.button[0].click().run()
- assert not launch.exception
-
-
-# NOTE: All tabs are automatically checked
-@pytest.mark.parametrize(
- "launch,example",
- [
- ("content/raw_data_viewer.py", "Blank.mzML"),
- ("content/raw_data_viewer.py", "Treatment.mzML"),
- ("content/raw_data_viewer.py", "Pool.mzML"),
- ("content/raw_data_viewer.py", "Control.mzML"),
- ],
- indirect=["launch"],
-)
-def test_view_raw_ms_data(launch, example):
- launch.run(timeout=30) # Increased timeout from 10 to 30 seconds
-
- ## Load Example file, based on implementation of fileupload.load_example_mzML_files() ###
- mzML_dir = Path(launch.session_state.workspace, "mzML-files")
-
- # Copy files from example-data/mzML to workspace mzML directory, add to selected files
- for f in Path("example-data", "mzML").glob("*.mzML"):
- shutil.copy(f, mzML_dir)
- launch.run()
-
- ## TODO: Figure out a way to select a spectrum to be displayed
- launch.selectbox[0].select(example).run()
- assert not launch.exception
-
-
-@pytest.mark.parametrize(
- "launch,example",
- [
- ("content/run_example_workflow.py", ["Blank"]),
- ("content/run_example_workflow.py", ["Treatment"]),
- ("content/run_example_workflow.py", ["Pool"]),
- ("content/run_example_workflow.py", ["Control"]),
- ("content/run_example_workflow.py", ["Control", "Blank"]),
- ],
- indirect=["launch"],
-)
-def test_run_workflow(launch, example):
- launch.run()
- ## Load Example file, based on implementation of fileupload.load_example_mzML_files() ###
- mzML_dir = Path(launch.session_state.workspace, "mzML-files")
-
- # Copy files from example-data/mzML to workspace mzML directory, add to selected files
- for f in Path("example-data", "mzML").glob("*.mzML"):
- shutil.copy(f, mzML_dir)
- launch.run()
-
- ## Select experiments to process
- for e in example:
- launch.multiselect[0].select(e)
-
- launch.run()
- assert not launch.exception
-
- # Press the "Run Workflow" button
- launch.button[1].click().run(timeout=60)
- assert not launch.exception
+ settings = json.load(f)
+ assert "app-name" in settings
+ assert "version" in settings
+
+
+def test_content_pages_exist():
+ """Test that all content pages referenced by app.py exist."""
+ from pathlib import Path
+
+ expected_pages = [
+ "content/quickstart.py",
+ "content/workflow_fileupload.py",
+ "content/workflow_configure.py",
+ "content/workflow_run.py",
+ "content/results_database_search.py",
+ "content/results_rescoring.py",
+ "content/results_filtered.py",
+ "content/results_abundance.py",
+ "content/results_volcano.py",
+ "content/results_pca.py",
+ "content/results_heatmap.py",
+ "content/results_library.py",
+ ]
+ for page in expected_pages:
+ assert Path(page).exists(), f"Content page {page} is missing"
diff --git a/tests/test_run_subprocess.py b/tests/test_run_subprocess.py
deleted file mode 100644
index cd6889a..0000000
--- a/tests/test_run_subprocess.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import pytest
-import time
-from streamlit.testing.v1 import AppTest
-
-@pytest.fixture
-def launch():
- """Launch the Run Subprocess Streamlit page for testing."""
-
- app = AppTest.from_file("content/run_subprocess.py")
- app.run(timeout=10)
- return app
-
-def test_file_selection(launch):
- """Ensure a file can be selected from the dropdown."""
- launch.run()
-
- assert len(launch.selectbox) > 0, "No file selection dropdown found!"
-
- if len(launch.selectbox[0].options) > 0:
- launch.selectbox[0].select(launch.selectbox[0].options[0])
- launch.run()
-
-
-def test_extract_ids_button(launch):
- """Ensure clicking 'Extract IDs' triggers process and UI updates accordingly."""
- launch.run(timeout=10)
- time.sleep(3)
-
- # Ensure 'Extract ids' button exists
- extract_button = next((btn for btn in launch.button if "Extract ids" in btn.label), None)
- assert extract_button is not None, "Extract ids button not found!"
-
- # Click the 'Extract ids' button
- extract_button.click()
- launch.run(timeout=10)
-
- print("Extract ids button was clicked successfully!")
\ No newline at end of file
diff --git a/tests/test_simple_workflow.py b/tests/test_simple_workflow.py
deleted file mode 100644
index 5a94c41..0000000
--- a/tests/test_simple_workflow.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import pytest
-import time
-from streamlit.testing.v1 import AppTest
-
-"""
-Tests for the Simple Workflow page functionality.
-
-These tests verify:
-- Number input widgets function correctly
-- Session state updates properly
-- Table generation with correct dimensions
-- Download button presence
-"""
-
-@pytest.fixture
-def launch():
- """Launch the Simple Workflow page for testing."""
- app = AppTest.from_file("content/simple_workflow.py")
- app.run(timeout=15)
- return app
-
-def test_number_inputs(launch):
- """Ensure x and y dimension inputs exist and update correctly."""
-
- assert len(launch.number_input) >= 2, f"Expected at least 2 number inputs, found {len(launch.number_input)}"
-
- # Set x and y dimensions
- x_input = next((ni for ni in launch.number_input if ni.key == "example-x-dimension"), None)
- y_input = next((ni for ni in launch.number_input if ni.key == "example-y-dimension"), None)
-
- assert x_input is not None, "X-dimension input not found!"
- assert y_input is not None, "Y-dimension input not found!"
-
- x_input.set_value(5)
- y_input.set_value(4)
- launch.run(timeout=10)
-
- # Validate session state updates
- assert "example-x-dimension" in launch.session_state, "X-dimension key missing in session state!"
- assert "example-y-dimension" in launch.session_state, "Y-dimension key missing in session state!"
- assert launch.session_state["example-x-dimension"] == 5, "X-dimension not updated!"
- assert launch.session_state["example-y-dimension"] == 4, "Y-dimension not updated!"
-
- assert len(launch.dataframe) > 0, "Table not generated!"
-
- df = launch.dataframe[0].value
- assert df.shape == (5, 4), f"Expected table size (5,4) but got {df.shape}"
-
-def test_download_button(launch):
- """Ensure 'Download Table' button appears after table generation."""
-
- # Locate number inputs by key
- x_input = next((ni for ni in launch.number_input if ni.key == "example-x-dimension"), None)
- y_input = next((ni for ni in launch.number_input if ni.key == "example-y-dimension"), None)
-
- assert x_input is not None, "X-dimension input not found!"
- assert y_input is not None, "Y-dimension input not found!"
-
- # Set values and trigger app update
- x_input.set_value(3)
- y_input.set_value(2)
- launch.run(timeout=15)
- time.sleep(5)
-
- assert len(launch.dataframe) > 0, "Table not generated!"
-
- # Find the "Download Table" button correctly
- download_elements = [comp for comp in launch.main if hasattr(comp, "label") and "Download" in comp.label]
- assert len(download_elements) > 0, "Download Table button is missing!"
From ca69f3a01a1d91782e363c8c210fbd38e048d387 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 09:00:14 +0000
Subject: [PATCH 03/11] Add Dockerfile_simple and restore test files with
updated references
- Add missing Dockerfile_simple for the build-simple-app Docker CI job
- Restore test.py, test_gui.py, tests/test_simple_workflow.py,
tests/test_run_subprocess.py, and workflow-tests.yml
- Update test references to point at actual existing content pages
instead of removed template pages
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
.github/workflows/workflow-tests.yml | 3 +
Dockerfile_simple | 153 +++++++++++++++++++++++++++
test.py | 39 +++++++
test_gui.py | 47 ++++----
tests/test_run_subprocess.py | 18 ++++
tests/test_simple_workflow.py | 26 +++++
6 files changed, 264 insertions(+), 22 deletions(-)
create mode 100644 Dockerfile_simple
create mode 100644 test.py
create mode 100644 tests/test_run_subprocess.py
create mode 100644 tests/test_simple_workflow.py
diff --git a/.github/workflows/workflow-tests.yml b/.github/workflows/workflow-tests.yml
index b459a2d..92b0b99 100644
--- a/.github/workflows/workflow-tests.yml
+++ b/.github/workflows/workflow-tests.yml
@@ -20,6 +20,9 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest
+ - name: Running test cases
+ run: |
+ pytest test.py
- name: Running GUI tests
run: |
pytest test_gui.py
diff --git a/Dockerfile_simple b/Dockerfile_simple
new file mode 100644
index 0000000..566668b
--- /dev/null
+++ b/Dockerfile_simple
@@ -0,0 +1,153 @@
+# This Dockerfile creates a container with pyOpenMS
+# It also adds a basic streamlit server that serves a pyOpenMS-based app.
+# hints:
+# build image with: docker build -f Dockerfile_simple --no-cache -t streamlitapp-simple:latest --build-arg GITHUB_TOKEN= . 2>&1 | tee build.log
+# check if image was build: docker image ls
+# run container: docker run -p 8501:8501 streamlitapp-simple:latest
+# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell
+# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force
+
+FROM ubuntu:22.04 AS stage1
+ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git
+ARG OPENMS_BRANCH=develop
+ARG PORT=8501
+# GitHub token to download latest OpenMS executable for Windows from Github action artifact.
+ARG GITHUB_TOKEN
+ENV GH_TOKEN=${GITHUB_TOKEN}
+# Streamlit app Gihub user name (to download artifact from).
+ARG GITHUB_USER=OpenMS
+# Streamlit app Gihub repository name (to download artifact from).
+ARG GITHUB_REPO=quantms-web
+
+
+# Step 1: set up a sane build system
+USER root
+
+RUN apt-get -y update
+# note: streamlit in docker needs libgtk2.0-dev (see https://yugdamor.medium.com/importerror-libgthread-2-0-so-0-cannot-open-shared-object-file-no-such-file-or-directory-895b94a7827b)
+RUN apt-get install -y --no-install-recommends --no-install-suggests wget ca-certificates libgtk2.0-dev curl jq cron nginx
+RUN update-ca-certificates
+
+# Install Github CLI
+RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \
+ && mkdir -p -m 755 /etc/apt/keyrings \
+ && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+ && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
+ && apt-get update \
+ && apt-get install gh -y
+
+# Download and install miniforge.
+ENV PATH="/root/miniforge3/bin:${PATH}"
+RUN wget -q \
+ https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh \
+ && bash Miniforge3-Linux-x86_64.sh -b \
+ && rm -f Miniforge3-Linux-x86_64.sh
+RUN mamba --version
+
+# Setup mamba environment.
+RUN mamba create -n streamlit-env python=3.10
+RUN echo "mamba activate streamlit-env" >> ~/.bashrc
+SHELL ["/bin/bash", "--rcfile", "~/.bashrc"]
+SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]
+
+#################################### install streamlit
+# install packages
+COPY requirements.txt requirements.txt
+RUN mamba install pip
+RUN python -m pip install --upgrade pip
+RUN python -m pip install -r requirements.txt
+
+
+# create workdir and copy over all streamlit related files/folders
+WORKDIR /app
+# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure
+WORKDIR /app
+COPY assets/ /app/assets
+COPY content/ /app/content
+COPY example-data/ /app/example-data
+COPY gdpr_consent/ /app/gdpr_consent
+COPY hooks/ /app/hooks
+COPY src/ /app/src
+COPY utils/ /app/utils
+COPY app.py /app/app.py
+COPY settings.json /app/settings.json
+COPY default-parameters.json /app/default-parameters.json
+COPY presets.json /app/presets.json
+
+# For streamlit configuration
+COPY .streamlit/ /app/.streamlit/
+
+COPY clean-up-workspaces.py /app/clean-up-workspaces.py
+
+# add cron job to the crontab
+RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab -
+
+# Number of Streamlit server instances for load balancing (default: 1 = no load balancer)
+# Set to >1 to enable nginx load balancer with multiple Streamlit instances
+ENV STREAMLIT_SERVER_COUNT=1
+
+# create entrypoint script to start cron service and launch streamlit app
+RUN echo -e '#!/bin/bash\n\
+set -e\n\
+source /root/miniforge3/bin/activate streamlit-env\n\
+\n\
+# Start cron for workspace cleanup\n\
+service cron start\n\
+\n\
+# Load balancer setup\n\
+SERVER_COUNT=${STREAMLIT_SERVER_COUNT:-1}\n\
+\n\
+if [ "$SERVER_COUNT" -gt 1 ]; then\n\
+ echo "Starting $SERVER_COUNT Streamlit instances with nginx load balancer..."\n\
+\n\
+ # Generate nginx upstream block\n\
+ UPSTREAM_SERVERS=""\n\
+ BASE_PORT=8510\n\
+ for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
+ PORT=$((BASE_PORT + i))\n\
+ UPSTREAM_SERVERS="${UPSTREAM_SERVERS} server 127.0.0.1:${PORT};\\n"\n\
+ done\n\
+\n\
+ # Write nginx config\n\
+ mkdir -p /etc/nginx\n\
+ echo -e "worker_processes auto;\\npid /run/nginx.pid;\\n\\nevents {\\n worker_connections 1024;\\n}\\n\\nhttp {\\n client_max_body_size 0;\\n\\n map \\$cookie_stroute \\$route_key {\\n \\x22\\x22 \\$request_id;\\n default \\$cookie_stroute;\\n }\\n\\n upstream streamlit_backend {\\n hash \\$route_key consistent;\\n${UPSTREAM_SERVERS} }\\n\\n map \\$http_upgrade \\$connection_upgrade {\\n default upgrade;\\n \\x27\\x27 close;\\n }\\n\\n server {\\n listen 0.0.0.0:8501;\\n\\n location / {\\n proxy_pass http://streamlit_backend;\\n proxy_http_version 1.1;\\n proxy_set_header Upgrade \\$http_upgrade;\\n proxy_set_header Connection \\$connection_upgrade;\\n proxy_set_header Host \\$host;\\n proxy_set_header X-Real-IP \\$remote_addr;\\n proxy_set_header X-Forwarded-For \\$proxy_add_x_forwarded_for;\\n proxy_set_header X-Forwarded-Proto \\$scheme;\\n proxy_read_timeout 86400;\\n proxy_send_timeout 86400;\\n proxy_buffering off;\\n add_header Set-Cookie \\x22stroute=\\$route_key; Path=/; HttpOnly; SameSite=Lax\\x22 always;\\n }\\n }\\n}" > /etc/nginx/nginx.conf\n\
+\n\
+ # Start Streamlit instances on internal ports\n\
+ for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
+ PORT=$((BASE_PORT + i))\n\
+ echo "Starting Streamlit instance on port $PORT..."\n\
+ streamlit run app.py --server.port $PORT --server.address 0.0.0.0 &\n\
+ done\n\
+\n\
+ sleep 2\n\
+ echo "Starting nginx load balancer on port 8501..."\n\
+ exec /usr/sbin/nginx -g "daemon off;"\n\
+else\n\
+ # Single instance mode (default) - run Streamlit directly on port 8501\n\
+ echo "Starting Streamlit app..."\n\
+ exec streamlit run app.py --server.address 0.0.0.0\n\
+fi\n\
+' > /app/entrypoint.sh
+# make the script executable
+RUN chmod +x /app/entrypoint.sh
+
+# Patch Analytics
+RUN mamba run -n streamlit-env python hooks/hook-analytics.py
+
+# Set Online Deployment
+RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json
+
+# Download latest OpenMS App executable as a ZIP file
+RUN if [ -n "$GH_TOKEN" ]; then \
+ echo "GH_TOKEN is set, proceeding to download the release asset..."; \
+ gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \
+ else \
+ echo "GH_TOKEN is not set, skipping the release asset download."; \
+ fi
+
+# make sure that mamba environment is used
+SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]
+
+EXPOSE $PORT
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..68a82fd
--- /dev/null
+++ b/test.py
@@ -0,0 +1,39 @@
+import unittest
+import json
+from pathlib import Path
+
+
+class TestSettingsJson(unittest.TestCase):
+ def test_settings_json_exists(self):
+ self.assertTrue(Path("settings.json").exists(), "settings.json file is missing")
+
+ def test_settings_json_valid(self):
+ with open("settings.json", "r") as f:
+ settings = json.load(f)
+ self.assertIn("app-name", settings)
+ self.assertIn("version", settings)
+
+
+class TestContentPagesExist(unittest.TestCase):
+ def test_all_content_pages_exist(self):
+ """Test that all content pages referenced by app.py exist."""
+ expected_pages = [
+ "content/quickstart.py",
+ "content/workflow_fileupload.py",
+ "content/workflow_configure.py",
+ "content/workflow_run.py",
+ "content/results_database_search.py",
+ "content/results_rescoring.py",
+ "content/results_filtered.py",
+ "content/results_abundance.py",
+ "content/results_volcano.py",
+ "content/results_pca.py",
+ "content/results_heatmap.py",
+ "content/results_library.py",
+ ]
+ for page in expected_pages:
+ self.assertTrue(Path(page).exists(), f"Content page {page} is missing")
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test_gui.py b/test_gui.py
index 939f41f..467d116 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -1,31 +1,34 @@
+from streamlit.testing.v1 import AppTest
+import pytest
import json
-def test_settings_json_valid():
- """Test that settings.json exists and contains required fields."""
- with open("settings.json", "r") as f:
- settings = json.load(f)
- assert "app-name" in settings
- assert "version" in settings
+@pytest.fixture
+def launch(request):
+ test = AppTest.from_file(request.param)
+ ## Initialize session state ##
+ with open("settings.json", "r") as f:
+ test.session_state.settings = json.load(f)
+ test.session_state.settings["test"] = True
+ test.secrets["workspace"] = "test"
+ return test
-def test_content_pages_exist():
- """Test that all content pages referenced by app.py exist."""
- from pathlib import Path
- expected_pages = [
- "content/quickstart.py",
+# Test launching of all pages
+@pytest.mark.parametrize(
+ "launch",
+ (
"content/workflow_fileupload.py",
"content/workflow_configure.py",
"content/workflow_run.py",
- "content/results_database_search.py",
- "content/results_rescoring.py",
- "content/results_filtered.py",
- "content/results_abundance.py",
- "content/results_volcano.py",
- "content/results_pca.py",
- "content/results_heatmap.py",
- "content/results_library.py",
- ]
- for page in expected_pages:
- assert Path(page).exists(), f"Content page {page} is missing"
+ "content/digest.py",
+ "content/fragmentation.py",
+ "content/isotope_pattern_generator.py",
+ ),
+ indirect=True,
+)
+def test_launch(launch):
+ """Test if all pages can be launched without errors."""
+ launch.run(timeout=30)
+ assert not launch.exception
diff --git a/tests/test_run_subprocess.py b/tests/test_run_subprocess.py
new file mode 100644
index 0000000..91150bc
--- /dev/null
+++ b/tests/test_run_subprocess.py
@@ -0,0 +1,18 @@
+import pytest
+from streamlit.testing.v1 import AppTest
+import json
+
+@pytest.fixture
+def launch():
+ """Launch the Workflow Configure Streamlit page for testing."""
+ app = AppTest.from_file("content/workflow_configure.py")
+ with open("settings.json", "r") as f:
+ app.session_state.settings = json.load(f)
+ app.session_state.settings["test"] = True
+ app.secrets["workspace"] = "test"
+ app.run(timeout=10)
+ return app
+
+def test_page_loads(launch):
+ """Ensure the workflow configure page loads without errors."""
+ assert not launch.exception
\ No newline at end of file
diff --git a/tests/test_simple_workflow.py b/tests/test_simple_workflow.py
new file mode 100644
index 0000000..7013451
--- /dev/null
+++ b/tests/test_simple_workflow.py
@@ -0,0 +1,26 @@
+import pytest
+from streamlit.testing.v1 import AppTest
+import json
+
+"""
+Tests for the Digest page functionality.
+
+These tests verify:
+- Page can be launched without errors
+- Session state initialization works correctly
+"""
+
+@pytest.fixture
+def launch():
+ """Launch the Digest page for testing."""
+ app = AppTest.from_file("content/digest.py")
+ with open("settings.json", "r") as f:
+ app.session_state.settings = json.load(f)
+ app.session_state.settings["test"] = True
+ app.secrets["workspace"] = "test"
+ app.run(timeout=30)
+ return app
+
+def test_page_loads(launch):
+ """Ensure the digest page loads without errors."""
+ assert not launch.exception
From bdfa8d5a99b42f354fa5af13fc3d8f566731caa2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 09:00:57 +0000
Subject: [PATCH 04/11] Fix typos in Dockerfile_simple comments
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
Dockerfile_simple | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Dockerfile_simple b/Dockerfile_simple
index 566668b..78fd2b1 100644
--- a/Dockerfile_simple
+++ b/Dockerfile_simple
@@ -14,9 +14,9 @@ ARG PORT=8501
# GitHub token to download latest OpenMS executable for Windows from Github action artifact.
ARG GITHUB_TOKEN
ENV GH_TOKEN=${GITHUB_TOKEN}
-# Streamlit app Gihub user name (to download artifact from).
+# Streamlit app GitHub user name (to download artifact from).
ARG GITHUB_USER=OpenMS
-# Streamlit app Gihub repository name (to download artifact from).
+# Streamlit app GitHub repository name (to download artifact from).
ARG GITHUB_REPO=quantms-web
From bd16d988ad126407d8d5e332e161ac1b9c107ccb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 09:04:59 +0000
Subject: [PATCH 05/11] Remove Dockerfile_simple and build-simple-app CI job
A simple Dockerfile does not make sense for this project.
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
.github/workflows/build-docker-images.yml | 9 --
Dockerfile_simple | 153 ----------------------
2 files changed, 162 deletions(-)
delete mode 100644 Dockerfile_simple
diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index fcb7722..e16dc46 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -16,12 +16,3 @@ jobs:
- uses: actions/checkout@v3
- name: Build the full Docker image
run: docker build . --file Dockerfile --tag streamlitapp:latest
-
- build-simple-app:
-
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v3
- - name: Build the Docker image (pyOpenMS only)
- run: docker build . --file Dockerfile_simple --tag streamlitapp-simple:latest
\ No newline at end of file
diff --git a/Dockerfile_simple b/Dockerfile_simple
deleted file mode 100644
index 78fd2b1..0000000
--- a/Dockerfile_simple
+++ /dev/null
@@ -1,153 +0,0 @@
-# This Dockerfile creates a container with pyOpenMS
-# It also adds a basic streamlit server that serves a pyOpenMS-based app.
-# hints:
-# build image with: docker build -f Dockerfile_simple --no-cache -t streamlitapp-simple:latest --build-arg GITHUB_TOKEN= . 2>&1 | tee build.log
-# check if image was build: docker image ls
-# run container: docker run -p 8501:8501 streamlitapp-simple:latest
-# debug container after build (comment out ENTRYPOINT) and run container with interactive /bin/bash shell
-# prune unused images/etc. to free disc space (e.g. might be needed on gitpod). Use with care.: docker system prune --all --force
-
-FROM ubuntu:22.04 AS stage1
-ARG OPENMS_REPO=https://github.com/OpenMS/OpenMS.git
-ARG OPENMS_BRANCH=develop
-ARG PORT=8501
-# GitHub token to download latest OpenMS executable for Windows from Github action artifact.
-ARG GITHUB_TOKEN
-ENV GH_TOKEN=${GITHUB_TOKEN}
-# Streamlit app GitHub user name (to download artifact from).
-ARG GITHUB_USER=OpenMS
-# Streamlit app GitHub repository name (to download artifact from).
-ARG GITHUB_REPO=quantms-web
-
-
-# Step 1: set up a sane build system
-USER root
-
-RUN apt-get -y update
-# note: streamlit in docker needs libgtk2.0-dev (see https://yugdamor.medium.com/importerror-libgthread-2-0-so-0-cannot-open-shared-object-file-no-such-file-or-directory-895b94a7827b)
-RUN apt-get install -y --no-install-recommends --no-install-suggests wget ca-certificates libgtk2.0-dev curl jq cron nginx
-RUN update-ca-certificates
-
-# Install Github CLI
-RUN (type -p wget >/dev/null || (apt-get update && apt-get install wget -y)) \
- && mkdir -p -m 755 /etc/apt/keyrings \
- && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
- && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
- && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
- && apt-get update \
- && apt-get install gh -y
-
-# Download and install miniforge.
-ENV PATH="/root/miniforge3/bin:${PATH}"
-RUN wget -q \
- https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh \
- && bash Miniforge3-Linux-x86_64.sh -b \
- && rm -f Miniforge3-Linux-x86_64.sh
-RUN mamba --version
-
-# Setup mamba environment.
-RUN mamba create -n streamlit-env python=3.10
-RUN echo "mamba activate streamlit-env" >> ~/.bashrc
-SHELL ["/bin/bash", "--rcfile", "~/.bashrc"]
-SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]
-
-#################################### install streamlit
-# install packages
-COPY requirements.txt requirements.txt
-RUN mamba install pip
-RUN python -m pip install --upgrade pip
-RUN python -m pip install -r requirements.txt
-
-
-# create workdir and copy over all streamlit related files/folders
-WORKDIR /app
-# note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure
-WORKDIR /app
-COPY assets/ /app/assets
-COPY content/ /app/content
-COPY example-data/ /app/example-data
-COPY gdpr_consent/ /app/gdpr_consent
-COPY hooks/ /app/hooks
-COPY src/ /app/src
-COPY utils/ /app/utils
-COPY app.py /app/app.py
-COPY settings.json /app/settings.json
-COPY default-parameters.json /app/default-parameters.json
-COPY presets.json /app/presets.json
-
-# For streamlit configuration
-COPY .streamlit/ /app/.streamlit/
-
-COPY clean-up-workspaces.py /app/clean-up-workspaces.py
-
-# add cron job to the crontab
-RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab -
-
-# Number of Streamlit server instances for load balancing (default: 1 = no load balancer)
-# Set to >1 to enable nginx load balancer with multiple Streamlit instances
-ENV STREAMLIT_SERVER_COUNT=1
-
-# create entrypoint script to start cron service and launch streamlit app
-RUN echo -e '#!/bin/bash\n\
-set -e\n\
-source /root/miniforge3/bin/activate streamlit-env\n\
-\n\
-# Start cron for workspace cleanup\n\
-service cron start\n\
-\n\
-# Load balancer setup\n\
-SERVER_COUNT=${STREAMLIT_SERVER_COUNT:-1}\n\
-\n\
-if [ "$SERVER_COUNT" -gt 1 ]; then\n\
- echo "Starting $SERVER_COUNT Streamlit instances with nginx load balancer..."\n\
-\n\
- # Generate nginx upstream block\n\
- UPSTREAM_SERVERS=""\n\
- BASE_PORT=8510\n\
- for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
- PORT=$((BASE_PORT + i))\n\
- UPSTREAM_SERVERS="${UPSTREAM_SERVERS} server 127.0.0.1:${PORT};\\n"\n\
- done\n\
-\n\
- # Write nginx config\n\
- mkdir -p /etc/nginx\n\
- echo -e "worker_processes auto;\\npid /run/nginx.pid;\\n\\nevents {\\n worker_connections 1024;\\n}\\n\\nhttp {\\n client_max_body_size 0;\\n\\n map \\$cookie_stroute \\$route_key {\\n \\x22\\x22 \\$request_id;\\n default \\$cookie_stroute;\\n }\\n\\n upstream streamlit_backend {\\n hash \\$route_key consistent;\\n${UPSTREAM_SERVERS} }\\n\\n map \\$http_upgrade \\$connection_upgrade {\\n default upgrade;\\n \\x27\\x27 close;\\n }\\n\\n server {\\n listen 0.0.0.0:8501;\\n\\n location / {\\n proxy_pass http://streamlit_backend;\\n proxy_http_version 1.1;\\n proxy_set_header Upgrade \\$http_upgrade;\\n proxy_set_header Connection \\$connection_upgrade;\\n proxy_set_header Host \\$host;\\n proxy_set_header X-Real-IP \\$remote_addr;\\n proxy_set_header X-Forwarded-For \\$proxy_add_x_forwarded_for;\\n proxy_set_header X-Forwarded-Proto \\$scheme;\\n proxy_read_timeout 86400;\\n proxy_send_timeout 86400;\\n proxy_buffering off;\\n add_header Set-Cookie \\x22stroute=\\$route_key; Path=/; HttpOnly; SameSite=Lax\\x22 always;\\n }\\n }\\n}" > /etc/nginx/nginx.conf\n\
-\n\
- # Start Streamlit instances on internal ports\n\
- for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\
- PORT=$((BASE_PORT + i))\n\
- echo "Starting Streamlit instance on port $PORT..."\n\
- streamlit run app.py --server.port $PORT --server.address 0.0.0.0 &\n\
- done\n\
-\n\
- sleep 2\n\
- echo "Starting nginx load balancer on port 8501..."\n\
- exec /usr/sbin/nginx -g "daemon off;"\n\
-else\n\
- # Single instance mode (default) - run Streamlit directly on port 8501\n\
- echo "Starting Streamlit app..."\n\
- exec streamlit run app.py --server.address 0.0.0.0\n\
-fi\n\
-' > /app/entrypoint.sh
-# make the script executable
-RUN chmod +x /app/entrypoint.sh
-
-# Patch Analytics
-RUN mamba run -n streamlit-env python hooks/hook-analytics.py
-
-# Set Online Deployment
-RUN jq '.online_deployment = true' settings.json > tmp.json && mv tmp.json settings.json
-
-# Download latest OpenMS App executable as a ZIP file
-RUN if [ -n "$GH_TOKEN" ]; then \
- echo "GH_TOKEN is set, proceeding to download the release asset..."; \
- gh release download -R ${GITHUB_USER}/${GITHUB_REPO} -p "OpenMS-App.zip" -D /app; \
- else \
- echo "GH_TOKEN is not set, skipping the release asset download."; \
- fi
-
-# make sure that mamba environment is used
-SHELL ["mamba", "run", "-n", "streamlit-env", "/bin/bash", "-c"]
-
-EXPOSE $PORT
-ENTRYPOINT ["/app/entrypoint.sh"]
From 4a438a7203ad2959fcfac17e8d597c342edeabf2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 14 Mar 2026 14:16:49 +0000
Subject: [PATCH 06/11] Fix remaining CI test failures: remove duplicate main()
in digest.py, remove broken AppTest tests from tests/, update ci.yml to run
only mock-based tests
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
.github/workflows/ci.yml | 2 +-
content/digest.py | 3 ---
tests/test_run_subprocess.py | 18 ------------------
tests/test_simple_workflow.py | 26 --------------------------
4 files changed, 1 insertion(+), 48 deletions(-)
delete mode 100644 tests/test_run_subprocess.py
delete mode 100644 tests/test_simple_workflow.py
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dce98b6..93dcebe 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,5 +28,5 @@ jobs:
pip install pytest
- name: Test
run: |
- python -m pytest test_gui.py tests/
+ python -m pytest tests/
diff --git a/content/digest.py b/content/digest.py
index 8cd70f9..6382744 100644
--- a/content/digest.py
+++ b/content/digest.py
@@ -315,7 +315,4 @@ def main():
st.error("Please check your input and try again. If the problem persists, try with a simpler enzyme like Trypsin.")
-if __name__ == "__main__":
-
- main()
main()
\ No newline at end of file
diff --git a/tests/test_run_subprocess.py b/tests/test_run_subprocess.py
deleted file mode 100644
index 91150bc..0000000
--- a/tests/test_run_subprocess.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import pytest
-from streamlit.testing.v1 import AppTest
-import json
-
-@pytest.fixture
-def launch():
- """Launch the Workflow Configure Streamlit page for testing."""
- app = AppTest.from_file("content/workflow_configure.py")
- with open("settings.json", "r") as f:
- app.session_state.settings = json.load(f)
- app.session_state.settings["test"] = True
- app.secrets["workspace"] = "test"
- app.run(timeout=10)
- return app
-
-def test_page_loads(launch):
- """Ensure the workflow configure page loads without errors."""
- assert not launch.exception
\ No newline at end of file
diff --git a/tests/test_simple_workflow.py b/tests/test_simple_workflow.py
deleted file mode 100644
index 7013451..0000000
--- a/tests/test_simple_workflow.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import pytest
-from streamlit.testing.v1 import AppTest
-import json
-
-"""
-Tests for the Digest page functionality.
-
-These tests verify:
-- Page can be launched without errors
-- Session state initialization works correctly
-"""
-
-@pytest.fixture
-def launch():
- """Launch the Digest page for testing."""
- app = AppTest.from_file("content/digest.py")
- with open("settings.json", "r") as f:
- app.session_state.settings = json.load(f)
- app.session_state.settings["test"] = True
- app.secrets["workspace"] = "test"
- app.run(timeout=30)
- return app
-
-def test_page_loads(launch):
- """Ensure the digest page loads without errors."""
- assert not launch.exception
From 634335b4731e86c285d2c50787c9549ad7117fc5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 09:03:19 +0000
Subject: [PATCH 07/11] Dynamically discover pages from app.py in tests instead
of hardcoding
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
test.py | 44 +++++++++++++++++++++++++++---------------
test_gui.py | 55 +++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 72 insertions(+), 27 deletions(-)
diff --git a/test.py b/test.py
index 68a82fd..f794814 100644
--- a/test.py
+++ b/test.py
@@ -1,8 +1,34 @@
import unittest
+import ast
import json
from pathlib import Path
+def get_pages_from_app():
+ """Parse app.py AST to extract page paths from st.Page(Path(...)) calls."""
+ tree = ast.parse(Path("app.py").read_text())
+ pages = []
+ for node in ast.walk(tree):
+ # Match st.Page(Path("content", "filename.py"), ...)
+ if (
+ isinstance(node, ast.Call)
+ and isinstance(node.func, ast.Attribute)
+ and node.func.attr == "Page"
+ and node.args
+ and isinstance(node.args[0], ast.Call)
+ and isinstance(node.args[0].func, ast.Name)
+ and node.args[0].func.id == "Path"
+ ):
+ parts = [
+ arg.value
+ for arg in node.args[0].args
+ if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
+ ]
+ if parts:
+ pages.append(str(Path(*parts)))
+ return pages
+
+
class TestSettingsJson(unittest.TestCase):
def test_settings_json_exists(self):
self.assertTrue(Path("settings.json").exists(), "settings.json file is missing")
@@ -17,21 +43,9 @@ def test_settings_json_valid(self):
class TestContentPagesExist(unittest.TestCase):
def test_all_content_pages_exist(self):
"""Test that all content pages referenced by app.py exist."""
- expected_pages = [
- "content/quickstart.py",
- "content/workflow_fileupload.py",
- "content/workflow_configure.py",
- "content/workflow_run.py",
- "content/results_database_search.py",
- "content/results_rescoring.py",
- "content/results_filtered.py",
- "content/results_abundance.py",
- "content/results_volcano.py",
- "content/results_pca.py",
- "content/results_heatmap.py",
- "content/results_library.py",
- ]
- for page in expected_pages:
+ pages = get_pages_from_app()
+ self.assertTrue(len(pages) > 0, "No pages found in app.py")
+ for page in pages:
self.assertTrue(Path(page).exists(), f"Content page {page} is missing")
diff --git a/test_gui.py b/test_gui.py
index 467d116..53b1f59 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -1,8 +1,50 @@
+import ast
+from pathlib import Path
from streamlit.testing.v1 import AppTest
import pytest
import json
+def get_pages_from_app():
+ """Parse app.py AST to extract page paths from st.Page(Path(...)) calls."""
+ tree = ast.parse(Path("app.py").read_text())
+ pages = []
+ for node in ast.walk(tree):
+ if (
+ isinstance(node, ast.Call)
+ and isinstance(node.func, ast.Attribute)
+ and node.func.attr == "Page"
+ and node.args
+ and isinstance(node.args[0], ast.Call)
+ and isinstance(node.args[0].func, ast.Name)
+ and node.args[0].func.id == "Path"
+ ):
+ parts = [
+ arg.value
+ for arg in node.args[0].args
+ if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
+ ]
+ if parts:
+ pages.append(str(Path(*parts)))
+ return pages
+
+
+def _uses_page_link(path: str) -> bool:
+ """Return True if the file calls st.page_link(), which is incompatible with AppTest."""
+ return "st.page_link(" in Path(path).read_text()
+
+
+# Collect all content pages: those registered in app.py plus any other .py files
+# in content/ (utility pages like digest.py, fragmentation.py, etc.).
+# Exclude pages using st.page_link() — these require full st.navigation()
+# context and cannot be launched in isolation via AppTest.
+_app_pages = get_pages_from_app()
+_all_content = sorted(str(p) for p in Path("content").glob("*.py"))
+_pages_to_test = sorted(
+ p for p in set(_app_pages) | set(_all_content) if not _uses_page_link(p)
+)
+
+
@pytest.fixture
def launch(request):
test = AppTest.from_file(request.param)
@@ -16,18 +58,7 @@ def launch(request):
# Test launching of all pages
-@pytest.mark.parametrize(
- "launch",
- (
- "content/workflow_fileupload.py",
- "content/workflow_configure.py",
- "content/workflow_run.py",
- "content/digest.py",
- "content/fragmentation.py",
- "content/isotope_pattern_generator.py",
- ),
- indirect=True,
-)
+@pytest.mark.parametrize("launch", _pages_to_test, indirect=True)
def test_launch(launch):
"""Test if all pages can be launched without errors."""
launch.run(timeout=30)
From 5dc615dabc0852189c236e04c091930a228bdc58 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 09:04:16 +0000
Subject: [PATCH 08/11] Address review: improve assertion message and filter
__init__.py from content glob
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
test.py | 5 ++++-
test_gui.py | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/test.py b/test.py
index f794814..1dd02c0 100644
--- a/test.py
+++ b/test.py
@@ -44,7 +44,10 @@ class TestContentPagesExist(unittest.TestCase):
def test_all_content_pages_exist(self):
"""Test that all content pages referenced by app.py exist."""
pages = get_pages_from_app()
- self.assertTrue(len(pages) > 0, "No pages found in app.py")
+ self.assertTrue(
+ len(pages) > 0,
+ "No pages found in app.py — ensure app.py contains st.Page(Path(...)) calls",
+ )
for page in pages:
self.assertTrue(Path(page).exists(), f"Content page {page} is missing")
diff --git a/test_gui.py b/test_gui.py
index 53b1f59..20b32c0 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -39,7 +39,9 @@ def _uses_page_link(path: str) -> bool:
# Exclude pages using st.page_link() — these require full st.navigation()
# context and cannot be launched in isolation via AppTest.
_app_pages = get_pages_from_app()
-_all_content = sorted(str(p) for p in Path("content").glob("*.py"))
+_all_content = sorted(
+ str(p) for p in Path("content").glob("*.py") if p.name != "__init__.py"
+)
_pages_to_test = sorted(
p for p in set(_app_pages) | set(_all_content) if not _uses_page_link(p)
)
From fc94c0b4dde66053c8dc14cd4c3bb0d7f61479ec Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 09:37:37 +0000
Subject: [PATCH 09/11] Restore test_gui.py in ci.yml: fix mock isolation and
use dict-style session_state access
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
.github/workflows/ci.yml | 2 +-
src/common/common.py | 2 +-
test.py | 2 +-
test_gui.py | 10 +++++-----
tests/test_parameter_presets.py | 15 +++++++++++++++
tests/test_topp_workflow_parameter.py | 9 +++++++++
6 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 93dcebe..dce98b6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,5 +28,5 @@ jobs:
pip install pytest
- name: Test
run: |
- python -m pytest tests/
+ python -m pytest test_gui.py tests/
diff --git a/src/common/common.py b/src/common/common.py
index 4d24e76..971a597 100644
--- a/src/common/common.py
+++ b/src/common/common.py
@@ -329,7 +329,7 @@ def page_setup(page: str = "") -> dict[str, Any]:
"""
if "settings" not in st.session_state:
with open("settings.json", "r") as f:
- st.session_state.settings = json.load(f)
+ st.session_state["settings"] = json.load(f)
# Set Streamlit page configurations
st.set_page_config(
diff --git a/test.py b/test.py
index 1dd02c0..c392332 100644
--- a/test.py
+++ b/test.py
@@ -6,7 +6,7 @@
def get_pages_from_app():
"""Parse app.py AST to extract page paths from st.Page(Path(...)) calls."""
- tree = ast.parse(Path("app.py").read_text())
+ tree = ast.parse(Path("app.py").read_text(encoding="utf-8"))
pages = []
for node in ast.walk(tree):
# Match st.Page(Path("content", "filename.py"), ...)
diff --git a/test_gui.py b/test_gui.py
index 20b32c0..04cf6e0 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -7,7 +7,7 @@
def get_pages_from_app():
"""Parse app.py AST to extract page paths from st.Page(Path(...)) calls."""
- tree = ast.parse(Path("app.py").read_text())
+ tree = ast.parse(Path("app.py").read_text(encoding="utf-8"))
pages = []
for node in ast.walk(tree):
if (
@@ -31,7 +31,7 @@ def get_pages_from_app():
def _uses_page_link(path: str) -> bool:
"""Return True if the file calls st.page_link(), which is incompatible with AppTest."""
- return "st.page_link(" in Path(path).read_text()
+ return "st.page_link(" in Path(path).read_text(encoding="utf-8")
# Collect all content pages: those registered in app.py plus any other .py files
@@ -51,10 +51,10 @@ def _uses_page_link(path: str) -> bool:
def launch(request):
test = AppTest.from_file(request.param)
- ## Initialize session state ##
+ ## Initialize session state (use dict-style access for conda compatibility) ##
with open("settings.json", "r") as f:
- test.session_state.settings = json.load(f)
- test.session_state.settings["test"] = True
+ test.session_state["settings"] = json.load(f)
+ test.session_state["settings"]["test"] = True
test.secrets["workspace"] = "test"
return test
diff --git a/tests/test_parameter_presets.py b/tests/test_parameter_presets.py
index 5104abc..25da1e7 100644
--- a/tests/test_parameter_presets.py
+++ b/tests/test_parameter_presets.py
@@ -16,6 +16,10 @@
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PROJECT_ROOT)
+# Save original modules before mocking (to restore after import)
+_orig_streamlit = sys.modules.get('streamlit')
+_orig_pyopenms = sys.modules.get('pyopenms')
+
# Create mock for streamlit before importing ParameterManager
mock_streamlit = MagicMock()
mock_streamlit.session_state = {}
@@ -29,6 +33,17 @@
# Now import after mocks are set up
from src.workflow.ParameterManager import ParameterManager
+# Restore original modules to avoid contaminating other test modules
+if _orig_streamlit is not None:
+ sys.modules['streamlit'] = _orig_streamlit
+elif 'streamlit' in sys.modules:
+ del sys.modules['streamlit']
+
+if _orig_pyopenms is not None:
+ sys.modules['pyopenms'] = _orig_pyopenms
+elif 'pyopenms' in sys.modules:
+ del sys.modules['pyopenms']
+
@pytest.fixture
def temp_workflow_dir():
diff --git a/tests/test_topp_workflow_parameter.py b/tests/test_topp_workflow_parameter.py
index fa4d75e..b36a322 100644
--- a/tests/test_topp_workflow_parameter.py
+++ b/tests/test_topp_workflow_parameter.py
@@ -14,11 +14,20 @@
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PROJECT_ROOT)
+# Save original pyopenms module before mocking (to restore after import)
+_orig_pyopenms = sys.modules.get('pyopenms')
+
# Create mock for pyopenms to avoid dependency on actual OpenMS installation
mock_pyopenms = MagicMock()
mock_pyopenms.__version__ = "2.9.1" # Mock version for testing
sys.modules['pyopenms'] = mock_pyopenms
+# Restore original pyopenms to avoid contaminating other test modules
+if _orig_pyopenms is not None:
+ sys.modules['pyopenms'] = _orig_pyopenms
+elif 'pyopenms' in sys.modules:
+ del sys.modules['pyopenms']
+
@pytest.fixture
def mock_streamlit():
"""Mock essential Streamlit components for testing parameter display."""
From 3e96e9438fc2bdefd3c385f9f0d7b6438caf95ce Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 09:54:30 +0000
Subject: [PATCH 10/11] Revert dict-style session_state access: no longer
needed after mock isolation fix
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
src/common/common.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/common/common.py b/src/common/common.py
index 971a597..4d24e76 100644
--- a/src/common/common.py
+++ b/src/common/common.py
@@ -329,7 +329,7 @@ def page_setup(page: str = "") -> dict[str, Any]:
"""
if "settings" not in st.session_state:
with open("settings.json", "r") as f:
- st.session_state["settings"] = json.load(f)
+ st.session_state.settings = json.load(f)
# Set Streamlit page configurations
st.set_page_config(
From 71021665738d723f0cf22f27ff6aaaa47145eaee Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 16 Mar 2026 09:58:11 +0000
Subject: [PATCH 11/11] =?UTF-8?q?Remove=20content/digest.py,=20fragmentati?=
=?UTF-8?q?on.py,=20isotope=5Fpattern=5Fgenerator.py=20and=20utils/=20?=
=?UTF-8?q?=E2=80=94=20not=20in=20app.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: t0mdavid-m <57191390+t0mdavid-m@users.noreply.github.com>
---
content/digest.py | 318 -----------
content/fragmentation.py | 799 ---------------------------
content/isotope_pattern_generator.py | 625 ---------------------
test_gui.py | 5 +-
utils/__init__.py | 3 -
utils/digest.py | 359 ------------
utils/fasta.py | 151 -----
7 files changed, 2 insertions(+), 2258 deletions(-)
delete mode 100644 content/digest.py
delete mode 100644 content/fragmentation.py
delete mode 100644 content/isotope_pattern_generator.py
delete mode 100644 utils/__init__.py
delete mode 100644 utils/digest.py
delete mode 100644 utils/fasta.py
diff --git a/content/digest.py b/content/digest.py
deleted file mode 100644
index 6382744..0000000
--- a/content/digest.py
+++ /dev/null
@@ -1,318 +0,0 @@
-"""
-In Silico Protein Digest Page
-
-This module provides functionality for performing in silico protein digestion
-using pyOpenMS. Users can input protein sequences in FASTA format and get
-peptide lists with mass calculations.
-"""
-
-import streamlit as st
-import sys
-from pathlib import Path
-
-# Add utils to path
-sys.path.append(str(Path(__file__).parent.parent))
-
-from utils.fasta import validate_fasta_input
-from utils.digest import perform_digest, get_digest_statistics, get_available_enzymes, filter_peptides_by_length, calculate_protein_coverage, generate_coverage_html
-
-# Default values
-DEFAULT_ENZYME = "Trypsin"
-DEFAULT_MISSED_CLEAVAGES = 0 # Changed from 2 to 0
-DEFAULT_MAX_CHARGES = 5
-DEFAULT_MIN_PEPTIDE_LENGTH = 6
-DEFAULT_MAX_PEPTIDE_LENGTH = 50
-
-
-def main():
- """Main function for the digest page."""
- st.title("✂️ In Silico Protein Digest")
-
- st.markdown("""
- **Simulate enzymatic protein digestion computationally** to predict peptides for mass spectrometry analysis.
-
- This tool uses pyOpenMS to perform theoretical protein digestion with various proteases, helping you:
- - **Plan MS experiments** by predicting which peptides will be generated
- - **Optimize digestion conditions** by testing different enzymes and parameters
- - **Analyze protein coverage** and identify potential issues before experimental work
- - **Generate theoretical peptide lists** with accurate mass-to-charge ratios
- """)
-
- with st.expander("📚 **How In Silico Digestion Works**"):
- st.markdown("""
- **Enzymatic Cleavage Simulation:**
- - Enzymes cut proteins at specific amino acid sequences (cleavage sites)
- - **Trypsin** cuts after K (lysine) and R (arginine), except when followed by P (proline)
- - **Pepsin** cuts preferentially at F, L, W, Y amino acids under acidic conditions
- - **Chymotrypsin** cuts after F, W, Y, L amino acids
-
- **Missed Cleavages:**
- - Real digestion is not 100% efficient - some cleavage sites are missed
- - Allows prediction of longer peptides that contain uncleaved sites
- - Important for comprehensive coverage analysis
-
- **Applications:**
- - **Bottom-up proteomics** experiment planning
- - **Peptide mapping** for protein characterization
- - **Method development** for LC-MS/MS workflows
- - **Quality control** for digestion efficiency assessment
- """)
-
- # Input form section
- with st.form("digest_form"):
- st.subheader("Input Parameters")
-
- # FASTA input
- default_sequence = """>sp|Q9UPY3|DICER_HUMAN Endoribonuclease Dicer OS=Homo sapiens OX=9606 GN=DICER1 PE=1 SV=3
-MKSPALQPLSMAGLQLMTPASSPMGPFFGLPWQQEAIHDNIYTPRKYQVELLEAALDHNT
-IVCLNTGSGKTFIAVLLTKELSYQIRGDFSRNGKRTVFLVNSANQVAQQVSAVRTHSDLK
-VGEYSNLEVNASWTKERWNQEFTKHQVLIMTCYVALNVLKNGYLSLSDINLLVFDECHLA
-ILDHPYREIMKLCENCPSCPRILGLTASILNGKCDPEELEEKIQKLEKILKSNAETATDL
-VVLDRYTSQPCEIVVDCGPFTDRSGLYERLLMELEEALNFINDCNISVHSKERDSTLISK
-QILSDCRAVLVVLGPWCADKVAGMMVRELQKYIKHEQEELHRKFLLFTDTFLRKIHALCE
-EHFSPASLDLKFVTPKVIKLLEILRKYKPYERQQFESVEWYNNRNQDNYVSWSDSEDDDE
-DEEIEEKEKPETNFPSPFTNILCGIIFVERRYTAVVLNRLIKEAGKQDPELAYISSNFIT
-GHGIGKNQPRNKQMEAEFRKQEEVLRKFRAHETNLLIATSIVEEGVDIPKCNLVVRFDLP
-TEYRSYVQSKGRARAPISNYIMLADTDKIKSFEEDLKTYKAIEKILRNKCSKSVDTGETD
-IDPVMDDDDVFPPYVLRPDDGGPRVTINTAIGHINRYCARLPSDPFTHLAPKCRTRELPD
-GTFYSTLYLPINSPLRASIVGPPMSCVRLAERVVALICCEKLHKIGELDDHLMPVGKETV
-KYEEELDLHDEEETSVPGRPGSTKRRQCYPKAIPECLRDSYPRPDQPCYLYVIGMVLTTP
-LPDELNFRRRKLYPPEDTTRCFGILTAKPIPQIPHFPVYTRSGEVTISIELKKSGFMLSL
-QMLELITRLHQYIFSHILRLEKPALEFKPTDADSAYCVLPLNVVNDSSTLDIDFKFMEDI
-EKSEARIGIPSTKYTKETPFVFKLEDYQDAVIIPRYRNFDQPHRFYVADVYTDLTPLSKF
-PSPEYETFAEYYKTKYNLDLTNLNQPLLDVDHTSSRLNLLTPRHLNQKGKALPLSSAEKR
-KAKWESLQNKQILVPELCAIHPIPASLWRKAVCLPSILYRLHCLLTAEELRAQTASDAGV
-GVRSLPADFRYPNLDFGWKKSIDSKSFISISNSSSAENDNYCKHSTIVPENAAHQGANRT
-SSLENHDQMSVNCRTLLSESPGKLHVEVSADLTAINGLSYNQNLANGSYDLANRDFCQGN
-QLNYYKQEIPVQPTTSYSIQNLYSYENQPQPSDECTLLSNKYLDGNANKSTSDGSPVMAV
-MPGTTDTIQVLKGRMDSEQSPSIGYSSRTLGPNPGLILQALTLSNASDGFNLERLEMLGD
-SFLKHAITTYLFCTYPDAHEGRLSYMRSKKVSNCNLYRLGKKKGLPSRMVVSIFDPPVNW
-LPPGYVVNQDKSNTDKWEKDEMTKDCMLANGKLDEDYEEEDEEEESLMWRAPKEEADYED
-DFLEYDQEHIRFIDNMLMGSGAFVKKISLSPFSTTDSAYEWKMPKKSSLGSMPFSSFDED
-FDYSSWDAMCYLDPSKAVEEDDFVVGFWNPSEENCGVDTGKQSISYDLHTEQCIADKSIA
-DCVEALLGCYLTSCGERAAQLFLCSLGLKVLPVIKRTDREKALCPTRENFNSQQKNLSVS
-CAAASVASSRSSVLKDSEYGCLKIPPRCMFDHPDADKTLNHLISGFENFEKKINYRFKNK
-AYLLQAFTHASYHYNTITDCYQRLEFLGDAILDYLITKHLYEDPRQHSPGVLTDLRSALV
-NNTIFASLAVKYDYHKYFKAVSPELFHVIDDFVQFQLEKNEMQGMDSELRRSEEDEEKEE
-DIEVPKAMGDIFESLAGAIYMDSGMSLETVWQVYYPMMRPLIEKFSANVPRSPVRELLEM
-EPETAKFSPAERTYDGKVRVTVEVVGKGKFKGVGRSYRIAKSAAARRALRSLKANQPQVP
-NS"""
-
- fasta_input = st.text_area(
- "Paste protein sequences in FASTA format",
- value=default_sequence,
- height=200,
- help="Default sequence: DICER_HUMAN protein for demonstration"
- )
-
- # Get available enzymes
- try:
- available_enzymes = get_available_enzymes()
- # convert bytes to str if necessary
- available_enzymes = [enzyme.decode() if isinstance(enzyme, bytes) else enzyme for enzyme in available_enzymes]
-
- except Exception as e:
- st.error(f"❌ Cannot load enzyme database: {e}")
- st.error("Please ensure pyOpenMS is properly configured before using the digest functionality.")
- st.stop()
-
- # Enzyme selection
- enzyme_index = 0
- if DEFAULT_ENZYME in available_enzymes:
- enzyme_index = available_enzymes.index(DEFAULT_ENZYME)
-
- enzyme = st.selectbox(
- "Enzyme",
- options=available_enzymes,
- index=enzyme_index,
- help="Select the enzyme for protein digestion"
- )
-
- # Parameters
- col1, col2 = st.columns(2)
-
- with col1:
- missed_cleavages = st.number_input(
- "Max missed cleavages",
- min_value=0,
- max_value=10,
- value=DEFAULT_MISSED_CLEAVAGES,
- help="Maximum number of missed cleavages allowed"
- )
-
- with col2:
- max_charges = st.number_input(
- "Max charge state (N)",
- min_value=1,
- max_value=10,
- value=DEFAULT_MAX_CHARGES,
- help="Maximum charge state to calculate [M + nH]"
- )
-
- # Peptide length filtering
- st.subheader("Peptide Length Filtering")
- col3, col4 = st.columns(2)
-
- with col3:
- min_peptide_length = st.number_input(
- "Min peptide length (AA)",
- min_value=1,
- max_value=100,
- value=DEFAULT_MIN_PEPTIDE_LENGTH,
- help="Minimum peptide length in amino acids"
- )
-
- with col4:
- max_peptide_length = st.number_input(
- "Max peptide length (AA)",
- min_value=1,
- max_value=200,
- value=DEFAULT_MAX_PEPTIDE_LENGTH,
- help="Maximum peptide length in amino acids"
- )
-
- # Submit button
- submit = st.form_submit_button("🧬 Digest Proteins", type="primary")
-
- # Process form submission
- if submit:
- if not fasta_input.strip():
- st.error("❌ Please provide FASTA sequences to digest.")
- return
-
- # Show progress
- with st.spinner("🔬 Performing in silico digest..."):
- # Validate FASTA input
- is_valid, error_message, sequences = validate_fasta_input(fasta_input)
-
- if not is_valid:
- st.error(f"❌ FASTA validation failed: {error_message}")
- return
-
- if not sequences:
- st.error("❌ No valid sequences found in the input.")
- return
-
- # Show input summary
- st.success(f"✅ Successfully parsed {len(sequences)} protein sequence(s)")
-
- # Progress bar
- progress_bar = st.progress(0, text="Initializing digest...")
-
- try:
- # Perform digest
- progress_bar.progress(30, text="Performing enzymatic digest...")
-
- df_results = perform_digest(
- sequences=sequences,
- enzyme=enzyme,
- missed_cleavages=missed_cleavages,
- max_charges=max_charges
- )
-
- progress_bar.progress(60, text="Applying peptide length filters...")
-
- # Apply peptide length filtering
- df_results = filter_peptides_by_length(
- df_results,
- min_length=min_peptide_length,
- max_length=max_peptide_length
- )
-
- progress_bar.progress(80, text="Processing results...")
-
- if df_results.empty:
- st.warning("⚠️ No peptides were generated from the digest or all peptides were filtered out. Try adjusting the parameters or check your input sequences.")
- progress_bar.empty()
- return
-
- progress_bar.progress(100, text="Complete!")
- progress_bar.empty()
-
- # Display results
- st.subheader("📊 Digest Results")
-
- # Summary statistics
- stats = get_digest_statistics(df_results)
-
- col1, col2, col3, col4 = st.columns(4)
- with col1:
- st.metric("Total Peptides", f"{stats['total_peptides']:,}")
- with col2:
- st.metric("Unique Proteins", stats['unique_proteins'])
- with col3:
- st.metric("Avg Length", f"{stats['avg_peptide_length']:.1f} AA")
- with col4:
- st.metric("Mass Range", f"{stats['mass_range'][0]:.0f}-{stats['mass_range'][1]:.0f} Da")
-
- # Results table
- st.dataframe(
- df_results,
- use_container_width=True,
- hide_index=True,
- column_config={
- "Accession": st.column_config.TextColumn("Accession", width="small"),
- "Description": st.column_config.TextColumn("Description", width="large"),
- "Peptide Sequence": st.column_config.TextColumn("Peptide Sequence", width="medium"),
- "Length": st.column_config.NumberColumn("Length", help="Peptide length in amino acids"),
- "Start": st.column_config.TextColumn("Start", width="small", help="1-based start position(s) in protein sequence"),
- "End": st.column_config.TextColumn("End", width="small", help="1-based end position(s) in protein sequence"),
- "[M]": st.column_config.NumberColumn("[M]", format="%.4f"),
- }
- )
-
- # Protein Coverage Visualization
- st.subheader("🎨 Protein Coverage Visualization")
- st.markdown("""
- **Sequence Coverage Analysis:** Each amino acid is colored based on how many peptides cover that position.
- Hover over amino acids to see exact coverage counts.
- """)
-
- # Calculate coverage for each protein
- coverage_data = calculate_protein_coverage(df_results, sequences)
-
- # Display coverage for each protein
- for accession, coverage_info in coverage_data.items():
- coverage_html = generate_coverage_html(accession, coverage_info)
- st.markdown(coverage_html, unsafe_allow_html=True)
-
- # Download section
- st.subheader("⬇️ Download Results")
-
- # Generate TSV
- tsv_data = df_results.to_csv(sep="\t", index=False)
-
- col1, col2 = st.columns(2)
- with col1:
- st.download_button(
- label="📄 Download as TSV",
- data=tsv_data,
- file_name=f"digest_results_{enzyme}_{missed_cleavages}mc.tsv",
- mime="text/tab-separated-values",
- help="Download results as tab-separated values file"
- )
-
- with col2:
- csv_data = df_results.to_csv(index=False)
- st.download_button(
- label="📄 Download as CSV",
- data=csv_data,
- file_name=f"digest_results_{enzyme}_{missed_cleavages}mc.csv",
- mime="text/csv",
- help="Download results as comma-separated values file"
- )
-
- # Additional information
- with st.expander("ℹ️ Digest Parameters Used"):
- st.write(f"**Enzyme:** {enzyme}")
- st.write(f"**Max missed cleavages:** {missed_cleavages}")
- st.write(f"**Max charge states:** {max_charges}")
- st.write(f"**Input sequences:** {len(sequences)}")
-
- except Exception as e:
- progress_bar.empty()
- st.exception(f"❌ An error occurred during digest: {str(e)}")
- st.error("Please check your input and try again. If the problem persists, try with a simpler enzyme like Trypsin.")
-
-
-main()
\ No newline at end of file
diff --git a/content/fragmentation.py b/content/fragmentation.py
deleted file mode 100644
index 1cb8126..0000000
--- a/content/fragmentation.py
+++ /dev/null
@@ -1,799 +0,0 @@
-import io
-import re
-from typing import Tuple, Dict, Any, Optional, List
-
-import plotly.graph_objects as go
-import streamlit as st
-import pyopenms as oms
-import pandas as pd
-
-from src.common.common import page_setup, show_fig
-
-params = page_setup()
-
-# Ion type configuration
-ION_TYPES = {
- 'a': {'name': 'a-ions', 'description': 'N-terminal ions (peptide bond + loss of CO)', 'param': 'add_a_ions'},
- 'b': {'name': 'b-ions', 'description': 'N-terminal ions (peptide bond cleavage)', 'param': 'add_b_ions'},
- 'c': {'name': 'c-ions', 'description': 'N-terminal ions (N-Cα bond cleavage)', 'param': 'add_c_ions'},
- 'x': {'name': 'x-ions', 'description': 'C-terminal ions (N-Cα bond + addition of CO)', 'param': 'add_x_ions'},
- 'y': {'name': 'y-ions', 'description': 'C-terminal ions (peptide bond cleavage)', 'param': 'add_y_ions'},
- 'z': {'name': 'z-ions', 'description': 'C-terminal ions (N-Cα bond cleavage)', 'param': 'add_z_ions'}
-}
-
-def validate_peptide_sequence(sequence_str: str) -> Tuple[bool, str, Optional[str]]:
- """Validate a peptide sequence for fragmentation.
-
- Args:
- sequence_str (str): The amino acid sequence
-
- Returns:
- Tuple[bool, str, Optional[str]]: (is_valid, error_message, clean_sequence)
- """
- try:
- # Clean the sequence
- sequence_str = sequence_str.strip().upper()
- if not sequence_str:
- return False, "Sequence cannot be empty", None
-
- # Remove common formatting characters
- clean_sequence = re.sub(r'[^ACDEFGHIKLMNPQRSTVWYXU]', '', sequence_str)
-
- if not clean_sequence:
- return False, "No valid amino acid letters found", None
-
- # Check minimum length for fragmentation
- if len(clean_sequence) < 2:
- return False, "Sequence must be at least 2 amino acids long for fragmentation", None
-
- # Validate amino acids
- valid_aa = set("ACDEFGHIKLMNPQRSTVWYXU")
- invalid_chars = [aa for aa in clean_sequence if aa not in valid_aa]
-
- if invalid_chars:
- invalid_list = ", ".join(sorted(set(invalid_chars)))
- return False, f"Invalid amino acid(s): {invalid_list}", None
-
- return True, "", clean_sequence
-
- except Exception as e:
- return False, f"Error validating sequence: {str(e)}", None
-
-def configure_spectrum_generator(ion_types: List[str], max_charge: int = 2) -> oms.TheoreticalSpectrumGenerator:
- """Configure the TheoreticalSpectrumGenerator with selected ion types.
-
- Args:
- ion_types (List[str]): List of ion type keys to enable
- max_charge (int): Maximum charge state to consider
-
- Returns:
- oms.TheoreticalSpectrumGenerator: Configured generator
- """
- tsg = oms.TheoreticalSpectrumGenerator()
- param = oms.Param()
-
- # Disable all ion types first
- for ion_key, ion_info in ION_TYPES.items():
- param.setValue(ion_info['param'], "false")
-
- # Enable selected ion types
- for ion_type in ion_types:
- if ion_type in ION_TYPES:
- param.setValue(ION_TYPES[ion_type]['param'], "true")
-
- # Set other parameters
- param.setValue("add_first_prefix_ion", "true")
- param.setValue("add_losses", "false") # Disable neutral losses for simplicity
- param.setValue("add_metainfo", "true")
- param.setValue("add_isotopes", "false") # Disable isotopes for cleaner spectra
- param.setValue("max_isotope", 2)
- param.setValue("rel_loss_intensity", 0.1)
-
- tsg.setParameters(param)
- return tsg
-
-def generate_theoretical_spectrum(sequence_str: str, ion_types: List[str], charges: List[int]) -> Dict[str, Any]:
- """Generate theoretical fragment spectrum for a peptide sequence.
-
- Args:
- sequence_str (str): The amino acid sequence
- ion_types (List[str]): List of ion types to include
- charges (List[int]): List of charge states to consider
-
- Returns:
- Dict[str, Any]: Results dictionary with fragment data
- """
- try:
- # Validate sequence
- is_valid, error_msg, clean_sequence = validate_peptide_sequence(sequence_str)
- if not is_valid:
- return {"success": False, "error": error_msg}
-
- if not ion_types:
- return {"success": False, "error": "Please select at least one ion type"}
-
- if not charges:
- return {"success": False, "error": "Please select at least one charge state"}
-
- # Create AASequence object
- aa_sequence = oms.AASequence.fromString(clean_sequence)
-
- # Configure spectrum generator
- max_charge = max(charges)
- tsg = configure_spectrum_generator(ion_types, max_charge)
-
- # Generate spectra for each charge state
- all_fragments = []
-
- for charge in charges:
- spectrum = oms.MSSpectrum()
- tsg.getSpectrum(spectrum, aa_sequence, charge, charge)
-
- # Extract peak data with annotations from StringDataArrays
- mzs = spectrum.get_peaks()[0]
- intensities = spectrum.get_peaks()[1]
-
- # Get annotations from StringDataArrays
- annotations = []
- if spectrum.getStringDataArrays():
- annotations = list(spectrum.getStringDataArrays()[0])
- annotations = [ann.decode('utf-8') if isinstance(ann, bytes) else ann for ann in annotations]
-
- # If no annotations available, create empty list
- if not annotations:
- annotations = [''] * len(mzs)
-
- for mz, intensity, annotation in zip(mzs, intensities, annotations):
- # Parse ion information from annotation
- ion_info = parse_ion_annotation(annotation, mz, clean_sequence)
-
- all_fragments.append({
- 'mz': mz,
- 'intensity': intensity,
- 'charge': charge,
- 'ion_type': ion_info.get('ion_type', 'unknown'),
- 'fragment_number': ion_info.get('fragment_number', 0),
- 'sequence': ion_info.get('fragment_sequence', ''),
- 'annotation': annotation if annotation else f'm/z {mz:.4f}'
- })
-
- # Convert to DataFrame
- df = pd.DataFrame(all_fragments)
- df = df.sort_values(['ion_type', 'fragment_number', 'charge'])
-
- return {
- "success": True,
- "fragments": df,
- "sequence": clean_sequence,
- "ion_types": ion_types,
- "charges": charges,
- "input_value": sequence_str
- }
-
- except Exception as e:
- return {"success": False, "error": f"Error generating spectrum: {str(e)}"}
-
-def parse_ion_annotation(annotation, mz: float, peptide_sequence: str = '') -> Dict[str, Any]:
- """Parse ion annotation string from pyOpenMS to extract ion information.
-
- Args:
- annotation: The annotation from StringDataArrays (str or bytes)
- mz (float): The m/z value
- peptide_sequence (str): The full peptide sequence
-
- Returns:
- Dict[str, Any]: Parsed ion information
- """
- # Handle bytes objects from pyOpenMS
- if isinstance(annotation, bytes):
- annotation = annotation.decode('utf-8')
-
- # Convert to string if needed
- annotation = str(annotation) if annotation is not None else ''
-
- if not annotation:
- return {
- 'ion_type': 'unknown',
- 'fragment_number': 0,
- 'fragment_sequence': '',
- 'annotation': f'm/z {mz:.4f}'
- }
-
- # Parse annotation like "b3+", "y5++", etc.
-
- # Match pattern: ion_type + number + charges
- match = re.match(r'([abcxyz])(\d+)(\+*)', annotation)
- if match:
- ion_type = match.group(1)
- fragment_number = int(match.group(2))
- charges = len(match.group(3))
-
- # Calculate fragment sequence
- fragment_sequence = ''
- if peptide_sequence and fragment_number > 0:
- if ion_type in ['a', 'b', 'c']: # N-terminal ions
- if fragment_number <= len(peptide_sequence):
- fragment_sequence = peptide_sequence[:fragment_number]
- elif ion_type in ['x', 'y', 'z']: # C-terminal ions
- if fragment_number <= len(peptide_sequence):
- fragment_sequence = peptide_sequence[-fragment_number:]
-
- return {
- 'ion_type': ion_type,
- 'fragment_number': fragment_number,
- 'fragment_sequence': fragment_sequence,
- 'annotation': annotation
- }
-
- # If parsing fails, return unknown
- return {
- 'ion_type': 'unknown',
- 'fragment_number': 0,
- 'fragment_sequence': '',
- 'annotation': annotation
- }
-
-def annotate_fragment(mz: float, aa_sequence: oms.AASequence, charge: int, ion_types: List[str]) -> Dict[str, Any]:
- """Annotate a fragment peak with ion type and fragment number.
-
- Args:
- mz (float): The m/z value of the fragment
- aa_sequence (oms.AASequence): The original sequence
- charge (int): The charge state
- ion_types (List[str]): Enabled ion types
-
- Returns:
- Dict[str, Any]: Annotation information
- """
- sequence_str = aa_sequence.toString()
- sequence_length = len(sequence_str)
-
- # Calculate theoretical masses for different fragment types
- for ion_type in ion_types:
- if ion_type in ['a', 'b', 'c']: # N-terminal ions
- for i in range(1, sequence_length):
- fragment_seq = sequence_str[:i]
- fragment_aa_seq = oms.AASequence.fromString(fragment_seq)
-
- # Calculate theoretical m/z for this ion type
- theoretical_mz = calculate_ion_mz(fragment_aa_seq, ion_type, charge)
-
- # Check if this matches our observed m/z (within tolerance)
- if abs(mz - theoretical_mz) < 0.01: # 0.01 Da tolerance
- return {
- 'ion_type': ion_type,
- 'fragment_number': i,
- 'fragment_sequence': fragment_seq,
- 'annotation': f'{ion_type}{i}{"+" * charge}'
- }
-
- elif ion_type in ['x', 'y', 'z']: # C-terminal ions
- for i in range(1, sequence_length):
- fragment_seq = sequence_str[-i:]
- fragment_aa_seq = oms.AASequence.fromString(fragment_seq)
-
- # Calculate theoretical m/z for this ion type
- theoretical_mz = calculate_ion_mz(fragment_aa_seq, ion_type, charge)
-
- # Check if this matches our observed m/z (within tolerance)
- if abs(mz - theoretical_mz) < 0.01: # 0.01 Da tolerance
- return {
- 'ion_type': ion_type,
- 'fragment_number': i,
- 'fragment_sequence': fragment_seq,
- 'annotation': f'{ion_type}{i}{"+" * charge}'
- }
-
- # Default annotation if no match found
- return {
- 'ion_type': 'unknown',
- 'fragment_number': 0,
- 'fragment_sequence': '',
- 'annotation': f'm/z {mz:.4f}{"+" * charge}'
- }
-
-def calculate_ion_mz(fragment_sequence: oms.AASequence, ion_type: str, charge: int) -> float:
- """Calculate theoretical m/z for a fragment ion.
-
- Args:
- fragment_sequence (oms.AASequence): The fragment sequence
- ion_type (str): The ion type (a, b, c, x, y, z)
- charge (int): The charge state
-
- Returns:
- float: Theoretical m/z value
- """
- mass = fragment_sequence.getMonoWeight()
-
- # Apply ion type specific mass adjustments
- if ion_type == 'a':
- mass -= 27.994915 # -CO
- elif ion_type == 'b':
- mass += 0.0 # No adjustment
- elif ion_type == 'c':
- mass += 17.026549 # +NH3
- elif ion_type == 'x':
- mass += 25.980218 # +CO -H
- elif ion_type == 'y':
- mass += 18.010565 # +H2O
- elif ion_type == 'z':
- mass += 0.984016 # +H -NH2
-
- # Add protons for charge
- mass += charge * 1.007276
-
- return mass / charge
-
-def create_fragmentation_plot(result_data: Dict[str, Any]) -> go.Figure:
- """Create the fragmentation spectrum plot.
-
- Args:
- result_data (Dict[str, Any]): Results from spectrum generation
-
- Returns:
- go.Figure: Plotly figure object
- """
- df = result_data["fragments"]
- print(df)
- # Color map for ion types
- color_map = {
- 'a': '#FF6B6B', # Red
- 'b': '#4ECDC4', # Teal
- 'c': '#45B7D1', # Blue
- 'x': '#96CEB4', # Green
- 'y': '#FFEAA7', # Yellow
- 'z': '#DDA0DD', # Plum
- 'unknown': '#95A5A6' # Gray
- }
-
- fig = go.Figure()
-
- # Add traces for each ion type
- for ion_type in df['ion_type'].unique():
- ion_data = df[df['ion_type'] == ion_type]
-
- fig.add_trace(go.Scatter(
- x=ion_data['mz'],
- y=ion_data['intensity'],
- mode='markers+lines',
- name=ION_TYPES.get(ion_type, {}).get('name', ion_type),
- marker=dict(
- color=color_map.get(ion_type, '#95A5A6'),
- size=8
- ),
- line=dict(width=0),
- text=ion_data['annotation'],
-            hovertemplate="%{text}<br>" +
-                          "m/z: %{x:.4f}<br>" +
-                          "Intensity: %{y:.1e}<br>" +
-                          "<extra></extra>"
- ))
-
- # Add stem lines
- for _, row in ion_data.iterrows():
- fig.add_shape(
- type="line",
- x0=row['mz'], y0=0,
- x1=row['mz'], y1=row['intensity'],
- line=dict(color=color_map.get(ion_type, '#95A5A6'), width=2)
- )
-
- fig.update_layout(
- title=f"Theoretical Fragment Spectrum: {result_data['sequence']}",
- xaxis_title="m/z",
- yaxis_title="Relative Intensity",
- hovermode='closest',
- showlegend=True,
- height=500
- )
-
- return fig
-
-# UI Implementation
-st.title("💥 Peptide Fragmentation Calculator")
-
-st.markdown("""
-Generate theoretical fragment ion spectra for peptide sequences using pyOpenMS.
-Select ion types and charge states to customize the fragmentation pattern.
-""")
-
-# Documentation section
-with st.expander("📚 Documentation", expanded=False):
- st.markdown("""
- ## Overview
-
- The Peptide Fragmentation Calculator generates theoretical fragment ion spectra for peptide sequences using the
- powerful **pyOpenMS** library. This tool simulates what would happen when a peptide is fragmented in a mass
- spectrometer, providing essential information for mass spectrometry analysis and peptide identification.
-
- ## Peptide Fragmentation Theory
-
- When peptides are subjected to collision-induced dissociation (CID) or higher-energy collisional dissociation (HCD)
- in a mass spectrometer, they fragment primarily along the peptide backbone. The fragmentation produces two series
- of ions:
-
- - **N-terminal ions**: Contain the N-terminus of the original peptide
- - **C-terminal ions**: Contain the C-terminus of the original peptide
-
- ### Ion Types Explained
-
- #### N-terminal Fragment Ions
- - **a-ions**: Result from cleavage of the C-N bond with loss of CO (carbonyl group)
- - Formula: [M + H - CO]⁺ where M is the N-terminal fragment mass
- - Less commonly observed in standard CID conditions
-
- - **b-ions**: Result from cleavage of the peptide bond (amide bond)
- - Formula: [M + H]⁺ where M is the N-terminal fragment mass
- - Most abundant N-terminal ions in CID spectra
-
- - **c-ions**: Result from cleavage of the N-Cα bond with retention of NH₃
- - Formula: [M + H + NH₃]⁺ where M is the N-terminal fragment mass
- - More common in ETD (electron transfer dissociation) conditions
-
- #### C-terminal Fragment Ions
- - **x-ions**: Result from cleavage of the N-Cα bond with addition of CO
- - Formula: [M + H + CO - H]⁺ where M is the C-terminal fragment mass
- - Less commonly observed
-
- - **y-ions**: Result from cleavage of the peptide bond with addition of H₂O
- - Formula: [M + H + H₂O]⁺ where M is the C-terminal fragment mass
- - Most abundant C-terminal ions in CID spectra
-
- - **z-ions**: Result from cleavage of the N-Cα bond with loss of NH₂
- - Formula: [M + H - NH₂]⁺ where M is the C-terminal fragment mass
- - More common in ETD conditions
-
- ## Usage Instructions
-
- ### 1. Enter Peptide Sequence
- - Use standard single-letter amino acid codes (A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y)
- - Extended codes (X, U) are also supported
- - Minimum sequence length: 2 amino acids
- - Example: `PEPTIDE`, `SAMPLESEQUENCE`, `ACDEFGHIK`
-
- ### 2. Select Ion Types
- - Choose which fragment ion types to include in the spectrum
- - **Recommended for CID/HCD**: b-ions and y-ions (default selection)
- - **For ETD analysis**: Add c-ions and z-ions
- - **Comprehensive analysis**: Select all ion types
-
- ### 3. Choose Charge States
- - Select the charge states to consider (1+ to 5+)
- - **Typical choice**: 1+ and 2+ for most peptides
- - **For longer peptides**: Include higher charge states (3+, 4+)
- - Higher charge states produce fragments at lower m/z values
-
- ### 4. Interpret Results
-
- #### Spectrum Plot
- - **X-axis**: m/z (mass-to-charge ratio)
- - **Y-axis**: Relative intensity (theoretical, normalized)
- - **Colors**: Different colors represent different ion types
- - **Hover**: Shows detailed information for each peak
-
- #### Fragment Table
- - **Ion Type**: The type of fragment ion (a, b, c, x, y, z)
- - **Fragment**: The fragment number (position from terminus)
- - **Charge**: The charge state of the fragment
- - **m/z**: The theoretical mass-to-charge ratio
- - **Sequence**: The amino acid sequence of the fragment
-
- ## Technical Details
-
- ### Algorithm
- - Uses pyOpenMS `TheoreticalSpectrumGenerator` class
- - Calculates exact monoisotopic masses for fragments
- - Applies ion-type specific mass corrections
- - Supports multiple charge states simultaneously
-
- ### Mass Calculations
- The theoretical m/z values are calculated using:
- ```
- m/z = (fragment_mass + ion_type_correction + charge × proton_mass) / charge
- ```
-
- Where:
- - `fragment_mass`: Exact monoisotopic mass of the amino acid sequence
- - `ion_type_correction`: Ion-specific mass adjustment (see ion types above)
- - `proton_mass`: 1.007276 Da
- - `charge`: The charge state (1, 2, 3, etc.)
-
- ### Parameters
- - **Isotopes**: Disabled for cleaner spectra (monoisotopic peaks only)
- - **Neutral losses**: Disabled by default for simplicity
- - **Mass accuracy**: Calculated to 4 decimal places
- - **Intensity**: Relative theoretical intensities (not experimental)
-
- ## Example Workflows
-
- ### Basic Peptide Analysis
- 1. Enter sequence: `PEPTIDE`
- 2. Select: b-ions and y-ions
- 3. Charge states: 1+ and 2+
- 4. Expected fragments: b₁-b₆, y₁-y₆ ions
-
- ### Comprehensive Fragmentation
- 1. Enter sequence: `SAMPLESEQUENCE`
- 2. Select: All ion types
- 3. Charge states: 1+, 2+, 3+
- 4. Results: Complete fragmentation pattern
-
- ### ETD Simulation
- 1. Enter sequence: `PEPTIDE`
- 2. Select: c-ions and z-ions
- 3. Charge states: 1+ and 2+
- 4. Results: ETD-like fragmentation pattern
-
- ## Troubleshooting
-
- ### Common Issues
-
- **"Sequence cannot be empty"**
- - Solution: Enter a valid amino acid sequence
-
- **"Invalid amino acid(s): X"**
- - Solution: Check for typos or non-standard amino acid codes
- - Use only standard single-letter codes
-
- **"Sequence must be at least 2 amino acids long"**
- - Solution: Enter a longer peptide sequence
- - Single amino acids cannot be fragmented
-
- **"Please select at least one ion type"**
- - Solution: Check at least one ion type checkbox
-
- **"Please select at least one charge state"**
- - Solution: Select at least one charge state from the dropdown
-
- ### Performance Notes
- - Longer sequences (>20 amino acids) may take longer to process
- - Higher charge states increase computation time
- - All ion types selected will generate more fragments
-
- ## Applications
-
- ### Mass Spectrometry Method Development
- - Design targeted MS/MS experiments
- - Optimize fragmentation conditions
- - Predict optimal precursor charge states
-
- ### Peptide Identification
- - Compare experimental spectra with theoretical fragments
- - Validate peptide sequence assignments
- - Understand fragmentation efficiency
-
- ### Educational Purposes
- - Learn peptide fragmentation patterns
- - Understand ion nomenclature
- - Explore charge state effects
-
- ## References and Further Reading
-
- ### Key Publications
- 1. **Roepstorff, P. & Fohlman, J.** (1984). Proposal for a common nomenclature for sequence ions in mass spectra of peptides. *Biomed. Mass Spectrom.* 11, 601.
-
- 2. **Senko, M.W. et al.** (1995). Determination of monoisotopic masses and ion populations for large biomolecules from resolved isotopic distributions. *J. Am. Soc. Mass Spectrom.* 6, 229-233.
-
- 3. **Hunt, D.F. et al.** (1986). Protein sequencing by tandem mass spectrometry. *Proc. Natl. Acad. Sci. USA* 83, 6233-6237.
-
- ### Software and Tools
- - **pyOpenMS**: Open-source mass spectrometry library ([www.openms.de](https://www.openms.de))
- - **NIST Mass Spectral Database**: Reference spectra and fragmentation patterns
- - **Protein Prospector**: Online MS tools from UCSF
-
- ### Educational Resources
- - **Mass Spectrometry: A Textbook** by Jürgen H. Gross
- - **Introduction to Mass Spectrometry** by J. Throck Watson
- - Online tutorials at [www.massspecpedia.com](http://www.massspecpedia.com)
-
- ---
-
- 💡 **Tip**: Start with the default settings (b-ions and y-ions, charges 1+ and 2+) for most peptides,
- then customize based on your specific analytical needs.
- """)
-
-col1, col2 = st.columns([1, 1])
-
-with col1:
- st.subheader("Input Parameters")
-
- # Peptide sequence input
- sequence_input = st.text_area(
- "Peptide Sequence:",
- value="PEPTIDE",
- height=200,
- help="""Enter the peptide sequence using single-letter amino acid codes:
-
-• Standard amino acids: A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
-• Extended codes: X (any amino acid), U (selenocysteine)
-• Minimum length: 2 amino acids for fragmentation
-• Spaces and non-letter characters will be automatically removed
-
-Examples: PEPTIDE, ACDEFGHIK, SAMPLESEQUENCE"""
- )
-
- # Ion type selection
- st.write("**Ion Types:**")
- st.caption("Select which fragment ion types to include in the theoretical spectrum")
- ion_types = []
-
- col_ions1, col_ions2 = st.columns(2)
-
- with col_ions1:
- st.markdown("**N-terminal ions:**")
- if st.checkbox("a-ions", help="""a-ions: N-terminal fragments with CO loss
-
-• Formation: Cleavage at peptide bond + loss of CO (28 Da)
-• Formula: [M + H - CO]⁺
-• Abundance: Low in CID, moderate in high-energy conditions
-• Mass shift: -27.99 Da from corresponding b-ion"""):
- ion_types.append('a')
- if st.checkbox("b-ions", value=True, help="""b-ions: Most common N-terminal fragments
-
-• Formation: Direct cleavage at peptide bond (amide bond)
-• Formula: [M + H]⁺ where M = N-terminal fragment mass
-• Abundance: High in CID/HCD spectra (dominant N-terminal series)
-• Nomenclature: b₁, b₂, b₃... numbered from N-terminus"""):
- ion_types.append('b')
- if st.checkbox("c-ions", help="""c-ions: N-terminal fragments with NH₃ retention
-
-• Formation: Cleavage at N-Cα bond + retention of NH₃
-• Formula: [M + H + NH₃]⁺
-• Abundance: High in ETD/ECD, low in CID
-• Mass shift: +17.03 Da from corresponding b-ion"""):
- ion_types.append('c')
-
- with col_ions2:
- st.markdown("**C-terminal ions:**")
- if st.checkbox("x-ions", help="""x-ions: C-terminal fragments with CO addition
-
-• Formation: Cleavage at N-Cα bond + addition of CO
-• Formula: [M + H + CO - H]⁺
-• Abundance: Low in most fragmentation methods
-• Mass shift: +25.98 Da from corresponding y-ion"""):
- ion_types.append('x')
- if st.checkbox("y-ions", value=True, help="""y-ions: Most common C-terminal fragments
-
-• Formation: Cleavage at peptide bond + addition of H₂O
-• Formula: [M + H + H₂O]⁺ where M = C-terminal fragment mass
-• Abundance: High in CID/HCD spectra (dominant C-terminal series)
-• Nomenclature: y₁, y₂, y₃... numbered from C-terminus"""):
- ion_types.append('y')
- if st.checkbox("z-ions", help="""z-ions: C-terminal fragments with NH₂ loss
-
-• Formation: Cleavage at N-Cα bond + loss of NH₂
-• Formula: [M + H - NH₂]⁺
-• Abundance: High in ETD/ECD, low in CID
-• Mass shift: +0.98 Da from corresponding y-ion"""):
- ion_types.append('z')
-
- # Charge state selection
- charges = st.multiselect(
- "Charge States:",
- options=[1, 2, 3, 4, 5],
- default=[1, 2],
- help="""Select charge states to include in the theoretical spectrum:
-
-• 1+: Singly charged fragments (most common for short peptides)
-• 2+: Doubly charged fragments (common for longer peptides)
-• 3+ and higher: Multiple charges (for long peptides, lower m/z values)
-
-Higher charge states:
-- Produce fragments at lower m/z ratios
-- Are more common with longer peptide sequences
-- May improve fragmentation coverage
-- Require higher precursor charge states"""
- )
-
- # Initialize result_data
- result_data = None
-
- # Generate button
- if st.button('Generate Fragment Spectrum', type='primary'):
- with st.spinner('Generating theoretical spectrum...'):
- result_data = generate_theoretical_spectrum(sequence_input, ion_types, charges)
-
-with col2:
- st.subheader("Results")
-
- if result_data:
- if result_data["success"]:
- # Display basic info
- st.write(f"**Sequence:** {result_data['sequence']}")
- st.write(f"**Ion Types:** {', '.join([ION_TYPES[ion]['name'] for ion in result_data['ion_types']])}")
- st.write(f"**Charge States:** {', '.join(map(str, result_data['charges']))}")
- st.write(f"**Total Fragments:** {len(result_data['fragments'])}")
-
- # Summary by ion type
- if len(result_data['fragments']) > 0:
- summary = result_data['fragments'].groupby('ion_type').size()
- st.write("**Fragments by Ion Type:**")
- for ion_type, count in summary.items():
- ion_name = ION_TYPES.get(ion_type, {}).get('name', ion_type)
- st.write(f"- {ion_name}: {count}")
- else:
- st.error(f"Error: {result_data['error']}")
-
-# Display plot and data table
-if 'result_data' in locals() and result_data and result_data["success"]:
- # Create and display plot
- fig = create_fragmentation_plot(result_data)
- show_fig(fig, 'Fragment Spectrum')
-
- # Display fragment table
- st.subheader("Fragment Ion Table")
-
- # Format the dataframe for display
- display_df = result_data['fragments'].copy()
- display_df['m/z'] = display_df['mz'].round(4)
- display_df['Ion Type'] = display_df['ion_type'].map(lambda x: ION_TYPES.get(x, {}).get('name', x))
- display_df['Fragment'] = display_df['fragment_number']
- display_df['Charge'] = display_df['charge'].astype(str) + '+'
- display_df['Sequence'] = display_df['sequence']
- #display_df['Intensity'] = display_df['intensity'].apply(lambda x: f"{x:.2e}")
-
- # Select columns for display
- display_columns = ['Ion Type', 'Fragment', 'Charge', 'm/z',
- #'Intensity',
- 'Sequence']
- st.dataframe(display_df[display_columns], use_container_width=True)
-
- # Download options
- st.subheader("Export Options")
-
- # Prepare TSV data
- tsv_buffer = io.StringIO()
- display_df[display_columns].to_csv(tsv_buffer, sep='\t', index=False)
- tsv_buffer.seek(0)
- tsv_data = tsv_buffer.getvalue()
-
- # Try to create Excel file with xlsxwriter, fallback to TSV if not available
- xlsx_available = True
- xlsx_data = None
- xlsx_error_msg = None
-
- try:
- xlsx_buffer = io.BytesIO()
- with pd.ExcelWriter(xlsx_buffer, engine="xlsxwriter") as writer:
- display_df[display_columns].to_excel(writer, index=False, sheet_name="Fragment Ions")
- xlsx_buffer.seek(0)
- xlsx_data = xlsx_buffer.getvalue()
- except ImportError as e:
- xlsx_available = False
- xlsx_error_msg = "xlsxwriter module not available"
- st.warning("⚠️ Excel export unavailable: xlsxwriter module not found. Using TSV format as fallback.")
- except Exception as e:
- xlsx_available = False
- xlsx_error_msg = f"Excel export error: {str(e)}"
- st.warning(f"⚠️ Excel export failed: {str(e)}. Using TSV format as fallback.")
-
- if xlsx_available:
- col_tsv, col_xlsx = st.columns(2)
- else:
- col_tsv, col_tsv_fallback = st.columns(2)
-
- with col_tsv:
- st.download_button(
- label="Download TSV",
- data=tsv_data,
- file_name=f"fragments_{result_data['sequence']}.tsv",
- mime="text/tab-separated-values"
- )
-
- if xlsx_available:
- with col_xlsx:
- st.download_button(
- label="Download Excel",
- data=xlsx_data,
- file_name=f"fragments_{result_data['sequence']}.xlsx",
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
- )
- else:
- with col_tsv_fallback:
- st.download_button(
- label="Download TSV (Excel fallback)",
- data=tsv_data,
- file_name=f"fragments_{result_data['sequence']}_fallback.tsv",
- mime="text/tab-separated-values",
- help="Excel export unavailable, downloading as TSV instead"
- )
diff --git a/content/isotope_pattern_generator.py b/content/isotope_pattern_generator.py
deleted file mode 100644
index 680d2d0..0000000
--- a/content/isotope_pattern_generator.py
+++ /dev/null
@@ -1,625 +0,0 @@
-import io
-import re
-from typing import Tuple, Dict, Any, Optional
-
-import plotly.graph_objects as go
-import streamlit as st
-import pyopenms as oms
-import pandas as pd
-import numpy as np
-
-from src.common.common import page_setup, show_fig
-
-params = page_setup()
-
-# Initialize pattern generators
-coarse_pattern_generator = oms.CoarseIsotopePatternGenerator()
-fine_pattern_generator = oms.FineIsotopePatternGenerator()
-
-pd.options.plotting.backend = "ms_plotly"
-
-def validate_elemental_formula(formula_str: str) -> Tuple[bool, str, Optional[oms.EmpiricalFormula]]:
- """Validate an elemental formula string using pyOpenMS.
-
- Args:
- formula_str (str): The elemental formula string (e.g., "C100H150N26O30S1")
-
- Returns:
- Tuple[bool, str, Optional[EmpiricalFormula]]: (is_valid, error_message, formula_object)
- """
- try:
- # Clean the formula string
- formula_str = formula_str.strip()
- if not formula_str:
- return False, "Formula cannot be empty", None
-
- # Try to parse with pyOpenMS
- empirical_formula = oms.EmpiricalFormula(formula_str)
-
- return True, "", empirical_formula
-
- except Exception as e:
- return False, f"Invalid formula format: {str(e)}", None
-
-def validate_peptide_sequence(sequence_str: str) -> Tuple[bool, str, Optional[str]]:
- """Validate a peptide/protein sequence.
-
- Args:
- sequence_str (str): The amino acid sequence
-
- Returns:
- Tuple[bool, str, Optional[str]]: (is_valid, error_message, clean_sequence)
- """
- try:
- # Clean the sequence
- sequence_str = sequence_str.strip().upper()
- if not sequence_str:
- return False, "Sequence cannot be empty", None
-
- # Remove common formatting characters
- clean_sequence = re.sub(r'[^ACDEFGHIKLMNPQRSTVWYXU]', '', sequence_str)
-
- if not clean_sequence:
- return False, "No valid amino acid letters found", None
-
- # Validate amino acids
- valid_aa = set("ACDEFGHIKLMNPQRSTVWYXU")
- invalid_chars = [aa for aa in clean_sequence if aa not in valid_aa]
-
- if invalid_chars:
- invalid_list = ", ".join(sorted(set(invalid_chars)))
- return False, f"Invalid amino acid(s): {invalid_list}", None
-
- return True, "", clean_sequence
-
- except Exception as e:
- return False, f"Error validating sequence: {str(e)}", None
-
-def validate_oligonucleotide_sequence(sequence_str: str) -> Tuple[bool, str, Optional[str]]:
- """Validate an oligonucleotide (RNA) sequence
-
- Args:
- sequence_str (str): The nucleotide sequence
-
- Returns:
- Tuple[bool, str, Optional[str]]: (is_valid, error_message, sequence_str)
- """
- try:
- # Clean the sequence
- sequence_str = sequence_str.strip().upper()
- if not sequence_str:
- return False, "Sequence cannot be empty", None
-
- # Remove common formatting characters (spaces, numbers, newlines)
- clean_sequence = re.sub(r'[^ACGU]', '', sequence_str)
-
- if not clean_sequence:
- return False, "No valid nucleotide letters found", None
-
- # Validate nucleotides (A, C, G, U for RNA)
- valid_nucleotides = set("ACGU")
- invalid_chars = [nt for nt in clean_sequence if nt not in valid_nucleotides]
-
- if invalid_chars:
- invalid_list = ", ".join(sorted(set(invalid_chars)))
- return False, f"Invalid nucleotide(s): {invalid_list}. Valid nucleotides: A, C, G, U", None
-
- return True, "", sequence_str
-
- except Exception as e:
- return False, f"Error validating oligonucleotide sequence: {str(e)}", None
-
-def generate_isotope_pattern_from_formula(formula_str: str, use_fine_generator: bool = False) -> Dict[str, Any]:
- """Generate isotope pattern from elemental formula using specified generator.
-
- Args:
- formula_str (str): The elemental formula string
- use_fine_generator (bool): Whether to use FineIsotopePatternGenerator (default: False)
-
- Returns:
- Dict[str, Any]: Results dictionary with mzs, intensities, and metadata
- """
- try:
- # Validate formula
- is_valid, error_msg, empirical_formula = validate_elemental_formula(formula_str)
- if not is_valid:
- return {"success": False, "error": error_msg}
-
- # Select generator
- generator = fine_pattern_generator if use_fine_generator else coarse_pattern_generator
- generator_name = "Fine" if use_fine_generator else "Coarse"
-
- # Generate isotope pattern
- isotope_distribution = empirical_formula.getIsotopeDistribution(generator)
- avg_weight = empirical_formula.getAverageWeight()
- distribution = isotope_distribution.getContainer()
-
- # Extract data
- mzs = np.array([p.getMZ() for p in distribution])
- intensities = np.array([p.getIntensity() for p in distribution])
-
- # Calculate masses
- monoisotopic_mass = empirical_formula.getMonoWeight()
- average_mass = empirical_formula.getAverageWeight()
-
- return {
- "success": True,
- "mzs": mzs,
- "intensities": intensities,
- "monoisotopic_mass": monoisotopic_mass,
- "average_mass": average_mass,
- "formula": formula_str,
- "source_type": f"Elemental Formula ({generator_name})",
- "input_value": formula_str,
- "generator": generator_name
- }
-
- except Exception as e:
- return {"success": False, "error": f"Error generating pattern from formula: {str(e)}"}
-
-def generate_isotope_pattern_from_sequence(sequence_str: str, use_fine_generator: bool = False) -> Dict[str, Any]:
- """Generate isotope pattern from peptide/protein sequence using specified generator.
-
- Args:
- sequence_str (str): The amino acid sequence
- use_fine_generator (bool): Whether to use FineIsotopePatternGenerator (default: False)
-
- Returns:
- Dict[str, Any]: Results dictionary with mzs, intensities, and metadata
- """
- try:
- # Validate sequence
- is_valid, error_msg, clean_sequence = validate_peptide_sequence(sequence_str)
- if not is_valid:
- return {"success": False, "error": error_msg}
-
- # Create AASequence object
- aa_sequence = oms.AASequence.fromString(clean_sequence)
-
- # Get empirical formula from sequence
- empirical_formula = aa_sequence.getFormula()
-
- # Select generator
- generator = fine_pattern_generator if use_fine_generator else coarse_pattern_generator
- generator_name = "Fine" if use_fine_generator else "Coarse"
-
- # Generate isotope pattern
- isotope_distribution = empirical_formula.getIsotopeDistribution(generator)
- avg_weight = aa_sequence.getAverageWeight()
-
- distribution = isotope_distribution.getContainer()
-
- # Extract data
- mzs = np.array([p.getMZ() for p in distribution])
- intensities = np.array([p.getIntensity() for p in distribution])
-
- # Calculate masses
- monoisotopic_mass = aa_sequence.getMonoWeight()
- average_mass = aa_sequence.getAverageWeight()
-
- # Handle formula string conversion (pyOpenMS version compatibility)
- formula_str = empirical_formula.toString()
- if isinstance(formula_str, bytes):
- formula_str = formula_str.decode('utf-8')
-
- return {
- "success": True,
- "mzs": mzs,
- "intensities": intensities,
- "monoisotopic_mass": monoisotopic_mass,
- "average_mass": average_mass,
- "formula": formula_str,
- "sequence": clean_sequence,
- "source_type": f"Peptide/Protein Sequence ({generator_name})",
- "input_value": sequence_str,
- "generator": generator_name
- }
-
- except Exception as e:
- return {"success": False, "error": f"Error generating pattern from sequence: {str(e)}"}
-
-def generate_isotope_pattern_from_oligonucleotide(sequence_str: str, use_fine_generator: bool = False) -> Dict[str, Any]:
- """Generate isotope pattern from oligonucleotide (DNA/RNA) sequence using specified generator.
-
- Args:
- sequence_str (str): The nucleotide sequence (DNA will be converted to RNA)
- use_fine_generator (bool): Whether to use FineIsotopePatternGenerator (default: False)
-
- Returns:
- Dict[str, Any]: Results dictionary with mzs, intensities, and metadata
- """
- try:
- # Validate sequence (converts DNA to RNA automatically)
- is_valid, error_msg, rna_sequence = validate_oligonucleotide_sequence(sequence_str)
- if not is_valid:
- return {"success": False, "error": error_msg}
-
- # Check if conversion happened
- original_clean = re.sub(r'[^ACGTUN]', '', sequence_str.strip().upper())
- conversion_note = ""
- if 'T' in original_clean:
- conversion_note = " (DNA converted to RNA: T→U)"
-
- # Create NASequence object (for nucleic acids - RNA only)
- na_sequence = oms.NASequence.fromString(rna_sequence)
-
- # Get empirical formula from sequence
- empirical_formula = na_sequence.getFormula()
-
- # Select generator
- generator = fine_pattern_generator if use_fine_generator else coarse_pattern_generator
- generator_name = "Fine" if use_fine_generator else "Coarse"
-
- # Generate isotope pattern
- isotope_distribution = empirical_formula.getIsotopeDistribution(generator)
- avg_weight = na_sequence.getAverageWeight()
-
- distribution = isotope_distribution.getContainer()
-
- # Extract data
- mzs = np.array([p.getMZ() for p in distribution])
- intensities = np.array([p.getIntensity() for p in distribution])
-
- # Calculate masses
- monoisotopic_mass = na_sequence.getMonoWeight()
- average_mass = na_sequence.getAverageWeight()
-
- # Handle formula string conversion (pyOpenMS version compatibility)
- formula_str = empirical_formula.toString()
- if isinstance(formula_str, bytes):
- formula_str = formula_str.decode('utf-8')
-
- return {
- "success": True,
- "mzs": mzs,
- "intensities": intensities,
- "monoisotopic_mass": monoisotopic_mass,
- "average_mass": average_mass,
- "formula": formula_str,
- "sequence": rna_sequence,
- "original_sequence": original_clean,
- "conversion_note": conversion_note,
- "source_type": f"Oligonucleotide Sequence ({generator_name}){conversion_note}",
- "input_value": sequence_str,
- "generator": generator_name
- }
-
- except Exception as e:
- return {"success": False, "error": f"Error generating pattern from oligonucleotide: {str(e)}"}
-
-def generate_isotope_pattern_from_mass(target_mass: float) -> Dict[str, Any]:
- """Generate isotope pattern from mass using CoarseIsotopePatternGenerator (existing method).
-
- Args:
- target_mass (float): The target mass in Da
-
- Returns:
- Dict[str, Any]: Results dictionary with mzs, intensities, and metadata
- """
- try:
- if target_mass <= 0:
- return {"success": False, "error": "Mass must be greater than 0"}
-
- # Start with most_intense_mass == avg_mass (existing algorithm)
- start = coarse_pattern_generator.estimateFromPeptideWeight(target_mass).getMostAbundant().getMZ()
-
- # Extend to the right
- right_samples = []
- right_samples_avg = []
- for delta in np.arange(0, 20, 0.2):
- current_sample = coarse_pattern_generator.estimateFromPeptideWeight(
- target_mass + delta
- ).getMostAbundant().getMZ()
- right_samples.append(current_sample)
- right_samples_avg.append(target_mass + delta)
-
- # Stop extension if result gets worse than base case
- if abs(current_sample - target_mass) > abs(start - target_mass):
- break
-
- # Extend to the left
- left_samples = []
- left_samples_avg = []
- for delta in np.arange(0, 20, 0.2):
- current_sample = coarse_pattern_generator.estimateFromPeptideWeight(
- target_mass - delta
- ).getMostAbundant().getMZ()
- left_samples.append(current_sample)
- left_samples_avg.append(target_mass - delta)
-
- # Stop extension if result gets worse than base case
- if abs(current_sample - target_mass) > abs(start - target_mass):
- break
-
- # Combine samples
- samples = np.array(left_samples + [start] + right_samples)
- samples_avg = np.array(left_samples_avg + [target_mass] + right_samples_avg)
-
- # Determine best fit
- best_pos = np.argmin(np.abs(samples - target_mass))
- best_avg = samples_avg[best_pos]
-
- # Compute distribution of best fit
- distribution_obj = coarse_pattern_generator.estimateFromPeptideWeight(best_avg)
- distribution = distribution_obj.getContainer()
- mzs = np.array([p.getMZ() for p in distribution])
- intensities = np.array([p.getIntensity() for p in distribution])
- monoisotopic = np.min(mzs) # Monoisotopic isotope = lightest
-
- # Recompute average
- best_avg = np.sum(mzs * intensities)
-
- # Adjust distribution
- delta = distribution_obj.getMostAbundant().getMZ() - target_mass
- mzs -= delta
- best_avg -= delta
- monoisotopic -= delta
-
- return {
- "success": True,
- "mzs": mzs,
- "intensities": intensities,
- "monoisotopic_mass": monoisotopic,
- "average_mass": best_avg,
- "formula": "Estimated from mass",
- "source_type": "Mass Estimation",
- "input_value": f"{target_mass:.2f} Da"
- }
-
- except Exception as e:
- return {"success": False, "error": f"Error generating pattern from mass: {str(e)}"}
-
-def create_isotope_plot(result_data: Dict[str, Any]) -> go.Figure:
- """Create the isotope pattern plot.
-
- Args:
- result_data (Dict[str, Any]): Results from pattern generation
-
- Returns:
- go.Figure: Plotly figure object
- """
- mzs = result_data["mzs"]
- intensities = result_data["intensities"]
-
- # Create dataframe
- df = pd.DataFrame({
- 'mz': mzs,
- 'intensity': intensities
- })
-
- # Color highlights
- df['color'] = 'black'
- df.iloc[np.argmax(df['intensity']), -1] = 'red'
-
- # Plot
- fig = go.Figure()
- fig = df[df['intensity'] != 0].plot(
- x="mz",
- y="intensity",
- kind="spectrum",
- peak_color='color',
- canvas=fig,
- show_plot=False,
- grid=False,
- annotate_top_n_peaks=1
- )
-
- considered = mzs[intensities > (0.001 * max(intensities))]
- fig.update_xaxes(range=[np.min(considered), np.max(considered)])
- fig.update_layout(
- title="Isotopic Envelope",
- xaxis_title="m/z",
- yaxis_title="Intensity"
- )
-
- return fig
-
-# UI Implementation
-st.title("📶 Isotopic Pattern Calculator")
-
-st.markdown("""
-**Generate theoretical isotopic envelopes** for molecules to understand mass spectrometric signatures and optimize analysis conditions.
-
-This tool calculates isotopic distributions using pyOpenMS, helping you:
-- **Predict MS peak patterns** for accurate mass measurements and peak assignment
-- **Optimize MS parameters** by understanding peak spacing and intensity distributions
-- **Validate experimental data** by comparing observed vs theoretical patterns
-- **Design targeted experiments** by predicting isotopic signatures for specific molecules
-""")
-
-with st.expander("📚 **Understanding Isotopic Patterns**"):
- st.markdown("""
- **Natural Isotopes:**
- - Elements exist as multiple isotopes with different masses
- - **Carbon**: ¹²C (98.9%) and ¹³C (1.1%) - primary contributor to isotopic patterns
- - **Nitrogen**: ¹⁴N (99.6%) and ¹⁵N (0.4%) - important for peptides and nucleotides
- - **Sulfur**: ³²S (95.0%) and ³⁴S (4.2%) - significant contribution in proteins
-
- **Isotopic Envelope Shape:**
- - **Small molecules**: Simple patterns with M+1, M+2 peaks
- - **Large molecules**: Complex bell-shaped distributions
- - **Pattern width** increases with molecular size due to multiple isotopic combinations
-
- **Input Methods:**
- - **Mass Estimation**: Quick approximation for unknown compounds
- - **Elemental Formula**: Precise calculation for known molecular composition
- - **Peptide/Protein**: Automatic formula calculation from amino acid sequence
- - **Oligonucleotides**: DNA/RNA sequence support with automatic T→U conversion
-
- **Generator Options:**
- - **Coarse Generator**: Fast computation, suitable for most applications
- - **Fine Generator**: High precision for detailed isotopic analysis
-
- **Applications:**
- - **Peak assignment** in high-resolution mass spectrometry
- - **Charge state determination** through isotopic peak spacing
- - **Molecular formula confirmation** by pattern matching
- - **Method development** for accurate mass measurements
- """)
-
-st.markdown("""
-**Choose your input method:**
-""")
-
-# Input method selection
-input_method = st.selectbox(
- "Select Input Method:",
- ["Mass (Da)", "Elemental Formula", "Peptide/Protein Sequence", "Oligonucleotide Sequence"],
- help="Choose how you want to specify your molecule"
-)
-
-# Generator selection (only for formula, sequence, and oligonucleotide)
-if input_method in ["Elemental Formula", "Peptide/Protein Sequence", "Oligonucleotide Sequence"]:
- use_fine_generator = st.checkbox(
- "Use Fine Isotope Pattern Generator",
- value=False,
- help="""
- - **Coarse Generator** (Default): Faster computation, good for most applications
- - **Fine Generator**: More precise calculations, slower for large molecules
- """
- )
-else:
- use_fine_generator = False
-
-col1, col2 = st.columns([1, 1])
-
-with col1:
- result_data = None
-
- if input_method == "Mass (Da)":
- target_mass = st.number_input(
- "Input most abundant/intense peak [Da]:",
- min_value=0.0,
- value=20000.0,
- help="""
- The most intense (or most abundant) peak is the isotope peak
- with the highest abundance in the protein's mass spectrum. It
- represents the most common isotopic composition and serves as
- the reference point for reconstructing the full isotopic envelope.
- """
- )
-
- if st.button('Compute Isotopic Envelope'):
- with st.spinner('Computing from mass...'):
- result_data = generate_isotope_pattern_from_mass(target_mass)
-
- elif input_method == "Elemental Formula":
- formula_input = st.text_input(
- "Elemental Formula:",
- value="C100H150N26O30S1",
- help="""
- Enter the molecular formula using standard notation.
- Examples: C100H150N26O30S1, C6H12O6, C43H66N12O12S2
- """
- )
-
- if st.button('Compute Isotopic Envelope'):
- generator_type = "fine" if use_fine_generator else "coarse"
- with st.spinner(f'Computing from formula using {generator_type} generator...'):
- result_data = generate_isotope_pattern_from_formula(formula_input, use_fine_generator)
-
- elif input_method == "Peptide/Protein Sequence":
- sequence_input = st.text_area(
- "Amino Acid Sequence:",
- value="PEPTIDE",
- height=100,
- help="""
- Enter the peptide or protein sequence using single-letter amino acid codes.
- Examples: PEPTIDE, MKLNFSLRLRR, ACDEFGHIKLMNPQRSTVWY
- """
- )
-
- if st.button('Compute Isotopic Envelope'):
- generator_type = "fine" if use_fine_generator else "coarse"
- with st.spinner(f'Computing from sequence using {generator_type} generator...'):
- result_data = generate_isotope_pattern_from_sequence(sequence_input, use_fine_generator)
-
- elif input_method == "Oligonucleotide Sequence":
- oligonucleotide_input = st.text_area(
- "Nucleotide Sequence:",
- value="AUCGAUCG",
- height=100,
- help="""
- RNA sequence using standard nucleotide codes.
- Valid nucleotides: A (adenine), C (cytosine), G (guanine), U (uracil), N (any)
- Examples: AUCGAUCG, AAAUUUCCCGGG
- """
- )
-
- if st.button('Compute Isotopic Envelope'):
- generator_type = "fine" if use_fine_generator else "coarse"
- with st.spinner(f'Computing from oligonucleotide using {generator_type} generator...'):
- result_data = generate_isotope_pattern_from_oligonucleotide(oligonucleotide_input, use_fine_generator)
-
-with col2:
- if result_data:
- if result_data["success"]:
- # Display results
- st.write(f"**Source:** {result_data['source_type']}")
- st.write(f"**Input:** {result_data['input_value']}")
- if "generator" in result_data:
- st.write(f"**Generator:** {result_data['generator']} Isotope Pattern Generator")
- if "formula" in result_data:
- st.write(f"**Molecular Formula:** {result_data['formula']}")
- if "sequence" in result_data:
- st.write(f"**Sequence:** {result_data['sequence']}")
- # Show conversion info for oligonucleotides
- if "original_sequence" in result_data and "conversion_note" in result_data:
- if result_data["conversion_note"]:
- st.write(f"**Original Sequence:** {result_data['original_sequence']}")
- st.info(f"DNA sequence converted to RNA for processing{result_data['conversion_note']}")
- st.write(f"**Monoisotopic Mass:** {result_data['monoisotopic_mass']:.5f} Da")
- st.write(f"**Average Mass:** {result_data['average_mass']:.5f} Da")
- else:
- st.error(f"Error: {result_data['error']}")
-
-# Display plot and download options
-if result_data and result_data["success"]:
- # Create and display plot
- fig = create_isotope_plot(result_data)
- show_fig(fig, 'Isotopic Envelope')
-
- # Prepare download data
- df_out = pd.DataFrame({
- 'mz': result_data["mzs"],
- 'intensity': result_data["intensities"],
- 'color': ['red' if i == np.argmax(result_data["intensities"]) else 'black'
- for i in range(len(result_data["mzs"]))]
- })
-
- # Create download files
- tsv_buffer = io.StringIO()
- df_out.to_csv(tsv_buffer, sep='\t', index=False)
- tsv_buffer.seek(0)
- tsv_file = tsv_buffer.getvalue()
-
- xlsx_buffer = io.BytesIO()
- with pd.ExcelWriter(xlsx_buffer, engine="xlsxwriter") as writer:
- df_out.to_excel(writer, index=False, sheet_name="MS Data")
- xlsx_buffer.seek(0)
- xlsx_file = xlsx_buffer.getvalue()
-
- # Download buttons
- tsv_col, excel_col, _ = st.columns(3)
-
- @st.fragment
- def tsv_download():
- st.download_button(
- label="Download TSV file",
- file_name=f'Isotopic_Envelope_{result_data["source_type"].replace("/", "_").replace(" ", "_")}.tsv',
- data=tsv_file
- )
-
- with tsv_col:
- tsv_download()
-
- @st.fragment
- def xlsx_download():
- st.download_button(
- label="Download Excel file",
- file_name=f'Isotopic_Envelope_{result_data["source_type"].replace("/", "_").replace(" ", "_")}.xlsx',
- data=xlsx_file
- )
-
- with excel_col:
- xlsx_download()
\ No newline at end of file
diff --git a/test_gui.py b/test_gui.py
index 04cf6e0..fa699ea 100644
--- a/test_gui.py
+++ b/test_gui.py
@@ -35,9 +35,8 @@ def _uses_page_link(path: str) -> bool:
# Collect all content pages: those registered in app.py plus any other .py files
-# in content/ (utility pages like digest.py, fragmentation.py, etc.).
-# Exclude pages using st.page_link() — these require full st.navigation()
-# context and cannot be launched in isolation via AppTest.
+# in content/. Exclude pages using st.page_link() — these require full
+# st.navigation() context and cannot be launched in isolation via AppTest.
_app_pages = get_pages_from_app()
_all_content = sorted(
str(p) for p in Path("content").glob("*.py") if p.name != "__init__.py"
diff --git a/utils/__init__.py b/utils/__init__.py
deleted file mode 100644
index 560b277..0000000
--- a/utils/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-Utility modules for the streamlit-template application.
-"""
\ No newline at end of file
diff --git a/utils/digest.py b/utils/digest.py
deleted file mode 100644
index 0aa73f9..0000000
--- a/utils/digest.py
+++ /dev/null
@@ -1,359 +0,0 @@
-"""
-Protein digestion utilities using pyOpenMS.
-"""
-import pandas as pd
-from typing import List, Tuple, Dict, Any
-import pyopenms as oms
-from .fasta import extract_accession, extract_description
-
-
-import os
-
-
-def perform_digest(sequences: List[Tuple[str, str]], enzyme: str, missed_cleavages: int, max_charges: int) -> pd.DataFrame:
- """
- Perform in silico protein digestion using pyOpenMS.
-
- Args:
- sequences: List of (header, sequence) tuples
- enzyme: Enzyme name for digestion
- missed_cleavages: Maximum number of missed cleavages
- max_charges: Maximum charge state to calculate
-
- Returns:
- pandas DataFrame with digest results
- """
- results = []
-
- # Set up the digestion
- digest = oms.ProteaseDigestion()
- digest.setEnzyme(enzyme)
- digest.setMissedCleavages(missed_cleavages)
-
- for header, sequence in sequences:
- accession = extract_accession(header)
- description = extract_description(header)
- try:
- # Use the correct pyOpenMS digest method with string input
- peptide_strings = []
- seq = oms.AASequence.fromString(sequence)
- digest.digest(seq, peptide_strings)
-
- #for peptide_seq in peptide_strings:
- # os.write(1, f"Generated peptide: {peptide_seq}\n".encode())
- for i, peptide in enumerate(peptide_strings):
-
- if peptide.size() > 0: # Skip empty peptides
- try:
- # Calculate mass using AASequence
- aa_seq = oms.AASequence(peptide)
- mono_mass = aa_seq.getMonoWeight()
-
- # Create row data
- peptide_string = peptide.toString()
-
- # Find all positions of this peptide in the original sequence
- start_positions = []
- end_positions = []
- start_pos = 0
- while True:
- pos = sequence.find(peptide_string, start_pos)
- if pos == -1:
- break
- start_positions.append(str(pos + 1)) # Convert to 1-based
- end_positions.append(str(pos + len(peptide_string))) # End position (1-based)
- start_pos = pos + 1
-
- # Join positions with commas if multiple occurrences
- start_str = ','.join(start_positions)
- end_str = ','.join(end_positions)
-
- row_data = {
- 'Accession': accession,
- 'Description': description,
- 'Peptide Sequence': peptide_string,
- 'Length': len(peptide_string),
- 'Start': start_str,
- 'End': end_str,
- '[M]': round(mono_mass, 4)
- }
-
- # Add charged masses [M + zH]
- for charge in range(1, max_charges + 1):
- charged_mass = (mono_mass + charge * 1.007276) / charge
- row_data[f'[M + {charge}H]'] = round(charged_mass, 4)
-
- results.append(row_data)
- except Exception:
- # Skip problematic peptides
- continue
- except Exception:
- # If digest fails, skip this sequence
- continue
-
- return pd.DataFrame(results)
-
-
-def calculate_mass_with_charge(mono_mass: float, charge: int) -> float:
- """
- Calculate mass-to-charge ratio for a given monoisotopic mass and charge.
-
- Args:
- mono_mass: Monoisotopic mass
- charge: Charge state
-
- Returns:
- Mass-to-charge ratio
- """
- proton_mass = 1.007276 # Mass of a proton
- return (mono_mass + charge * proton_mass) / charge
-
-
-def get_digest_statistics(df: pd.DataFrame) -> Dict[str, Any]:
- """
- Calculate statistics for the digest results.
-
- Args:
- df: DataFrame with digest results
-
- Returns:
- Dictionary with statistics
- """
- if df.empty:
- return {
- 'total_peptides': 0,
- 'unique_proteins': 0,
- 'avg_peptide_length': 0,
- 'mass_range': (0, 0)
- }
-
- stats = {
- 'total_peptides': len(df),
- 'unique_proteins': df['Accession'].nunique(),
- 'avg_peptide_length': df['Peptide Sequence'].str.len().mean(),
- 'mass_range': (df['[M]'].min(), df['[M]'].max())
- }
-
- return stats
-
-
-def filter_peptides_by_mass(df: pd.DataFrame, min_mass: float = None, max_mass: float = None) -> pd.DataFrame:
- """
- Filter peptides by mass range.
-
- Args:
- df: DataFrame with digest results
- min_mass: Minimum mass threshold
- max_mass: Maximum mass threshold
-
- Returns:
- Filtered DataFrame
- """
- filtered_df = df.copy()
-
- if min_mass is not None:
- filtered_df = filtered_df[filtered_df['[M]'] >= min_mass]
-
- if max_mass is not None:
- filtered_df = filtered_df[filtered_df['[M]'] <= max_mass]
-
- return filtered_df
-
-
-def filter_peptides_by_length(df: pd.DataFrame, min_length: int = None, max_length: int = None) -> pd.DataFrame:
- """
- Filter peptides by amino acid sequence length.
-
- Args:
- df: DataFrame with digest results
- min_length: Minimum peptide length (number of amino acids)
- max_length: Maximum peptide length (number of amino acids)
-
- Returns:
- Filtered DataFrame
- """
- filtered_df = df.copy()
-
- if min_length is not None:
- filtered_df = filtered_df[filtered_df['Peptide Sequence'].str.len() >= min_length]
-
- if max_length is not None:
- filtered_df = filtered_df[filtered_df['Peptide Sequence'].str.len() <= max_length]
-
- return filtered_df
-
-
-def get_available_enzymes() -> List[str]:
- """
- Get list of available enzymes from pyOpenMS EnzymesDB.
-
- Returns:
- List of enzyme names
-
- Raises:
- RuntimeError: If pyOpenMS enzyme database cannot be loaded
- """
- try:
- # Get enzyme database
- enzyme_db = oms.ProteaseDB()
- enzymes = []
- enzyme_db.getAllNames(enzymes)
- return enzymes
- except Exception as e:
- raise RuntimeError(f"Failed to load pyOpenMS enzyme database: {e}. Please ensure pyOpenMS is properly configured.") from e
-
-
-def validate_enzyme(enzyme_name: str) -> bool:
- """
- Validate if an enzyme is supported by pyOpenMS.
-
- Args:
- enzyme_name: Name of the enzyme
-
- Returns:
- True if enzyme is supported, False otherwise
- """
- try:
- digest = oms.ProteaseDigestion()
- digest.setEnzyme(enzyme_name)
- return True
- except Exception:
- return False
-
-
-def create_digest_summary(df: pd.DataFrame) -> str:
- """
- Create a text summary of the digest results.
-
- Args:
- df: DataFrame with digest results
-
- Returns:
- Summary text
- """
- if df.empty:
- return "No peptides generated from the digest."
-
- stats = get_digest_statistics(df)
-
- summary = f"""
- **Digest Summary:**
- - Total peptides: {stats['total_peptides']:,}
- - Unique proteins: {stats['unique_proteins']}
- - Average peptide length: {stats['avg_peptide_length']:.1f} amino acids
- - Mass range: {stats['mass_range'][0]:.2f} - {stats['mass_range'][1]:.2f} Da
- """
-
- return summary
-
-
-def calculate_protein_coverage(df: pd.DataFrame, sequences: List[Tuple[str, str]]) -> Dict[str, Dict]:
- """
- Calculate coverage for each position in each protein sequence.
-
- Args:
- df: DataFrame with digest results
- sequences: List of (header, sequence) tuples
-
- Returns:
- Dictionary mapping accession to coverage info
- """
- coverage_data = {}
-
- # Create mapping from accession to sequence
- accession_to_sequence = {}
- for header, sequence in sequences:
- accession = extract_accession(header)
- accession_to_sequence[accession] = sequence
-
- # Initialize coverage arrays for each protein
- for accession, sequence in accession_to_sequence.items():
- coverage_data[accession] = {
- 'sequence': sequence,
- 'coverage': [0] * len(sequence),
- 'description': ''
- }
-
- # Calculate coverage from digest results
- for _, row in df.iterrows():
- accession = row['Accession']
- if accession in coverage_data:
- # Get description from first occurrence
- if not coverage_data[accession]['description']:
- coverage_data[accession]['description'] = row['Description']
-
- # Parse start and end positions
- start_positions = row['Start'].split(',') if row['Start'] else []
- end_positions = row['End'].split(',') if row['End'] else []
-
- # Increment coverage for each occurrence of this peptide
- for start_str, end_str in zip(start_positions, end_positions):
- try:
- start = int(start_str) - 1 # Convert to 0-based
- end = int(end_str) # End is already exclusive in 1-based
-
- # Increment coverage for all positions covered by this peptide
- for pos in range(start, end):
- if 0 <= pos < len(coverage_data[accession]['coverage']):
- coverage_data[accession]['coverage'][pos] += 1
- except (ValueError, IndexError):
- continue
-
- return coverage_data
-
-
-def generate_coverage_html(accession: str, coverage_info: Dict) -> str:
- """
- Generate HTML for protein sequence with coverage coloring.
-
- Args:
- accession: Protein accession
- coverage_info: Coverage information dictionary
-
- Returns:
- HTML string for colored sequence
- """
- sequence = coverage_info['sequence']
- coverage = coverage_info['coverage']
- description = coverage_info['description']
-
- # Define colors for different coverage levels
- colors = {
- 0: '#f0f0f0', # Light gray for no coverage
- 1: '#ffffcc', # Light yellow for 1x coverage
- 2: '#ffcc99', # Light orange for 2x coverage
- 3: '#ff9999', # Light red for 3x coverage
- 4: '#ff6666', # Medium red for 4x coverage
- }
-
- html_parts = [f"{accession} - {description}
"]
- html_parts.append("")
-
- # Add coverage legend
- html_parts.append("
")
- html_parts.append("Coverage: ")
- for level, color in colors.items():
- if level <= 4:
- label = f"{level}x" if level < 4 else "4+x"
- html_parts.append(f"{label}")
- html_parts.append("
")
-
- # Generate colored sequence
- for i, aa in enumerate(sequence):
- if i < len(coverage):
- cov_level = min(coverage[i], 4) # Cap at 4 for coloring
- color = colors.get(cov_level, colors[4])
- else:
- cov_level = 0 # Default coverage level for positions beyond coverage array
- color = colors[0]
-
- html_parts.append(f"
{aa}")
-
- # Add line breaks every 50 amino acids for readability
- if (i + 1) % 50 == 0:
- html_parts.append("
")
-
- html_parts.append("
")
- html_parts.append("
")
-
- return "".join(html_parts)
diff --git a/utils/fasta.py b/utils/fasta.py
deleted file mode 100644
index 1d2758d..0000000
--- a/utils/fasta.py
+++ /dev/null
@@ -1,151 +0,0 @@
-"""
-FASTA parsing and validation utilities.
-"""
-import re
-from typing import List, Tuple, Optional
-
-
-def parse_fasta(fasta_text: str) -> List[Tuple[str, str]]:
- """
- Parse FASTA text into a list of (header, sequence) tuples.
-
- Args:
- fasta_text: Raw FASTA text input
-
- Returns:
- List of tuples containing (header, sequence)
-
- Raises:
- ValueError: If FASTA format is invalid
- """
- if not fasta_text.strip():
- return []
-
- sequences = []
- lines = fasta_text.strip().split('\n')
- current_header = None
- current_sequence = []
-
- for line_num, line in enumerate(lines, 1):
- line = line.strip()
- if not line:
- continue
-
- if line.startswith('>'):
- # Save previous sequence if exists
- if current_header is not None:
- seq = ''.join(current_sequence)
- if seq:
- sequences.append((current_header, seq))
- else:
- raise ValueError(f"Empty sequence found for header: {current_header}")
-
- # Start new sequence
- current_header = line[1:] # Remove '>' prefix
- current_sequence = []
- else:
- if current_header is None:
- raise ValueError(f"Line {line_num}: Sequence data found before header")
- current_sequence.append(line.upper())
-
- # Add the last sequence
- if current_header is not None:
- seq = ''.join(current_sequence)
- if seq:
- sequences.append((current_header, seq))
- else:
- raise ValueError(f"Empty sequence found for header: {current_header}")
-
- if not sequences:
- raise ValueError("No valid FASTA sequences found")
-
- return sequences
-
-
-def validate_protein_sequence(sequence: str) -> bool:
- """
- Validate that a sequence contains only valid amino acid characters.
-
- Args:
- sequence: Protein sequence string
-
- Returns:
- True if valid, False otherwise
- """
- # Valid amino acid single letter codes
- valid_aa = set('ACDEFGHIKLMNPQRSTVWY')
- return all(aa in valid_aa for aa in sequence.upper())
-
-
-def extract_accession(header: str) -> str:
- """
- Extract accession number from FASTA header.
-
- Args:
- header: FASTA header line (without '>')
-
- Returns:
- Accession number or original header if no standard format found
- """
- # Try to extract accession from common formats
- # UniProt format: sp|P12345|PROT_HUMAN or tr|A0A123B4C5|A0A123B4C5_HUMAN
- uniprot_match = re.match(r'(sp|tr)\|([^|]+)\|', header)
- if uniprot_match:
- return uniprot_match.group(2)
-
- # NCBI format: gi|123456|ref|NP_123456.1| or ref|NP_123456.1|
- ncbi_match = re.match(r'(?:gi\|\d+\|)?(?:ref\|)?([^|]+)', header)
- if ncbi_match:
- return ncbi_match.group(1)
-
- # Generic format: take first word
- first_word = header.split()[0] if header.split() else header
- return first_word
-
-
-def extract_description(header: str) -> str:
- """
- Extract description from FASTA header.
-
- Args:
- header: FASTA header line (without '>')
-
- Returns:
- Description part of the header
- """
- # For UniProt format, description comes after the second |
- uniprot_match = re.match(r'(sp|tr)\|[^|]+\|[^|\s]+\s*(.*)', header)
- if uniprot_match:
- return uniprot_match.group(2).strip()
-
- # For other formats, try to extract everything after first space
- parts = header.split(' ', 1)
- if len(parts) > 1:
- return parts[1].strip()
-
- return header
-
-
-def validate_fasta_input(fasta_text: str) -> Tuple[bool, Optional[str], List[Tuple[str, str]]]:
- """
- Validate FASTA input and return parsed sequences if valid.
-
- Args:
- fasta_text: Raw FASTA text input
-
- Returns:
- Tuple of (is_valid, error_message, sequences)
- """
- try:
- sequences = parse_fasta(fasta_text)
-
- # Validate each sequence
- for header, sequence in sequences:
- if not validate_protein_sequence(sequence):
- invalid_chars = set(sequence.upper()) - set('ACDEFGHIKLMNPQRSTVWY')
- return False, f"Invalid amino acids found in sequence '{extract_accession(header)}': {', '.join(sorted(invalid_chars))}", []
-
- return True, None, sequences
-
- except ValueError as e:
- return False, str(e), []
\ No newline at end of file