diff --git a/pydata-global-2024/category.json b/pydata-global-2024/category.json new file mode 100644 index 000000000..6cbf0d254 --- /dev/null +++ b/pydata-global-2024/category.json @@ -0,0 +1,3 @@ +{ + "title": "PyData Global 2024" +} diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json new file mode 100644 index 000000000..e848c5727 --- /dev/null +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "DuckDB is revolutionizing data processing by enabling in-memory OLAP SQL operations with a lightweight, dependency-free architecture. This talk explores how DuckDB can be leveraged to handle large-scale, massively parallel data processing, ranging from hundreds of gigabytes to terabytes, outside traditional SQL and Spark warehouse systems. We will go over the integration with the Python ecosystem and demonstrate its scaling potential using the cloud compute.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1772, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Adarsh Namala" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/qSs5ALVbzTk/maxresdefault.jpg", + "title": "Scaling Outside the Warehouse Using DuckDB and Python", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=qSs5ALVbzTk" + } + ] +} diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json new file mode 100644 index 000000000..0e7dfa625 --- /dev/null +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Hi! Have you ever wished your pure Python libraries were faster? Or wanted to fundamentally improve a Python library by rewriting everything in a faster language like C or Rust? Well, wish no more... NetworkX's backend dispatching mechanism redirects your plain old NetworkX function calls to a FASTER implementation present in a separate backend package by leveraging the Python's entry_point specification!\n\nNetworkX is a popular, pure Python library used for graph(aka network) analysis. But when the graph size increases (like a network of everyone in the world), then NetworkX algorithms could take days to solve a simple graph analysis problem. So, to address these performance issues, a backend dispatching mechanism was recently developed. 
In this talk, we will unveil this dispatching mechanism and its implementation details, and how we can use it just by specifying a backend kwarg like this:\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1746, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Aditi Juneja" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2UkZVKj6QGY/maxresdefault.jpg", + "title": "Understanding API Dispatching in NetworkX", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2UkZVKj6QGY" + } + ] +} diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json new file mode 100644 index 000000000..419b01703 --- /dev/null +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + 
"description": "This talk will cover how to use pre-trained HuggingFace models, specifically wav2vec 2.0 and WavLM, to detect audio deepfakes. These deepfakes, made possible by advanced voice cloning tools like ElevenLabs and Respeecher, present risks in areas like misinformation, fraud, and privacy violations. The session will introduce deepfake audio, discuss current trends in voice cloning, and provide a hands-on tutorial for using these transformer-based models to identify synthetic voices by spotting subtle anomalies. Participants will learn how to set up these models, analyze deepfake audio datasets, and assess detection performance, bridging the gap between speech generation and detection technologies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1857, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Adriana Stan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/MGRmKlDj9rk/maxresdefault.jpg", + "title": "Off-the-shelf HuggingFace models for audio deepfake detection", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=MGRmKlDj9rk" + } + ] +} diff --git a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json new file mode 100644 index 000000000..f1d9fcfd8 --- /dev/null +++ b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json @@ -0,0 +1,28 @@ +{ + "description": "Beneath the buzz of AI breakthroughs, a quiet revolution is unfolding in the world of forecasting: foundational time series models. These models promise to change the game for operational forecasting, but don’t expect magic. You won’t suddenly become a stock market oracle just by throwing data at them.\n\nIn this talk, we’ll peel back the layers of these new time series models, starting with how they work and how they evolved from transformers. We’ll tackle the big problems of limited data and overhyped algorithms, and explore the real-world challenges that make or break forecasts (hint: human input matters).\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1865, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Ahad Shoaib" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/5Nt0p_3zU7g/maxresdefault.jpg", + "title": "Foundational Time Series Models in Practice: The Future of Forecasting, or Just Hype?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=5Nt0p_3zU7g" + } + ] +} diff --git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json new file mode 100644 index 000000000..76b03768f --- /dev/null +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -0,0 +1,29 @@ +{ + "description": "Vector databases are everywhere, powering LLMs. But indexing embeddings, especially multivector embeddings like ColPali and Colbert, at a bulk is memory intensive. 
Vector streaming solves this problem by parallelizing the tasks of parsing, chunking, and embedding generation and indexing it continuously chunk by chunk instead of bulk. This not only increases the speed but also makes the whole task more optimized and memory efficient.\n\nThe library gives many vector database supports, like Pinecone, Weaviate, and Elastic.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1680, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Akshay Ballal", + "Sonam Pankaj" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/FdOeLY3rGA8/maxresdefault.jpg", + "title": "The Memory Efficient Indexing for Vector Databases", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=FdOeLY3rGA8" + } + ] +} diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json new file mode 100644 index 000000000..3e95fb3ce --- /dev/null +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Time series analysis provides essential tools for modeling and predicting time-dependent data, especially data exhibiting seasonal patterns or serial correlation. This tutorial covers tools in the StatsModels library including seasonal decomposition and ARIMA. We'll develop the ARIMA model bottom-up, implementing it one piece at a time, and then using StatsModels. As examples, we'll look at weather data and electricity generation from renewable sources in the United States since 2004 -- but the methods we'll cover apply to many kinds of real-world time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5376, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Allen Downey" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/foMbacbuAQk/maxresdefault.jpg", + "title": "Time Series Analysis with StatsModels", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=foMbacbuAQk" + } + ] +} diff --git a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json new file mode 100644 index 000000000..a76d3f19e --- /dev/null +++ b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json @@ -0,0 +1,28 @@ +{ + "description": "Knowledge graphs are excellent at representing and storing heterogeneous and interconnected information in a structured manner, effectively capturing complex relationships and attributes across different data types.\nStructured text generation allows for building knowledge graphs by providing neatly structured outputs, making it an ideal method for extracting structured information.\nSimilarly, structured text generation enables 
the creation of agents by defining which tools are allowed and what action inputs are permitted.\nIn this talk, we first build a graph database from unstructured data and then we create an agent to query the graph database. We will show these capabilities with a demo.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1696, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Alonso Silva" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/94yuQKoDKkE/maxresdefault.jpg", + "title": "Building Knowledge Graph-Based Agents with Structured Text Generation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=94yuQKoDKkE" + } + ] +} diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json new file mode 100644 index 000000000..812105db7 --- /dev/null +++ 
b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Taking any project from zero to production is challenging. And Data Science has a particularly high failure rate, with a lot of ideas not getting beyond the prototype stage.\n\nBut there are real reasons for this: there is intrinsic and unknown complexity in data, and there are often big challenges knowing if we have actually solved the problem -- the answer is so rarely \"yes\" or \"no\".\n\nIn this talk I'll cover some key learnings from a decade working on DS problems at early- and later-stage startups, building products to improve product market fit.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1706, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Andrew Weeks" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/FA1TWdxoyV4/maxresdefault.jpg", + "title": "Taking Data Science in industry from zero to production", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=FA1TWdxoyV4" + } + ] +} diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json new file mode 100644 index 000000000..0fae8c72a --- /dev/null +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "This talk showcases and exemplifies the rapid specification and execution of Quantile Regression workflows. Various use cases are discussed, including fitting, outlier detection, conditional CDFs, and simulations, using different types of time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1752, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Anton Antonov" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Z2uz7kwBli8/maxresdefault.jpg", + "title": "Quantile Regression Workflows", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Z2uz7kwBli8" + } + ] +} diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json new file mode 100644 index 000000000..1709a2ab4 --- /dev/null +++ b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Learn how we built a lightning-fast search engine using Python, balancing speed, relevance, and scalability. In this session, we’ll explore our hybrid approach, blending vector search with traditional keyword indexing to deliver high quality, accurate results. Discover how we harness a high-performance NoSQL database for efficient data management and fine-tune our results with a re-ranking algorithm for top-notch accuracy.\nWe’ll dive into the hurdles we overcame, like ensuring data consistency in a NoSQL setup, balancing search precision and performance, and designing a scalable architecture. 
By the end, you’ll understand how this Python-powered engine works, its real-world applications, and the innovative solutions that set it apart.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1669, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Art Anderson" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/wn1L3hlYfc0/maxresdefault.jpg", + "title": "A Deep Dive into Python-Powered Precision and Scalability", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=wn1L3hlYfc0" + } + ] +} diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json new file mode 100644 index 000000000..ef9b2cfbc --- /dev/null +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -0,0 +1,28 @@ 
+{ + "description": "Rapid adoption of generative AI requires ensuring your application is trustworthy. Careful experimentation and measurement are necessary for this new era of non-deterministic software. In this talk, we will take learnings from 100s of conversations across enterprise AI teams, and discuss how developers can mitigate hallucinations, better inspect their AI systems, and productionize applications with effective guardrails and evaluation checks in place.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1737, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Atin Sanyal" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/3iQFdcVf9jI/maxresdefault.jpg", + "title": "Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3iQFdcVf9jI" + } + ] +} diff --git a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json new file mode 100644 index 000000000..f789c3d83 --- /dev/null +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "As data scientists and machine learning engineers, it is crucial that we can reproduce results and seamlessly share projects across teams and stakeholders. However, differing operating systems, Python environments, package versions, and package managers often hinder reproducibility across different machines. This talk will explore how Nix can be leveraged to create reproducible work environments and how it can be a convenient tool for any Data Scientist or ML Engineer.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1739, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Avik Basu" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/QgQzxcPZWxA/maxresdefault.jpg", + "title": "Reproducible Python projects using Nix", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=QgQzxcPZWxA" + } + ] +} diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json new file mode 100644 index 000000000..aecdb473e --- /dev/null +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "In this talk, we will explore Judea Pearl’s causal ladder (association, intervention, and counterfactuals) through the lens of a simple demand forecasting model. Using real-world business scenarios, I will demonstrate how to move beyond correlation-based predictions to more actionable decisions using PyMC’s causal inference tools. 
Attendees will learn how to make forecasts for natural business conditions, simulate the effects of strategic changes (like increased advertising spend), and evaluate the causal impact of past price promotion with retrodictive causal inference.\n\nTarget audience: Data scientists, machine learning engineers, and business analysts looking to improve their decision-making using causal inference.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1798, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Benjamin Vincent" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ajLPA34upQY/maxresdefault.jpg", + "title": "Climbing the causal ladder for fun and profit", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ajLPA34upQY" + } + ] +} diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json new file mode 100644 index 000000000..bf812a6c9 --- /dev/null +++ b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -0,0 +1,29 @@ +{ + "description": "The goal of this tutorial is to make Gaussian processes (GPs) useful. In most practicing data scientists' mental map of modeling and machine learning techniques, Gaussian processes are an advanced approach that sit alone on an island, perhaps with narrow use cases like Bayesian optimization. Most books and other material on GPs tend to focus on theoretical aspects, and it can be hard to close the gap between the theory and putting those ideas into practice to solve real problems in a reasonable amount of time.\n\nThis tutorial is split into two parts. The first part introduces Bayesian modeling, focusing on hierarchical modeling and the concept of partial pooling. We’ll use the classic example of estimating the batting average of a group of baseball players as motivation. 
Then we’ll introduce GPs as a useful generalization of hierarchical modeling for the common situation where our groups aren’t distinct categories. Instead of thinking of each baseball player as completely distinct and exchangeable entities, we can use a GP to partially pool information locally by also considering each player's age. Finally we’ll close the first part by connecting back to the more common introduction to GPs as infinite dimensional multivariate normals.\n\nThe second part of the tutorial will give an overview of practical tips and tricks for modeling with GPs using the open source Python package PyMC. Specifically, how to address the two big issues to using GPs in practice: scaling and identifiability. We’ll discuss useful approximations like the HSGP and when to apply them, advice on when to use splines, and finally when you need to step out of a PPL like PyMC or Stan to a GP specific library like GPFlow or GPyTorch. We’ll do so with a couple motivating examples. The audience should have some familiarity with basic ML and statistics concepts, such as probability distributions, normal and multivariate normal distributions, correlation and covariance, and linear regression - but the talk will aim to be non-technical and the goal will be to introduce GPs and give people the tools they need to use them effectively in practice.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5385, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Bill Engels", + "Chris Fonnesbeck" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/fi_S89jgUYU/maxresdefault.jpg", + "title": "Making Gaussian Processes Useful", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=fi_S89jgUYU" + } + ] +} diff --git a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json new file mode 100644 index 000000000..0e708424a --- /dev/null +++ b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json @@ -0,0 +1,28 @@ +{ + "description": "To identify a production-ready, open-source OCR model capable of handling sensitive, non-English content with highly technical language, we evaluated the performance of available open-source OCR models in terms of accuracy, memory efficiency, and processing speed. This presentation will share our findings and key insights gained from this research\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 2197, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Bing Wang" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/OitWeFVvShc/maxresdefault.jpg", + "title": "An Evaluation of Open-Source OCR Models for Japanese Medical Documents", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=OitWeFVvShc" + } + ] +} diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json new file mode 100644 index 000000000..5d255b249 --- /dev/null +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -0,0 +1,34 @@ +{ + "description": "Colab Notebook Link: https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing\nMake a copy to your local drive to start working on this notebook.\n\nEver wondered how groundbreaking language models like ChatGPT and Llama were built? The answer lies in transformer, a powerful neural network architecture. 
In this workshop, we'll dive deep into the inner workings of transformers, with specific focus on self-attention mechanism. We will guide you through the process of building one from scratch. Whether you're a beginner or an experienced practitioner, this workshop is designed to cater to all levels of expertise.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5337, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing", + "url": "https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing" + } + ], + "speakers": [ + "Sheetal Borar", + "Chuxin Liu", + "Shefali Shrivastava" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/TWxD76J5Uho/maxresdefault.jpg", + "title": "Build Your Own Transformer", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=TWxD76J5Uho" + } + ] +} diff --git 
a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json new file mode 100644 index 000000000..9090991ea --- /dev/null +++ b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json @@ -0,0 +1,32 @@ +{ + "description": "LLMs offer powerful capabilities, but deploying them effectively in production remains a challenge for conversational AI and Chatbot applications, especially when it comes to minimizing hallucinations and ensuring accurate responses. In this 90-minute hands-on tutorial, we’ll explore building conversational AI systems using CALM and Rasa. CALM (Conversational AI Language Model) combines traditional conversational AI techniques with LLMs, separating conversational ability from business logic execution to deliver reliable, cost efficient, and scalable solutions. Unlike LLMs that handle both sides of the conversation, CALM focuses on user understanding with predefined business logic. This approach not only accelerates development but also enhances cost efficiency, scalability and reliability. By focusing on predefined business logic with CALM, you’ll gain the ability to build sophisticated, scalable systems faster. You’ll also learn how to use fine-tuned, open-weight models, such as llama 8b to power your AI assistant.\n\nParticipants will learn how to use CALM for business logic and Rasa for dialogue management, with practical insights, code examples, and best practices. Materials will be provided via a GitHub repository with a GitHub Codespace for easy access and execution.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 4953, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Hugo Bowne-Anderson", + "Alan Nichol", + "Justina Petraitytė" + ], + "tags": [ + "tutorial" + ], + "thumbnail_url": "https://i.ytimg.com/vi/V7HQCMcaJ8A/maxresdefault.jpg", + "title": "Building an AI Travel Agent That Never Hallucinates", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=V7HQCMcaJ8A" + } + ] +} diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json new file mode 100644 index 000000000..690b4e130 --- /dev/null +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "In partnership with the Department for Environment, Food and Rural Affairs (DEFRA), Datacove developed a bespoke Shiny dashboard designed to enhance decision-making in the areas of Health and 
Wellbeing, Nature, and Sustainability (HWNS). This presentation explores three key aspects: project and data management, customisation, and usability enhancements in R.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1659, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Jeremy Horne" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Pq5VhosMJQE/maxresdefault.jpg", + "title": "Dashboards to Aid British Government Decisions (using R)", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Pq5VhosMJQE" + } + ] +} diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json new file mode 100644 index 000000000..badd39168 --- /dev/null +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -0,0 +1,29 @@ 
+{ + "description": "This tutorial introduces Pixeltable, which provides data-centric AI infrastructure with a declarative, incremental approach for multimodal workloads. Participants will learn to manage multimodal data (text, images, video) using Pixeltable's declarative interface. We'll cover data versioning, indexing, and orchestration through computed columns and iterators. Attendees will gain practical experience with Pixeltable's integration capabilities and custom UDFs.\n\nRequirements: Python knowledge, basic ML concepts. Materials will be available via a GitHub repository and Google Colab notebooks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1513, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Pierre Brunelle", + "Marcel Kornacker" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/C7_nw2Rebfs/maxresdefault.jpg", + "title": "Hands-on Multimodal AI Development with Pixeltable", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=C7_nw2Rebfs" + } + ] +} diff --git a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json new file mode 100644 index 000000000..3b1ff6eba --- /dev/null +++ b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json @@ -0,0 +1,28 @@ +{ + "description": "This tutorial empowers deep learning practitioners to master the entire PyTorch workflow, from efficient model creation to advanced tracking and optimization techniques. We'll begin by exploring a practical PyTorch workflow, then delve into integrating popular experiment tracking tools like MLFlow and Weights & Biases. You'll learn to log custom metrics, artifacts, and interactive visualizations, enhancing your model development process. 
Finally, we'll tackle hyperparameter optimization using Optuna's Bayesian search, all while maintaining meticulous experiment tracking for easy comparison and reproducibility.\n\nBy the end of the session, you'll have constructed a robust, modular pipeline for managing experiments and optimizing model performance. Whether you're new to PyTorch or an experienced data scientist looking to improve your workflow, this hands-on tutorial offers immediately applicable insights and techniques to enhance your deep learning projects across diverse domains.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5443, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Cainã Max Couto da Silva" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/pzSzhn9H6X4/maxresdefault.jpg", + "title": "PyTorch Workflow Mastery: A Guide to Track and Optimize Model Performance", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pzSzhn9H6X4" + } + ] +} diff --git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json new file mode 100644 index 000000000..67653ed48 --- /dev/null +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Learn how to write a native Python application in the browser using WebAssembly enabled by PyScript.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5925, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Chris Laffra" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/J2XOSdDWPIo/maxresdefault.jpg", + "title": "PyScript - Writing a Python application in the browser", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=J2XOSdDWPIo" + } + ] +} diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json new file mode 100644 index 000000000..2eca347f4 --- /dev/null +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Component-based modeling systems such as Simulink and Dymola allow for building scientific models in a way that can be composed. For example, Bob can build a model of an engine, and Alice can build a model of a drive shaft, and you can then connect the two models and have a model of a car. These kinds of tools are used all throughout industrial modeling and simulation in order to allow for \"separation of concerns\", allowing experts to engineer their domain and compose the final digital twins with reusable scientific modules. But what about open source? 
In this talk we will introduce ModelingToolkit, an open source component-based modeling framework that allows for composing pre-built models and scales to large high-fidelity digital twins.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1643, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Chris Rackauckas" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/yW4oU-7_tGE/maxresdefault.jpg", + "title": "Open Source Component-Based Modeling with ModelingToolkit", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=yW4oU-7_tGE" + } + ] +} diff --git a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json new file mode 100644 index 000000000..dff0fab77 --- /dev/null +++ b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json @@ 
-0,0 +1,29 @@ +{ + "description": "MAPIE (Model Agnostic Prediction Interval Estimator) is your go-to solution for managing uncertainties and risks in machine learning models. This Python library, nestled within scikit-learn-contrib, offers a way to calculate prediction sets with controlled coverage rates for regression and classification tasks.\n\nBut it doesn't stop there - MAPIE can also be used to handle more complex tasks like time series analysis, multi-label classification, computer vision and natural language processing, ensuring probabilistic guarantees on crucial metrics.\n\nJoin us as we delve into the world of conformal predictions and how to quickly manage your uncertainties using MAPIE.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5056, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Hussein Jawad", + "Valentin Laurent" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ZkLIWS9dlZI/maxresdefault.jpg", + "title": "Boosting AI Reliability: Uncertainty Quantification with MAPIE", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZkLIWS9dlZI" + } + ] +} diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json new file mode 100644 index 000000000..3d82b4f1b --- /dev/null +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Shiny for Python is an efficient and reactive application framework that will be able to grow with your application needs. As your shiny application grows, you may find yourself needing more custom behaviors and potentially reusing and sharing your custom behaviors with others. \nYou may also find your existing applications to be overly complex and hard to see the overall structure of the application. Here are some tips on writing better Shiny Applications and leveling up your code.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1841, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Daniel Chen" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2Cst7_s_4H8/maxresdefault.jpg", + "title": "Tips to Level-Up Your Shiny for Python Applications", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2Cst7_s_4H8" + } + ] +} diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json new file mode 100644 index 000000000..65fcbbd52 --- /dev/null +++ b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Julia is a high-performance language for technical computing that offers advantages like type stability, just-in-time compilation, and extensive parallel computing support. Its Machine Learning ecosystem, although having fewer options, is functional and includes packages like DataFrames.jl, Flux.jl, MLJ.jl, and SciML for various ML tasks. Additional tools cover data visualization, R compatibility, and specific ML applications. 
The ecosystem is comprehensive and can meet many ML researcher/professional needs. This talk provides an overview of the ecosystem, discussing both its strengths and potential areas for improvement.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1672, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Daniel Molina" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/17Jm_Gqv3K8/maxresdefault.jpg", + "title": "Discover the Julia Machine Learning Ecosystem", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=17Jm_Gqv3K8" + } + ] +} diff --git a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json new file mode 100644 index 000000000..8f2813ff2 --- /dev/null +++ b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json @@ -0,0 +1,28 
@@ +{ + "description": "Streamlining clinical trial output workflows is a key challenge in clinical studies. To deliver reports to health authorities, clinical trial statisticians need to create several scripts to produce deliverables such as output datasets, tables, figures, and listings. Statisticians must also handle specific execution orders to respect dependencies between the generated datasets.\n\nOur project leverages Python programming to automatically generate orchestration workflows from clinical trial project metadata using the Snakemake framework. Snakemake supports the execution of multiple jobs using Docker containers, facilitating multilingual orchestration. This enables our users to run end-to-end (E2E) data engineering workflows using their preferred programming languages, primarily SAS and R. Moreover, Snakemake allows parallel runs for efficient workflow management.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1793, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Daphné Grasselly" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/7xrDlgaz-QM/maxresdefault.jpg", + "title": "Enabling Multi-Language Programming in Data Engineering Workflows", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7xrDlgaz-QM" + } + ] +} diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json new file mode 100644 index 000000000..a0c2e3106 --- /dev/null +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "This talk will tell the tale of how we migrated a data application from Streamlit to Panel. And what it took to scale from 100 users to 2000+ users in less than 2 months. It's a story of pain, Kubernetes, resilience, and a whole lot of Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1817, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Duarte Carmo" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/xDcGtPgxXEk/maxresdefault.jpg", + "title": "Panel: The Dashboard That Grew - A Scaling Saga", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=xDcGtPgxXEk" + } + ] +} diff --git a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json new file mode 100644 index 000000000..6bb9651f3 --- /dev/null +++ b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json @@ -0,0 +1,29 @@ +{ + "description": "Retrieval-Augmented Generation (RAG), despite being a superstar of GenAI over the last year, comes with a plethora of challenges and is prone to errors. Open Source Python libraries like RAGAS and TruLens provide frameworks for evaluating RAG systems, using various metrics that leverage LLMs to assess performance. But when using LLM in a RAG system is in itself a source of errors, it remains to be seen how reliable it would be to use another LLM, albeit a more powerful one, as a judge of the RAG performance. This study explores various RAG evaluation metrics, as well as the choice of evaluator LLM, to examine the reliability and consistency of LLM-based evaluations. 
The aim is to provide practical insights and guidance for interpreting these evaluations effectively, and help users make informed decisions when applying them in diverse contexts.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1807, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Nour El Mawass", + "Joe Neeman" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/hCCJoJ5URD0/maxresdefault.jpg", + "title": "Evaluating RAGs: On the correctness and coherence of Open Source eval metrics", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hCCJoJ5URD0" + } + ] +} diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json new file mode 100644 index 000000000..e31c763fa --- /dev/null +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json 
@@ -0,0 +1,28 @@ +{ + "description": "In this talk we present the OS library Burr -- a tool that makes it easier to build reliable, production-ready AI applications and agents. We will show how to use Burr to address a host of production concerns, including generating test data from prior runs, interactive debugging, persisting/loading application state, and more\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1648, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Elijah ben Izzy" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/3Ks02G18anA/maxresdefault.jpg", + "title": "Build Production Ready AI Agents with Burr", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3Ks02G18anA" + } + ] +} diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json new file mode 100644 index 000000000..b0c23197a --- /dev/null +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Enjoy some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). 
No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.\n\nNote the baseline", + "duration": 1699, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Evan Wimpey" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/iJp12vplXAc/maxresdefault.jpg", + "title": "Python is a Joke!", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=iJp12vplXAc" + } + ] +} diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json new file mode 100644 index 000000000..299378dfc --- /dev/null +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "An introduction to solving combinatorial optimization and constraint satisfaction problems in Python. I will review the most popular libraries for SAT/CSP. We will then deep dive into a crash course on using Google's award winning OR-tools library, for efficiently solving some non-trivial real-world constrained combinatorial optimization problems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1741, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Eyal Gruss" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/bl13uhchJVA/maxresdefault.jpg", + "title": "Let our optima combine!", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=bl13uhchJVA" + } + ] +} diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json new file mode 100644 index 000000000..3367b40bf --- /dev/null +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -0,0 +1,32 @@ +{ + "description": "To apply or not to apply, that is the question.\n\nCausal reasoning elevates predictive outcomes by shifting from “what happened” to “what would happen if”. Yet, implementing causality can be challenging or even infeasible in some contexts. This talk explores how the very act of assessing its applicability can add value to your projects. 
Through a gentle introduction to causal inference tools and practical use cases, you will learn how to bring greater scientific rigour to real-world problems.\n\nTarget audience: Practicing and aspiring data scientists, machine learning engineers, and analysts looking to improve their decision-making with causal inference.\n\nNo prior knowledge is assumed.\n\nFor the seasoned practitioners I hope to shine light on aspects that may not have been considered. 💡\n\nCan't make the talk? Read all about it in my new TDS article: 🧠🧹 Causality — Mental Hygiene for Data Science (http://bit.ly/causal-hygiene)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1756, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "http://bit.ly/causal-hygiene", + "url": "http://bit.ly/causal-hygiene" + } + ], + "speakers": [ + "Eyal Kazin" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/POMePoP8M-w/maxresdefault.jpg", + "title": "🧠🧹 Causality - Mental Hygiene for Data Science", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=POMePoP8M-w" + } + ] +} diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json new file mode 100644 index 000000000..0e3ed367f --- /dev/null +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -0,0 +1,32 @@ +{ + "description": "As data grows larger and more complex, efficient storage and processing become critical to achieving scalable and high-performance computing. Blosc2 (https://www.blosc.org), a powerful meta-compressor library, addresses these challenges by enabling rapid compression and decompression of large, multidimensional arrays (NDArrays). 
This tutorial will introduce the core concepts of working with Blosc2, focusing on how it can be leveraged to optimize both storage and computational performance in Python.\n\nAttendees will learn how to:\n\n Efficiently create and manage large NDArrays, including options for persistence.\n Select the best codecs and filters for specific data types and workflows to achieve optimal compression ratios and performance.\n Perform computations directly on compressed data to save memory and speed up processing.\n Seamlessly share NDArrays using Caterva2, a versatile library designed to enable remote sharing and serving of multidimensional datasets.\n\nThis tutorial is ideal for Python developers working with large-scale data in scientific computing, machine learning, and other data-intensive fields.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5220, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://www.blosc.org", + "url": "https://www.blosc.org" + } + ], + "speakers": [ + "Francesc Alted" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/aR-i_a3nGx0/maxresdefault.jpg", + "title": "Mastering Large NDArray Handling with Blosc2 and Caterva2", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=aR-i_a3nGx0" + } + ] +} diff --git a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json new file mode 100644 index 000000000..6841451dd --- /dev/null +++ b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json @@ -0,0 +1,28 @@ +{ + "description": "Non-Intrusive Load Monitoring (NILM) is a key technique in data-driven energy management and home automation, aimed at disaggregating energy consumption to identify active appliances in households and quantify their energy usage. This presentation:\n\n Provides an overview of NILM, highlighting its advantages and reviewing state-of-the-art deep learning algorithms developed for this purpose.\n Examines smart meters and IoT devices in energy systems, with a focus on the Chain2 protocol used in Italian energy systems. 
This event-based protocol generates low-volume data, enabling real-time energy monitoring and alerting.\n Presents examples of deep learning models trained on real-world IoT sensor data from energy meters, demonstrating their application in energy disaggregation.\n\nThis session offers an insightful overview of real-world deep learning applications in energy systems. While tailored for data scientists and data engineers interested in these fields, no prior knowledge is required. Join to explore how these technologies are driving energy optimization, cost reduction, and enhancing personal energy consumption awareness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1796, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Francesco Conti" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/cMtYqUqdzsA/maxresdefault.jpg", + "title": "Deep Learning in Energy Management: Non-Intrusive Load Monitoring for IoT Devices", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=cMtYqUqdzsA" + } + ] +} diff --git a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json new file mode 100644 index 000000000..939d3573e --- /dev/null +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "This talk is an introduction to automatic differentiation with a focus on the Python and Julia ecosystems. We will first explain what autodiff is and how it works, then describe its various implementations in both languages. Our goal is to give everyone a good understanding of how computer code can be differentiated, while also discussing the trade-offs this differentiability entails.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1727, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Guillaume Dalle" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/4sF-Wm8w31c/maxresdefault.jpg", + "title": "Automatic differentiation, a tale of two languages", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=4sF-Wm8w31c" + } + ] +} diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json new file mode 100644 index 000000000..50bbdb128 --- /dev/null +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Changing data is hard: The computer may crash, scripts could fail, and data structures could be changing. Relational data management systems provide transactional (“ACID”) guarantees that can be immensely useful for data analysis. DuckDB provides all-or-nothing semantics for changes to datasets and is robust against failures of any kind. 
In this talk, we will illustrate the usefulness of DuckDB’s transactional facilities to bring sanity to changes to data analysis workflows in Python.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1823, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Hannes Mühleisen" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/7UqLMHloTsQ/maxresdefault.jpg", + "title": "Changing Data With Confidence using DuckDB", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7UqLMHloTsQ" + } + ] +} diff --git a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json new file mode 100644 index 000000000..6c426b4ae --- /dev/null +++ b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json @@ -0,0 +1,28 @@ +{ + 
"description": "This proposal aims to develop a Python curriculum for data science for multidisciplinary studies in university education. Data Science is nowadays a trending topic in any area like social science, finance, natural science and so many others. Therefore, every student in the university education is keen to learn data science using computer languages rather than using SPSS or other traditional data analysis tools especially related to research. So, this aims to develop a new curriculum for any student studying from any discipline in higher education to learn data science using trending techniques and tools. Python is the core programming language here because it is very widely used and related to data science field. Plus, it has many advantages like easy to learn and use, platform independence used, large and active community support. Utilizing Bloom’s Taxonomy as the guiding framework has developed a new curriculum for four-year degree programs to succeed in data driven world considering multidisciplinary approach. In this curriculum, students can start from Python basic programming concepts to progress to advanced analyzing techniques using libraries like Pandas, NumPy, and Seaborn, and platforms such as Anaconda and Google Colab and finally build own projects in that students related discipline. Ultimately this curriculum will leverage success in Data-centric society in domain specific applications.\n\nKeywords: Bloom’s, curriculum, multidisciplinary, python, science, taxonomy\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1756, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Hansila Sudasinghe" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/r3Diqvfy4Fo/maxresdefault.jpg", + "title": "PYDATA Bloom Framework: An Approach to Data Science in University Education", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=r3Diqvfy4Fo" + } + ] +} diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json new file mode 100644 index 000000000..617b7da69 --- /dev/null +++ b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Doing geoscience is hard. It’s even harder if you have to figure out how to handle large amounts of data!\n\nXarray is an open-source Python library designed to simplify the handling of labeled multi-dimensional arrays, like raster geospatial data, making it a favorite among geoscientists. 
It allows these scientists to easily express their computations, and is backed by Dask, a Python library for parallel and distributed computing, to scale computations to entire clusters of machines.\n\nPeople love using Xarray on Dask for geospatial workloads, but only up to about the terabyte scale. At this point, the stack can struggle, requiring expertise to work well and frustrating users and developers alike.\n\nTo address this and enable the Dask ❤️ Xarray stack to scale to hundreds of terabytes, we have recently designed a suite of large-scale geospatial benchmarks. With the help of these benchmarks, we are able to understand what limits performance within Dask and Xarray, and to address these issues.\nIn this talk, we will explore how Dask integrates with libraries like Xarray and Zarr to scale geospatial workloads and other multi-dimensional array computations.\n\nWe will also dive deeper into some of the bottlenecks in the Dask ❤️ Xarray stack that our benchmarks revealed, as well as some of the recent improvements we have made in these areas. With the help of our benchmark suite, we then assess the impact of these changes.\n\nJoin us to discover how Dask helps you scale geoscience workloads from your laptop to the cloud.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1771, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Hendrik Makait" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/KJxJRx7KQtc/maxresdefault.jpg", + "title": "Dask ❤ Xarray: Geoscience at Massive Scale", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=KJxJRx7KQtc" + } + ] +} diff --git a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json new file mode 100644 index 000000000..2bf2585b4 --- /dev/null +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -0,0 +1,28 @@ +{ + "description": "A beginner level hands-on introduction to BigQuery DataFrames. Please bring your laptop! There is nothing to install in advance\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5325, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Tim Swena" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2D5-7zIeOQ4/maxresdefault.jpg", + "title": "Python + BigQuery + DataFrames: Hands on with scalable \"serverless\" analysis", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2D5-7zIeOQ4" + } + ] +} diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json new file mode 100644 index 000000000..6929256a6 --- /dev/null +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Having worked on Kaggle's LLM-based ARC AGI program-writing challenge for 6 months using Llama3, I'll give reflections on the lessons learned making an automatic program generator, evaluating it, coming up with strong representations for the challenge, chain-of-thought and program-of-thought styles and some multi-stage critical thinking approaches. You'll get tips for tuning your own prompts and shortcuts to help you evaluate your own LLM usage with greater assurance in the face of non-deterministic outcomes.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1742, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Ian Ozsvald" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ft_PYi8A93M/maxresdefault.jpg", + "title": "Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ft_PYi8A93M" + } + ] +} diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json new file mode 100644 index 000000000..7f1c52d9f --- /dev/null +++ b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "9 out of 10 engineers will recommend the use of evaluation tools for their LLMs, but admit they only trust eyeballing responses to decide whether it's safe to use. 
The 10th carefully studies the floor in silence.\n\nThis talk is for engineers, developers or applied researchers who may or may not know of evaluation tools and metrics, but either way benefit from an overview of different risks in applications using LLMs for text generation, Open Source libraries they can use to mitigate these risks, and examples of how to use them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1660, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Irina Vidal Migallón" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/o3dBxo6fgcA/maxresdefault.jpg", + "title": "Trustworthy LLMs: Vibe checks are not all you need", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=o3dBxo6fgcA" + } + ] +} diff --git a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json new file mode 100644 index 000000000..a59432c51 --- /dev/null +++ b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json @@ -0,0 +1,29 @@ +{ + "description": "Debugging software itself is a hard task, but debugging GPU software environments can be even more challenging. Understanding the intricate interactions between hardware, drivers, CUDA, C++ dependencies, and Python libraries can be far more complex.\n\nIn this talk we will dig into how these different layers interact and how you can address some of the common pitfalls that folks run into when configuring GPU Python environments. We will also introduce a new tool, RAPIDS Doctor, that aims to take the challenge out of ensuring your software environments are in good shape. 
RAPIDS Doctor checks and diagnoses environmental health issues straight from the command line, ensuring that your setup is fully functional and optimized for performance.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1645, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Jacob Tomlinson", + "Melody Wang" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ghq-VDNvNss/maxresdefault.jpg", + "title": "The art of wrangling your GPU Python environments", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ghq-VDNvNss" + } + ] +} diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json new file mode 100644 index 000000000..f174e796f --- /dev/null +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ 
+ "description": "Due to its high-level syntax and powerful interactive prompt, Julia is typically used as a computational front-end language. However there is growing interest in using Julia to develop statically-compiled libraries to be called from other languages (Python, C++, etc.). I will present recent and ongoing work happening in the Julia community to enable this use case, including building smaller binaries and static analysis tooling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1843, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Jeff Bezanson" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/LluyXFj9YDI/maxresdefault.jpg", + "title": "Statically-Compiled Julia for Library Development", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=LluyXFj9YDI" + } + ] +} diff --git a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json new file mode 100644 index 000000000..d945deeb3 --- /dev/null +++ b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json @@ -0,0 +1,29 @@ +{ + "description": "In the rapidly evolving field of natural language processing, the evaluation of large language models (LLMs) is crucial for understanding their performance and guiding their development. This talk delves into the two primary evaluation methodologies: reference-based and reference less techniques.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1933, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Parin Jhaveri", + "Riya Joshi" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ObFGLVEPYoc/maxresdefault.jpg", + "title": "Holistic Evaluation of Large Language Models: From References to Human Judgment", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ObFGLVEPYoc" + } + ] +} diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json new file mode 100644 index 000000000..d4f7977eb --- /dev/null +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "This talk will explain how to solve business forecasting problems using time series methods. Time series forecasting remains a specialty topic. Because of this you really want to use a package tuned for your use case and specialized to deal with the difficulties inherent in time series forecasting. I will share a simplified problem notation that helps you select between time series packages in R and Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1740, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "John Mount" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/7H44aJuK0Yg/maxresdefault.jpg", + "title": "Solving Forecasting Problems in R and Python", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7H44aJuK0Yg" + } + ] +} diff --git a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json new file mode 100644 index 000000000..3e6fbf868 --- /dev/null +++ b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json @@ -0,0 +1,28 @@ +{ + "description": "In this 90-minute workshop, machine learning engineers and data scientists will learn practical techniques for identifying and mitigating age bias in AI-driven hiring systems. 
We’ll explore fairness metrics like statistical parity, counterfactual fairness, and equalized odds, and demonstrate how tools such as Fairlearn, Aequitas, and IBM Fairness 360 can be used to monitor and improve model fairness. Through hands-on exercises, participants will walk away with the skills to evaluate and de-bias models in high-risk areas like recruitment.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5415, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "John Sandall" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/G1I45toaVSs/maxresdefault.jpg", + "title": "Fairness Tales: How To Measure And Mitigate Unfair Bias in Machine Learning Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=G1I45toaVSs" + } + ] +} diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json new file mode 100644 index 000000000..ca2215445 --- /dev/null +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Learn to build powerful sensors running on low-cost microcontrollers, all in Python!\n\nDid you know that (Micro)Python can scale all the way down to microcontrollers\nthat have less than 1 MB of RAM and program memory? Such devices can cost just a few dollars, and are widely used to measure, log, analyze and react to physical phenomena. This enables a wide range of useful and fun applications - be it for a smart home, wearables, scientific measurements, consumer products or industrial solutions.\n\nIn this talk, we will demonstrate how to get started with MicroPython on an ESP32 microcontroller.\nWe will first show how to create a basic Internet-connected sensor node using simple analog/digital sensors. 
And then we will show how to create advanced sensors that use Digital Signal Processing and Machine Learning to analyze microphone, accelerometer or camera data\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1716, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Jon Nordby" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/nCmBJJHGQKo/maxresdefault.jpg", + "title": "Microcontrollers + Machine Learning with MicroPython in 1-2-3", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=nCmBJJHGQKo" + } + ] +} diff --git a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json new file mode 100644 index 000000000..97699eaa0 --- /dev/null +++ 
b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Faustream is an open-source tool I developed that bridges the gap between streaming data and real-time predictive analytics. This talk explores how Faustream leverages Python, Kafka, and Faust to handle high-velocity data streams while applying machine learning models in real-time. We'll dive into its architecture, key features, and applications, demonstrating how it can revolutionize data processing across industries.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1063, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Joseph Oladokun" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/exHRTSGZtAo/maxresdefault.jpg", + "title": "Bridging the Gap: Real-Time Predictive Analytics with Faustream", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=exHRTSGZtAo" + } + ] +} diff --git a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json new file mode 100644 index 000000000..db352c0c1 --- /dev/null +++ b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json @@ -0,0 +1,28 @@ +{ + "description": "Data quality is a crucial factor that significantly impacts the performance of machine learning models. However, many data scientists often overlook or underestimate the hidden costs associated with poor data quality. This talk will highlight common data challenges, and discuss their implications for model accuracy and reliability. Attendees will learn practical strategies to identify, assess, and improve data quality, ensuring their machine learning projects yield better results.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1861, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Kalyan Prasad" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2ugMOAYwLpQ/maxresdefault.jpg", + "title": "The Hidden Costs of Data Quality - Tackling Common Data Challenges in ML", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2ugMOAYwLpQ" + } + ] +} diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json new file mode 100644 index 000000000..bf486f66a --- /dev/null +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -0,0 +1,29 @@ +{ + "description": "Writing GPU code in Python is easier today than ever, and in this tutorial, we will cover how you can get started with accelerating your code.\n\nYou don't need to learn C++ and you don't need new development tools.\n\nAttendees will be expected to have a general knowledge of Python and programming concepts, but no GPU experience will be necessary. 
Our key takeaway for attendees will be the knowledge that they don’t have to do much differently to get their code running on a GPU\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5341, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Katrina Riehl", + "Jacob Tomlinson" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/rfXgtUYF3lw/maxresdefault.jpg", + "title": "GPU development with Python 101", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=rfXgtUYF3lw" + } + ] +} diff --git a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json new file mode 100644 index 000000000..a8207c92f --- /dev/null +++ b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json @@ -0,0 +1,34 @@ +{ + "description": 
"Discover why the Unix command line remains a powerful and relevant tool for data scientists, even in a Python-dominated landscape. This talk will demonstrate how embracing the command line and leveraging its many tools can significantly enhance your productivity, streamline data workflows, and complement your Python skills.\n\nJeroen Janssens, PhD, is a polyglot data science consultant and certified instructor. His expertise lies in visualizing data, implementing machine learning models, and building solutions using Python, R, JavaScript, and Bash. Jeroen is passionate about open source and sharing knowledge. He is the author of Data Science at the Command Line (O’Reilly, 2021) and is currently writing Python Polars: The Definitive Guide (O’Reilly, 2025). Every now and then he blogs at https://jeroenjanssens.com.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 3266, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://jeroenjanssens.com.", + "url": "https://jeroenjanssens.com." 
+ } + ], + "speakers": [ + "Jeroen Janssens" + ], + "tags": [ + "Keynote" + ], + "thumbnail_url": "https://i.ytimg.com/vi/siPGvvrfylQ/maxresdefault.jpg", + "title": "KEYNOTE: Embrace the Unix Command Line and Supercharge Your PyData Workflow", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=siPGvvrfylQ" + } + ] +} diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json new file mode 100644 index 000000000..fcf517a5e --- /dev/null +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -0,0 +1,30 @@ +{ + "description": "Join us for an exciting keynote from Peter Wang\n\nIn this talk, Peter walks through some of the most interesting learnings from the last few years of AI, as well as lessons learned over the last decade of Python's adoption for data science, in an effort to answer the question, \"What is the role of the open data science movement in the era of AI?\"\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 3786, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Peter Wang" + ], + "tags": [ + "Keynote" + ], + "thumbnail_url": "https://i.ytimg.com/vi/3hSjftUjmWk/maxresdefault.jpg", + "title": "Do Python and Data Science Matter in Our AI Future?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3hSjftUjmWk" + } + ] +} diff --git a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json new file mode 100644 index 000000000..65901a22d --- /dev/null +++ b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json @@ -0,0 +1,30 @@ +{ + "description": "skchange is a python compatible framework library for detecting anomalies, changepoints in time series, and segmentation.\n\nskchange is based on and extends sktime, the most widely used scikit-learn compatible framework library for learning with time series. Both packages are maintained under permissive license, easily extensible by anyone, and interoperable with the python data science stack.\n\nThis workshop gives a hands-on introduction to the new joint detection interface developed in skchange and sktime, for detecting point anomalies, changepoints, and segment anomalies, in unsupervised, semi-supervised, and supervised settings.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5362, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Franz Kiraly", + "Christopher Risi", + "Martin Tveten" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/VwhevNkxjYw/maxresdefault.jpg", + "title": "sktime: time series anomaly detection, changepoint detection, segmentation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=VwhevNkxjYw" + } + ] +} diff --git a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json new file mode 100644 index 000000000..d15ebf5c6 --- /dev/null +++ b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json @@ -0,0 +1,29 @@ +{ + "description": "Feature selection is an essential process in machine learning, especially when dealing with high-dimensional datasets. 
It helps reduce the complexity of machine learning models, improve performance, mitigate overfitting, and decrease computation time. This talk will present a novel open source feature selection framework, shap-select.\nShap-select is noteworthy because of its simplicity - it requires only one fit of the model for which one does feature selection, and yet performs comparably to much heavier methods. It conducts a linear or logistic regression of the target on the Shapley values of the features, on the validation set, and uses the signs and significance levels of the regression coefficients to implement an efficient heuristic for feature selection in tabular regression and classification tasks.\nWe compare this to several other methods, showing that shap-select combines interpretability, computational efficiency, and performance, offering a robust solution for feature selection, especially for real-world cases where model fitting is computationally expensive.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1625, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Baran Köseoğlu", + "Egor Kraev" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/pmqvyrIyB_8/maxresdefault.jpg", + "title": "Fast, intuitive feature selection via regression on Shapley values", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pmqvyrIyB_8" + } + ] +} diff --git a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json new file mode 100644 index 000000000..32a076774 --- /dev/null +++ b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json @@ -0,0 +1,28 @@ +{ + "description": "As the climate changes, farmers in Africa are facing enormous challenges, from unpredictable rainfall to shifting growing seasons. In this session, I will share how we can use machine learning (ML) models, built on open-source platforms like TensorFlow and Google Earth Engine, to predict crop yields for key staples such as maize and cassava. By looking at case studies from Kenya, Ghana, and Malawi, I'll show how ML is helping farmers decide when to plant, manage resources more efficiently, and reduce climate risks. I’ll also talk about practical tools—like community hubs, radio broadcasts, and SMS alerts—that ensure even non-literate farmers can use these insights. 
Expect to walk away with actionable ideas on how to implement these techniques in your own work on food security.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1675, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Kristal Joi Wise" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/wMwEmlhyYh0/maxresdefault.jpg", + "title": "Harnessing Machine Learning to Improve Agricultural Resilience in Africa", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=wMwEmlhyYh0" + } + ] +} diff --git a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json new file mode 100644 index 000000000..21aad9308 --- /dev/null +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + 
"description": "This talk will uncover the power of AI in combating Amazon deforestation through an innovative cattle detection system. We present a cutting-edge approach to monitoring illegal ranching, a primary driver of deforestation, using very high-resolution satellite imagery and deep learning. We'll dive into the unique challenges of detecting cattle from space – from congested scenes with small, clustered targets to diverse and cluttered backgrounds – and how we overcame them with a two-step neural network approach. By combining classification and density estimation techniques, our model efficiently identifies potential cattle locations and estimates herd sizes across varied landscapes. Discover how this interdisciplinary project, developed in collaboration with Brazilian prosecutors, leverages data science to drive real-world impact in environmental conservation and sustainable land management. Join us to explore the intersection of computer vision, geospatial analysis, and environmental advocacy, and learn how AI can be a powerful tool in the fight against deforestation in the Amazon and beyond.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1790, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Leonie Hodel" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/1uex29BVbgg/maxresdefault.jpg", + "title": "Using AI to Spot Deforestation-related Cows on Satellite Images", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=1uex29BVbgg" + } + ] +} diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json new file mode 100644 index 000000000..f091e7140 --- /dev/null +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Data scientists in the real world have to manage messy datasets that evolve over time. New data must be added, old data must be removed and changes to columns must be handled gracefully. Furthermore, many real world datasets grow from a size that works on a laptop to a size that must run on a server. This talk will show that in Python we can meet all these challenges in a simple and scalable way using the delta-rs package to manage the data storage and Polars to read and write the dataset.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1710, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Liam Brannigan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ZIrq9GsN2HM/maxresdefault.jpg", + "title": "Build simple & scalable data pipelines with Polars & DeltaLake", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZIrq9GsN2HM" + } + ] +} diff --git a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json new file mode 100644 index 000000000..caa9b4180 --- /dev/null +++ b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json @@ -0,0 +1,29 @@ +{ + "description": "By unifying PySpark's robust big data processing/analyzing capability with Lance's multimodal AI data lake, data engineers and scientists can efficiently manage and analyze the diverse data types required for cutting-edge AI applications within a familiar big data framework.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1617, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Lu Qiu", + "Allison Wang" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/noZNcpYRrkk/maxresdefault.jpg", + "title": "Empowering PySpark with Lance Format for Multi-Modal AI Data Pipelines", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=noZNcpYRrkk" + } + ] +} diff --git a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json new file mode 100644 index 000000000..c4d578800 --- /dev/null +++ b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Transformers are everywhere: NLP, Computer Vision, sound generation and even protein-folding. Why not in forecasting? After all, what ChatGPT does is predicting the next word. 
Why isn't this architecture state-of-the-art in the time series domain?\n\nIn this talk, you will understand how Amazon Chronos and Salesforce's Moirai transformer-based forecasting models work, the datasets used to train them and how to evaluate them to see if they are a good fit for your use-case.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1251, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Luca Baggi" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/DZICL_8vdXI/maxresdefault.jpg", + "title": "Foundational Models for Time Series Forecasting: are we there yet?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=DZICL_8vdXI" + } + ] +} diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json new file mode 100644 index 000000000..5e0dcb46a --- /dev/null +++ 
b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "What if deploying a Python app was as simple as a single click, and came at zero cost? With PyCafe, you can offer users live, interactive examples of your libraries or have them submit reproducible examples when reporting issues.\nBuilt on top of Pyodide, PyCafe runs countless web frameworks (e.g. streamlit, dash, panel, gradio) directly in the browser. By making apps easy to create, share, and edit, PyCafe opens up new workflows, including possibilities we may not have even imagined yet.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1653, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Maarten Breddels" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/-adJy4MxZgE/maxresdefault.jpg", + "title": "Python Apps in the Browser made simple by PyCafe", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-adJy4MxZgE" + } + ] +} diff --git a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json new file mode 100644 index 000000000..2800660a4 --- /dev/null +++ b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json @@ -0,0 +1,28 @@ +{ + "description": "How do you know when a user experience isn’t hitting the mark? Do you wait for it to show up in qualitative feedback? Do you have a long list of different metrics that you have to keep track of that could potentially signal a problem? When evaluating user experiences, how can you quantify if it’s a good experience or not? Additionally, how do you know if your good or bad experience is impacting other areas of the business?\n\nThese are common problems for product managers and the data scientists and analysts who support them. To solve them, I propose creating an aggregate metric that represents the effort or friction experienced by your users - a User Effort Index.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1748, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Maggie Wolff" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2-0iWgVC2oc/maxresdefault.jpg", + "title": "Measuring the User Experience and the Impact of Effort on Business Outcomes", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2-0iWgVC2oc" + } + ] +} diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json new file mode 100644 index 000000000..17caf4bc2 --- /dev/null +++ b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -0,0 +1,32 @@ +{ + "description": "Data Prep Kit (https://github.com/IBM/data-prep-kit) is a new open source python library to help you wrangle and clean your data for generative AI applications (de-dupe, detect language, removing PII, detect malware, creating 
embeddings, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 4647, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/IBM/data-prep-kit", + "url": "https://github.com/IBM/data-prep-kit" + } + ], + "speakers": [ + "Sujee Maniyam" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/GVA1XK0jrf8/maxresdefault.jpg", + "title": "Preparing Data for LLM Applications Using Data Prep Kit", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=GVA1XK0jrf8" + } + ] +} diff --git a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json new file mode 100644 index 000000000..05bb39967 --- /dev/null +++ b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json @@ -0,0 +1,28 @@ +{ + "description": "Have you ever wanted to understand LLM internals 
such as pre-training, supervised fine-tuning, instruction-tuning, reinforcement learning with human feedback, parameter efficient fine-tuning, expanding LLM context lengths, attention mechanism variants, model deployment performance, and cost optimization, which GPUs to use when and more? This talk will take an end-to-end review of the LLM training and deployment pipeline to give you both a stronger intuition and a faster path to implementation using model training and deployment frameworks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1773, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Mark Moyou" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/V2L6hufE2X4/maxresdefault.jpg", + "title": "Understanding the end-to-end LLM training and inference pipeline", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=V2L6hufE2X4" + } + ] +} diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json new file mode 100644 index 000000000..313258e47 --- /dev/null +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "We present “akimbo”, a library bringing a numpy-like API and vector-speed processing to dataframes on the CPU or GPU. When your data is more complex than simple one-dimensional columns, this is the most natural way to perform selection, mapping and aggregations without iterating over python objects, saving a large factor in memory and processing time.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1787, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Martin Durant" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/thfNEGCuwbY/maxresdefault.jpg", + "title": "akimbo: vectorized processing of nested/ragged dataframe columns", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=thfNEGCuwbY" + } + ] +} diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json new file mode 100644 index 000000000..d6ab1e8b4 --- /dev/null +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "\"What training data do you need, don't you just train on the whole internet?\"\n\"Doesn't data production rely heavily on outsourcing to cheap labour markets in the Global South?\"\n\"Isn't all training data just synthetic nonsense generated by LLMs nowadays, how can you expect a model to learn anything worthwhile?\"\n\nThese are all questions that I regularly get, when I tell people I work on building foundational LLMs. 
Because as often as we use LLMs in our daily lives nowadays, people generally know very little of the data that went into the LLM to train it.\n\nIn this talk, I'll address these questions and hope to build an understanding of what it takes to build an LLM from scratch, from a data perspective.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1670, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Marysia Winkels" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/uV3HLROlcLM/maxresdefault.jpg", + "title": "The Data That Shapes Foundational LLMs", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=uV3HLROlcLM" + } + ] +} diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json new file mode 100644 index 000000000..b3824219c --- /dev/null +++ 
b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "The upcoming release of Apache Spark 4.0 delivers substantial enhancements that refine the functionality and augment the developer experience with the Spark unified analytics engine.\n\nAttendees will learn how to use Apache Spark 4.0's advancements for optimized data processing and analytics\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1735, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Matthew Powers" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/zBy3-NiylY8/maxresdefault.jpg", + "title": "New Features in Apache Spark 4.0", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=zBy3-NiylY8" + } + ] +} diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json new file mode 100644 index 000000000..0761999f1 --- /dev/null +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "“I like waiting for my build jobs,” said no one ever. CI is an essential part of ensuring quality, helping to highlight new issues before they might be merged into the main codebase. CI gives us confidence that the code changes being proposed don’t break things, at least as far as our tests cover. That confidence comes at the cost of time and compute resources.\n\nThe RAPIDS team at NVIDIA manages its own operations and compute resources. Those resources are limited, of course, so we wait our turn and put the toys back when we’re done. It is essential to us that we are using our resources as efficiently as possible. This is the “Speed of Light” principle at NVIDIA: how close are you to a theoretical optimal limit? 
For CI, this involves several factors: startup wait time, docker image setup time, cache utilization, build tool processes, and limiting unnecessary redoing builds and tests for things that haven’t changed. The RAPIDS team set out to add telemetry to all of our builds, so that we can quantify where we are spending our time and compute resources, and ensure that we are spending them wisely. We’ll demonstrate the telemetry tools that we’re using, and show how you can add them to your build jobs.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1784, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Michael Sarahan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/fasxVtDQgK0/maxresdefault.jpg", + "title": "Going Plaid: Striving for Speed of Light in CI pipelines", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=fasxVtDQgK0" + } + ] +} diff --git a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json new file mode 100644 index 000000000..d9549d2f8 --- /dev/null +++ b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json @@ -0,0 +1,28 @@ +{ + "description": "This hands-on tutorial guides participants through the process of constructing the essential components of a Machine Learning Platform (MLP) from scratch. We'll focus on implementing five core elements: a feature store, model registry, orchestrator, inference engine, and basic monitoring system. The session emphasizes practical, hands-on coding using Test-Driven Development (TDD), Domain Driven Design, and hexagonal architecture principles providing attendees with a functional foundation for a robust ML infrastructure.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5359, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Nathan Colbert" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/XkNeCavaJtw/maxresdefault.jpg", + "title": "From Inference to Features: Build a Core ML Platform from Scratch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=XkNeCavaJtw" + } + ] +} diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json new file mode 100644 index 000000000..26f57ae82 --- /dev/null +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Plotnine is a Python library that implements the Grammar of Graphics, enabling users to create complex, layered plots. 
This talk covers techniques for customising your plots, using time series data as an example, and highlights how plotnine integrates with matplotlib, allowing you to enhance your data visualisations for better storytelling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 2062, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Nicola Rennie" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/NBGJuaBF2rc/maxresdefault.jpg", + "title": "Practical Techniques for Polished Visuals with Plotnine", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=NBGJuaBF2rc" + } + ] +} diff --git a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json new file mode 100644 index 000000000..1197b356c --- /dev/null +++ 
b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Tenova, as an innovative engineering company, collaborates closely with its client-partners to create advanced technologies and services that optimize business operations.\n\nThis talk discusses the deployment of our image recognition system to identify and mitigate potential hazards on steel plants, specifically focusing on the detection of bulky steel pieces.\nThe system was deployed on-premise using an edge device and an IP camera, supported by Azure IoT Edge and a Flask API for image processing and prediction.\nA recent migration to a RabbitMQ-based architecture using Pika enhanced scalability and communication.\n\nThe presentation will cover technical strategies, the challenges (like offline functionality and real-time, low-latency hazard detection) and the positive impact of the system on workplace safety and operational efficiency.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1441, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Nicolò Giso" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/G8ypUIlvlEg/maxresdefault.jpg", + "title": "Image Recognition for safety on the factory floor", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=G8ypUIlvlEg" + } + ] +} diff --git a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json new file mode 100644 index 000000000..59f8a5fc5 --- /dev/null +++ b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Geospatial data is more important than ever for tackling real-world challenges like urban planning and climate change. This tutorial teaches you how to use tools like CesiumJS and Python to turn raw data into interactive 3D visuals. It’s a hands-on way to bring data to life and try to make an impact.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5389, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Nompumelelo Mtsweni" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/-6vh5vMgPHA/maxresdefault.jpg", + "title": "3D geospatial data visualization using Python and Cesiumjs", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-6vh5vMgPHA" + } + ] +} diff --git a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json new file mode 100644 index 000000000..196830405 --- /dev/null +++ b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json @@ -0,0 +1,28 @@ +{ + "description": "This talk focuses on the underrepresentation of women in AI and data science, where only 22% of AI professionals are women. We will explore how addressing the missing 78% is critical to creating inclusive, innovative solutions that benefit society as a whole. 
Attendees will learn about the current challenges women face, the importance of diverse perspectives in AI development, and actionable strategies for empowering women in the field through community engagement, mentorship, and data-driven policies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1789, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Noor Aftab" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/2k72xRc67wE/maxresdefault.jpg", + "title": "The Missing 78%: How Women in AI & Data Can Complete the Future of Innovation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2k72xRc67wE" + } + ] +} diff --git a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json new file mode 100644 index 000000000..d27b647b4 --- /dev/null +++ 
b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "GraphRAG is a popular way to use KGs to ground AI apps. Most GraphRAG tutorials use LLMs to build graphs automatically from unstructured data. However, what if you're working on use cases such as investigative journalism and sanctions compliance -- \"catching bad guys\" -- where transparency for decisions and evidence are required?\n\nThis talk explores how to leverage open data, open models, and open source to build investigative graphs which are accountable, exploring otherwise hidden relations in the data that indicate fraud or corruption. This illustrates techniques used in production use cases for anti-money laundering (AML), ultimate beneficial owner (UBO), rapid movement of funds (RMF), and other areas of sanctions compliance in general.\n\nThis approach uses Python open source libraries, e.g., the KùzuDB graph database and LanceDB vector database. For each NLP task we use state-of-the-art open models (mostly not LLMs) emphasizing how to tune for a domain context: named entity recognition, relation extraction, textgraph, entity linking, as well as entity resolution to merge structured data and produce a semantic overlay that organizes the graph.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1750, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Paco Nathan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Nrsh6LzUk6A/maxresdefault.jpg", + "title": "Catching Bad Guys using open data and open models for graphs", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Nrsh6LzUk6A" + } + ] +} diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json new file mode 100644 index 000000000..bfc6efd17 --- /dev/null +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "CSP is a newly open-sourced library for stream processing in Python. In this talk, we discuss how CSP can be leveraged to handle all stages of an online machine learning pipeline from feature generation to live training and inference\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1721, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Pascal Tomecek" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/f5G8OVuRI3k/maxresdefault.jpg", + "title": "Leveraging CSP for Live Inference", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=f5G8OVuRI3k" + } + ] +} diff --git a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json new file mode 100644 index 000000000..6a2e27f88 --- /dev/null +++ b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Many organizations are eager to build and deploy their own large language models (LLMs), but validating them can feel frustrating and incomplete. Fortunately, as data scientists we are experts in model diagnostics, and we can extend these same principles to LLM validation. In this talk, I will present a scientific approach to evaluating custom text generation models in Python across several dimensions such as safety, coherence, and correctness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1763, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Patrick Deziel" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/vxSRIL1WD9g/maxresdefault.jpg", + "title": "Putting the data science back into LLM evaluation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=vxSRIL1WD9g" + } + ] +} diff --git a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json new file mode 100644 index 000000000..8f0059130 --- /dev/null +++ b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json @@ -0,0 +1,28 @@ +{ + "description": "This talk will go over an application scenario that brings together the benefits of vector search with graph traversal. Knowledge graphs (or more generally, graphs), have long been used to model structured data that capture the connection between entities in the real world. 
Recently, there has been a lot of interest in the topic of Graph RAG, which aims to use graphs as part of the retrieval process in RAG, to enhance the outcomes. The talk will cover a practical example to showcase how Python developers can leverage the PyData ecosystem alongside two open source, embedded databases: Kùzu for the graph component, and LanceDB for the vector component of the retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1787, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Prashanth Rao" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ky2yufsffas/maxresdefault.jpg", + "title": "Graph RAG: Bringing together graph and vector search to empower retrieval", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ky2yufsffas" + } + ] +} diff --git a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json new file mode 100644 index 000000000..ff3661e94 --- /dev/null +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Unlike stylized machine learning examples in textbooks and lectures, data are often not readily available to be used to train models and gain insight in real-world applications; instead, practitioners are required to collect those data themselves.\nHowever, data annotation can be expensive (in terms of time, money, or some safety-critical conditions), thus limiting the amount of data we can possibly obtain.\n(Examples include eliciting an online shopper's preference with ads at the risk of being intrusive, or conducting an expensive survey to understand the market of a given product.)\nFurther, not all data are created equal: some are more informative than others.\nFor example, a data point that is similar to one already in our training set is 
unlikely to give us new information; conversely, a point that is different from the data we have thus far could yield novel insight.\nThese considerations motivate a way for us to identify the most informative data points to label and gain knowledge in a way that makes use of our labeling budget as effectively as possible.\nBayesian experimental design (BED) formalizes this framework, leveraging the tools from Bayesian statistics and machine learning to answer the question: which data point is the most valuable that should be labeled to improve our knowledge?\n\nThis talk serves as a friendly introduction to BED including its motivation as discussed above, how it works, and how to implement it in Python.\nDuring our discussions, we will show that interestingly, binary search, a popular algorithm in computer science, is a special case of BED.\nData scientists and ML practitioners who are interested in decision-making under uncertainty and probabilistic ML will benefit from this talk.\nWhile most background knowledge necessary to follow the talk will be covered, the audience should be familiar with common concepts in ML such as training data, predictive models, and common probability distributions (normal, uniform, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1878, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Quan Nguyen" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/zssnoI2JvTo/maxresdefault.jpg", + "title": "Cost-effective data annotation with Bayesian experimental design", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=zssnoI2JvTo" + } + ] +} diff --git a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json new file mode 100644 index 000000000..b632014d3 --- /dev/null +++ b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json @@ -0,0 +1,32 @@ +{ + "description": "Data deduplication is a ubiquitous data quality problem that most data people will encounter at some point in their career. It happens whenever multiple records are collected about the same person or other entity without a unique identifier that ties these records together.\n\nThis talk provides beginners with everything they need to start linking and deduping large datasets using Splink (https://github.com/moj-analytical-services/splink), a free Python library.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1644, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/moj-analytical-services/splink", + "url": "https://github.com/moj-analytical-services/splink" + } + ], + "speakers": [ + "Robin Linacre" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/eQtFkI8f02U/maxresdefault.jpg", + "title": "Rapid deduplication and fuzzy matching of large datasets using Splink", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eQtFkI8f02U" + } + ] +} diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json new file mode 100644 index 000000000..63928ed4f --- /dev/null +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -0,0 +1,32 @@ +{ + "description": "Polars (https://github.com/pola-rs/polars) boasts 18 different data types, not including variants of numerical types.\n\nDo we really need such a vast collection of data types?\n\nWhat is the use case for each type?\n\nWhat is the difference between List and Array? Or between Categorical and Enum? 
And why on Earth would I ever need a Struct?\n\nThis talk will clear up all of these questions and more, as we go through the data types that Polars provides and understand why we need each one of them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1829, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/pola-rs/polars", + "url": "https://github.com/pola-rs/polars" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Rodrigo Girão Serrão" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/8HwfVVknhP4/maxresdefault.jpg", + "title": "Understanding Polars data types", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=8HwfVVknhP4" + } + ] +} diff --git a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json new file mode 100644 index 000000000..85fcc4c5f --- /dev/null +++ 
b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Asynchronous programming can be intimidating for many due to its unique syntax, paradigm, and different behavior in environments like IPython and Jupyter notebooks.\n\nBut it’s not that complicated—and I'll prove it. In this talk, I will demystify the basics, along with some advanced concepts, from a practical perspective. By the end, you'll be ready to get started and implement significant performance improvements in your network or I/O-bound code.\n\nAttend this talk if you’ve been intimidated by async and await for a while and are ready to change that.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1808, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Ryan Varley" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/oy7sEAfJsWw/maxresdefault.jpg", + "title": "Let's get you started with asynchronous programming", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=oy7sEAfJsWw" + } + ] +} diff --git a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json new file mode 100644 index 000000000..e9d56fd82 --- /dev/null +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Large Language Models are great at writing and chatting, but are they also able to talk like a human? Today, modern LLM-based voice bots can listen to users, talk back to them with a realistic voice, handle interruptions and improvise, while sticking to the goal they're given by their builders. And this is not only true for the latest, eye-watering expensive OpenAI's models! In this session we will learn how modern voice bots are made, which open source tools are available to build them, and we are going to see in practice how to build one. At the end of the session, the demo's full source code will be shared with the audience.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1925, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Sara Zanzottera" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Td5dFdG0wE4/maxresdefault.jpg", + "title": "Building LLM Voice Bots with Open Source Tools", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Td5dFdG0wE4" + } + ] +} diff --git a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json new file mode 100644 index 000000000..f7a15d0f1 --- /dev/null +++ b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json @@ -0,0 +1,32 @@ +{ + "description": "The R Development Guide (R Dev Guide) (https://contributor.r-project.org/rdevguide/) serves as a resource for onboarding new contributors to the R project. 
Initially drafted in 2021 and then expanded during the Google Season of Docs 2022, the guide has evolved to make contributing more accessible, especially for newcomers. This talk will explore the latest developments in the guide, its impact on the R community, and how it fosters inclusivity within the project by simplifying the contribution process.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1192, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://contributor.r-project.org/rdevguide/", + "url": "https://contributor.r-project.org/rdevguide/" + } + ], + "speakers": [ + "Saranjeet Kaur Bhogal" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/l5BwwvQlwG0/maxresdefault.jpg", + "title": "Empowering New Contributors: The Evolving Role of the R Development Guide", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=l5BwwvQlwG0" + } + ] +} diff --git 
a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json new file mode 100644 index 000000000..f712d3eeb --- /dev/null +++ b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json @@ -0,0 +1,28 @@ +{ + "description": "Data rules the world and data-scientists / MLEs across academia and industry are creating new and innovative ways to glean insights which have changed our lives through easy to understand and intuitive interfaces. At the heart of the AI / ML revolution ( genAI, LLMs, bioinformatics, climate science etc ) is the availability and elasticity of state of the art hardware which enables processing large swaths of data ( TBs ) that could not run on local laptops for want of compute/memory. Cloud providers have commoditized these powerful machines to the extent that they are now available to every person with a few clicks.\n\nCloud computing allows us to tradeoff upfront hardware costs for granular operational expenses such as renting GPUs by the second. Prima facie this might seem like a winning formula, a key downside is that these costs often add up uncontrollably. Attributing the usage of such hardware to Data/AI/ML jobs across dimensions like cloud accounts, instances, workloads down to the lowest level of granularity, can help provide transparency to not only cost albeit resource management as well.\n\nThrough our work with open-source Metaflow, which started at Netflix in 2017, we have had an opportunity to help customers place their cloud spend in the context of value produced by individual projects combined with more granular resource management to limit spend.\n\nIn this talk, we will provide an overview of the lessons we have learnt in our quest to get a better handle on costs by using Metaflow. 
We will share best practices to consider when writing AI/ML workloads and how constructs in the Metaflow framework can be used to answer questions Data-Scientists/MLE’s ask themselves such as:\n\nHow do my cloud costs break down over time and what workloads/cloud instances are driving these costs?\nAre the workloads executing tuned to allow maximum usage of these expensive resources?\nHow can I refactor my workloads such that the expensive resources are used to their optimal capacity?\nIn particular, we'll focus on best practices to follow when working with large datasets in distributed multi cloud / cluster environments, and how Metaflow constructs can help achieve that in a human friendly manner, with very few lines of code.\n\nThe audience will be empowered to build and deploy production-grade Data/AI/ML pipelines while learning strategies on how to optimize workloads to keep expensive ML/AI operations under control. Finally, the audience will have the tools to answer questions like “Am I using my resources to their fullest extent? If not, what are the opportunities for tuning my AI/ML jobs resource requirements, to bin pack hardware and subsequently reduce overall costs?”\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1813, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Saurabh Garg" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/YYZ6vcumojo/maxresdefault.jpg", + "title": "Navigating Cloud Expenses in Data & AI: Strategies for Scientists and Engineers", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YYZ6vcumojo" + } + ] +} diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json new file mode 100644 index 000000000..18cc748a0 --- /dev/null +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "What if designing data workflows felt like snapping together LEGO blocks? In this talk, we’ll explore how open-source tools enable flexible, modular PyData workflows. We’ll discuss why open source is essential for avoiding vendor lock-in and how to integrate libraries and frameworks within the Python ecosystem, alongside tools like GitHub Actions. Plus, I’ll introduce DataJourney, an open-source toolkit I developed that makes designing workflows as fun and creative as building with LEGO.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1829, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Sayantika Banik" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/KdgegsH3rAQ/maxresdefault.jpg", + "title": "The LEGO Approach to designing PyData Workflows", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=KdgegsH3rAQ" + } + ] +} diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json new file mode 100644 index 000000000..5209bc8b1 --- /dev/null +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "The nvmath-python is a new way of delivering NVIDIA accelerated Math Libraries to Python users: researchers-practitioners, library and framework developers, and optimized GPU kernel developers. 
In this talk we will provide an introduction to the library design goals, its architecture, overview of the key features along with its usage examples.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1646, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Sergey Maydanov" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ABao7JTDTMI/maxresdefault.jpg", + "title": "Bringing NVIDIA math libraries to Python scientific community", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ABao7JTDTMI" + } + ] +} diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json new file mode 100644 index 000000000..64d768bef --- /dev/null +++ b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + 
"description": "As organizations increasingly integrate and adopt AI and machine learning internally, the challenge of maintaining separate pipelines for ML-powered systems and conventional software makes it difficult for DevOps teams to maintain these separate pipelines. This talk explores a unified approach to DevOps and MLOps, demonstrating how existing DevOps pipelines can be transformed into efficient MLOps pipelines using ModelKits with KitOps\n\nWe'll begin by examining the reasons behind the traditional separation of DevOps and MLOps pipelines, including differences in project nature, required expertise, and the size and complexity of artifacts. We'll then delve into the challenges posed by separate pipelines, such as increased costs, coordination difficulties, and accumulating technical debt. Thus the attendees will learn how to leverage open source tooling like KitOps to create a unified pipeline that accommodates both traditional software and ML-powered projects, ultimately leading to more efficient and cost-effective operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1822, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Shivay Lamba" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/eExvPkSqTCQ/maxresdefault.jpg", + "title": "Streamlining AI development and Deployment with KitOps", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eExvPkSqTCQ" + } + ] +} diff --git a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json new file mode 100644 index 000000000..29529e268 --- /dev/null +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Anomaly detection is hardly a new problem, nor is the progress in it as rapid as the LLM blast we’re witnessing today. But it is pressing.\n\nIn this talk, we’ll talk about a realtime anomaly detection pipeline on time series data and discuss the nitty-gritties of the algorithm knobs that help us build an unbiased and reliable system, which includes 1) using NeuralProphet, an open source framework, to forecast for time series data and 2) using robust techniques to detect true anomalies using forecasting errors.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1807, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Shreya Khurana" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/ca4w2ZIZ0S0/maxresdefault.jpg", + "title": "Realtime Time Series Anomaly Detection in Production", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ca4w2ZIZ0S0" + } + ] +} diff --git a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json new file mode 100644 index 000000000..203c998a3 --- /dev/null +++ b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json @@ -0,0 +1,28 @@ +{ + "description": "The paid search landscape is undergoing a remarkable transformation, evolving from traditional keyword-centric strategies to a more nuanced approach that prioritizes audience targeting. 
This shift is not just a trend; it’s a response to the ever-increasing demand for precision and effectiveness in reaching potential customers in a crowded digital marketplace.\n\nAt the forefront of this evolution is our innovative automated system designed to identify high-intent users through sophisticated batch processing of their website behaviour. By harnessing the power of machine learning, we create a dynamic layer that curates smarter audiences, those that closely resemble our most valuable converted customers. This enables us to execute precise retargeting campaigns that not only drive meaningful engagement but also optimize marketing budgets, resulting in enhanced audience selection and significantly higher conversion rates.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1685, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Shrikanth Singh" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/d1SaxtX7L6E/maxresdefault.jpg", + "title": "Automating SEA Retargeting for Smarter Audience Engagement and Higher Conversions", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=d1SaxtX7L6E" + } + ] +} diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json new file mode 100644 index 000000000..f6f9ebb69 --- /dev/null +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "This talk explores how to align large language models (LLMs) with human values via preference learning (PL) in the presence of challenges such as incomplete and corrupted data in preference datasets. We propose a novel method for recalibrating values to tackle these issues, enhancing LLM resilience by improving the robustness of existing models. The session highlights real-world experiments that show how the method addresses adversarial noise and unobserved comparisons, making it essential for building more reliable, ethically aligned AI systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1921, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Son The Nguyen" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/E01yrxkmWvM/maxresdefault.jpg", + "title": "Improve LLMs Alignment with Complete and Robust Preference Data", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=E01yrxkmWvM" + } + ] +} diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json new file mode 100644 index 000000000..967f68495 --- /dev/null +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Today we will learn how to build an application around sensor data, REST Feeds, weather data, traffic cameras and vector data. 
We will write a simple Python application to collect various structured, semistructured and unstructured data. We will process, enrich, augment and vectorize this data and insert it into a Vector Database to be used for semantic hybrid search and filtering. We will then build a Jupyter notebook to analyze, query and return this data.\n\nAlong the way we will learn the basics of Vector Databases and Milvus. While building it we will see the practical reasons we choose what indexes make sense, what to vectorize, how to query multiple vectors even when one is an image and one is text. We will see why we do filtering. We will then use our vector database of Air Quality readings to feed our LLM and get proper answers to Air Quality questions. I will show you all the steps to build a RAG application with Milvus, LangChain, Ollama, Python and Air Quality Reports. Finally after demos I will answer questions, provide the source code and additional resources including articles.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 5385, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Timothy Spann" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/IJuzKZdiLCg/maxresdefault.jpg", + "title": "It's in the Air Tonight. Sensor Data in RAG", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=IJuzKZdiLCg" + } + ] +} diff --git a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json new file mode 100644 index 000000000..bac337703 --- /dev/null +++ b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json @@ -0,0 +1,32 @@ +{ + "description": "data.table is an R package with C code that is one of the most efficient open-source in-memory data manipulation packages available today. First released to CRAN by Matt Dowle in 2006, it continues to grow in popularity, and now over 1500 other CRAN packages depend on data.table. This talk will start with data reading from CSV, discuss basic and advanced data manipulation topics, and finally will end with a discussion about how you can contribute to data.table.\n\nhttps://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 4098, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english", + "url": "https://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english" + } + ], + "speakers": [ + "Toby Dylan Hocking" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/l_7FXnppu-g/maxresdefault.jpg", + "title": "Using and contributing to the data.table package for efficient big data analysis", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=l_7FXnppu-g" + } + ] +} diff --git a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json new file mode 100644 index 000000000..507a8248f --- /dev/null +++ b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json @@ -0,0 +1,28 @@ +{ + "description": "Generative AI is revolutionizing industries by 
enhancing efficiency, personalization, and insight. This talk explores how a robust Python ecosystem, including Streamlit, various libraries, and APIs, is harnessed to build powerful generative AI applications. Attendees will gain insights into the practical implementation of these technologies and their transformative impact on business operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 2159, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Tony Ojeda" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/I7jVu-fHavI/maxresdefault.jpg", + "title": "Generative AI + Python: Unlocking Efficiency, Personalization, and Insight", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=I7jVu-fHavI" + } + ] +} diff --git a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json new file 
mode 100644 index 000000000..c5b0ac0e6 --- /dev/null +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Learn how to get started on your online ML journey with River, an open source Python ML library. The foundations of machine learning were built on offline batch processing techniques for model training and inference. As organisations become more dependent on real-time data, the technological trend for machine learning in production is moving towards adding an online stream processing approach. This has benefits such as lower computational requirements due to being able to incrementally learn from a stream of data points, which enables the continual upgrading of models by adapting to real-time changes in data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1815, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Tun Shwe" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/GhDRKUT9gZA/maxresdefault.jpg", + "title": "Moving from Offline to Online Machine Learning with River", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=GhDRKUT9gZA" + } + ] +} diff --git a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json new file mode 100644 index 000000000..23a2dac16 --- /dev/null +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "As large language models (LLMs) become increasingly integrated into industries like finance, healthcare, and law, ensuring their responsible deployment is critical—particularly in highly regulated environments. These industries face unique challenges, including data privacy, compliance with strict regulations, and minimizing the risks of biased or untrustworthy outputs.\n\nThis session will explore the complexities of using LLMs in regulated industries and present a governance framework to address these challenges. We'll cover practical solutions for deploying LLMs while adhering to industry-specific regulations, ensuring transparency, reducing bias, and maintaining data privacy. 
Attendees will learn how to implement governance best practices at various stages of the LLM lifecycle—from model training and validation to deployment and ongoing monitoring.\n\nDrawing on real-world examples and lessons learned, this talk will equip data scientists, machine learning engineers, and AI leaders with actionable strategies for navigating regulatory compliance and minimizing risks, while still harnessing the full potential of LLMs to drive innovation.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1455, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Vyoma Gajjar" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/__VU52cv6jk/maxresdefault.jpg", + "title": "LLMs in Regulated Industries: Challenges and Governance Solutions", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=__VU52cv6jk" + } + ] +} diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json new file mode 100644 index 000000000..d9687d400 --- /dev/null +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "In this talk, I will offer my perspective on the modern data tools landscape and in particular user-facing tools for interactive data science and data exploration. The latest trends of composable data systems and embeddable query engines like DuckDB and DataFusion create both challenges and opportunities to create a more coherent and productive stack of tools for both end user data scientists and developers building data systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1804, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Wes McKinney" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/w4aYrav8-zE/maxresdefault.jpg", + "title": "Retooling for a Smaller Data Era", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=w4aYrav8-zE" + } + ] +} diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json new file mode 100644 index 000000000..155e057c1 --- /dev/null +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -0,0 +1,28 @@ +{ + "description": "Traditional document processing for Retrieval-Augmented Generation (RAG) often involves cumbersome, error-prone extraction pipelines, hampering AI's ability to retrieve high-quality information from complex formats like PDFs and PowerPoint decks. ColPali disrupts this process by embedding entire pages—text, visuals, and layout—into rich, multi-vector representations using Vision Language Models (VLMs). This talk explores how ColPali, paired with multimodal models like the Llama 3.2 Vision series, enables RAG systems to “see” and reason over documents, dramatically improving retrieval performance. 
Attendees will learn to implement ColPali for enhanced, scalable, and robust enterprise knowledge retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 2151, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Zain Hasan" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/pnacsAWnjV8/maxresdefault.jpg", + "title": "ColPali’s Vision-Powered RAG for Enterprise Documents", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pnacsAWnjV8" + } + ] +} diff --git a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json new file mode 100644 index 000000000..984a2b2ba --- /dev/null +++ b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json @@ -0,0 +1,28 @@ +{ + "description": "Providing timely maternal healthcare in developing countries is a 
critical challenge. This talk demonstrates how data-driven solutions can bridge healthcare gaps and improve access to vital healthcare information for pregnant women, with user privacy in mind. To do so, we fine-tuned the Gemma-2 2 billion parameter instruction model on a synthetic dataset in order to detect whether user messages pertain to urgent or non-urgent maternal healthcare issues. By quickly identifying and prioritizing user inquiries, the model can aid help desks by ensuring urgent messages are promptly forwarded to the appropriate healthcare professionals for immediate intervention.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "duration": 1809, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "Zhen (Tony) Zhao" + ], + "tags": [], + "thumbnail_url": "https://i.ytimg.com/vi/Lb0ecRiz4xE/maxresdefault.jpg", + "title": "Training Language Models to Identify Urgent Messages in Real-Time", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Lb0ecRiz4xE" + } + ] +}